diff --git a/resources/fts_sqlite.sql b/resources/fts_sqlite.sql
new file mode 100644
index 0000000000..796510a5ea
--- /dev/null
+++ b/resources/fts_sqlite.sql
@@ -0,0 +1,43 @@
+-- Schema for the full-text-search database. It is executed on a connection
+-- that has that database ATTACHed under the schema name fts_db.
+CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY,
+    book INTEGER NOT NULL,
+    format TEXT NOT NULL COLLATE NOCASE,
+    timestamp REAL NOT NULL,
+    format_hash TEXT NOT NULL COLLATE NOCASE,
+    format_size INTEGER NOT NULL,
+    text_hash TEXT NOT NULL COLLATE NOCASE,
+    searchable_text TEXT NOT NULL DEFAULT '',
+    UNIQUE(book, format)
+);
+
+
+-- External-content FTS5 indexes over books_text. The content option takes a
+-- bare table name; FTS5 looks it up in the schema of the virtual table.
+CREATE VIRTUAL TABLE fts_db.books_fts USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2');
+CREATE VIRTUAL TABLE fts_db.books_fts_stemmed USING fts5(searchable_text, content = 'books_text', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2');
+
+-- Triggers keeping both indexes in sync with books_text. SQLite rejects
+-- schema-qualified table names on INSERT/UPDATE/DELETE statements inside a
+-- trigger body, so the statements below use unqualified names.
+CREATE TRIGGER fts_db.books_fts_insert_trg AFTER INSERT ON fts_db.books_text
+BEGIN
+    INSERT INTO books_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+    INSERT INTO books_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+END;
+
+CREATE TRIGGER fts_db.books_fts_delete_trg AFTER DELETE ON fts_db.books_text
+BEGIN
+    INSERT INTO books_fts(books_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO books_fts_stemmed(books_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+END;
+
+CREATE TRIGGER fts_db.books_fts_update_trg AFTER UPDATE ON fts_db.books_text
+BEGIN
+    INSERT INTO books_fts(books_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO books_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+    INSERT INTO books_fts_stemmed(books_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO books_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+END;
+
+PRAGMA fts_db.user_version=1;
diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index 71978c8c7d..7c0554a816 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -492,6 +492,7 @@ class DB:
         self.initialize_prefs(default_prefs, restore_all_prefs, progress_callback)
         self.initialize_custom_columns()
         self.initialize_tables()
+        self.initialize_fts()
         self.set_user_template_functions(compile_user_template_functions(
                                  self.prefs.get('user_template_functions', [])))
         if load_user_formatter_functions:
@@ -564,6 +565,7 @@ class DB:
         defs['cover_browser_subtitle_field'] = 'rating'
         defs['styled_columns'] = {}
         defs['edit_metadata_ignore_display_order'] = False
+        defs['fts_enabled'] = False
 
         # Migrate the bool tristate tweak
         defs['bools_are_tristate'] = \
@@ -919,6 +921,18 @@ class DB:
 
     # }}}
 
+    def initialize_fts(self):
+        '''Create self.fts when the fts_enabled pref is set; otherwise leave it as None.'''
+        self.fts = None
+        if not self.prefs['fts_enabled']:
+            return
+        from .fts.connect import FTS
+        self.fts = FTS(self.get_connection)
+
+    def get_connection(self):
+        '''Return the database connection (the value of the conn property).'''
+        return self.conn
+
     @property
     def conn(self):
         if self._conn is None:
@@ -1224,6 +1238,7 @@ class DB:
             cur.execute(metadata_sqlite)
         except:
             cur.execute('ROLLBACK')
+            raise
         else:
             cur.execute('COMMIT')
         if self.user_version == 0:
diff --git a/src/calibre/db/fts/__init__.py b/src/calibre/db/fts/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/db/fts/connect.py b/src/calibre/db/fts/connect.py
new file mode 100644
index 0000000000..1138118222
--- /dev/null
+++ b/src/calibre/db/fts/connect.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2022, Kovid Goyal
+
+
+import os
+from .schema_upgrade import SchemaUpgrade
+
+
+# TODO: db dump+restore
+# TODO: calibre export/import
+# TODO: check library and vacuuming of fts db
+
+
+class FTS:
+    '''
+    Attaches full-text-search.db, located in the same directory as the main
+    database, to the library connection under the schema name fts_db.
+    '''
+
+    def __init__(self, get_connection):
+        '''
+        :param get_connection: Callable returning the library database connection.
+        '''
+        self.get_connection = get_connection
+        conn = self.get_connection()
+        main_db_path = os.path.abspath(conn.db_filename('main'))
+        self.dbpath = os.path.join(os.path.dirname(main_db_path), 'full-text-search.db')
+        conn.execute(f'ATTACH DATABASE "{self.dbpath}" AS fts_db')
+        # The schema script uses plain CREATE statements and ends by setting
+        # user_version, so only run it against a database still at version 0.
+        if not conn.get('PRAGMA fts_db.user_version', all=False):
+            fts_sqlite = P('fts_sqlite.sql', data=True, allow_user_override=False).decode('utf-8')
+            cur = conn.cursor()
+            cur.execute('BEGIN EXCLUSIVE TRANSACTION')
+            try:
+                cur.execute(fts_sqlite)
+            except BaseException:
+                cur.execute('ROLLBACK')
+                raise
+            else:
+                cur.execute('COMMIT')
+        SchemaUpgrade(conn)
diff --git a/src/calibre/db/fts/schema_upgrade.py b/src/calibre/db/fts/schema_upgrade.py
new file mode 100644
index 0000000000..90aaee7618
--- /dev/null
+++ b/src/calibre/db/fts/schema_upgrade.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2022, Kovid Goyal
+
+
+class SchemaUpgrade:
+    '''
+    Upgrades the attached fts_db database by calling successive
+    upgrade_version_N methods, starting from its current user_version.
+    '''
+
+    def __init__(self, conn):
+        '''
+        Run every pending upgrade inside a single exclusive transaction; it
+        is rolled back, and the error re-raised, if any step fails.
+
+        :param conn: Connection that has the FTS database attached as fts_db.
+        '''
+        self.conn = conn
+        conn.execute('BEGIN EXCLUSIVE TRANSACTION')
+        try:
+            while True:
+                uv = self.user_version
+                meth = getattr(self, f'upgrade_version_{uv}', None)
+                if meth is None:
+                    # No upgrade method for this version: nothing left to do.
+                    break
+                print(f'Upgrading FTS database to version {uv+1}...')
+                meth()
+                self.user_version = uv + 1
+        except BaseException:
+            conn.execute('ROLLBACK')
+            raise
+        else:
+            conn.execute('COMMIT')
+        self.conn = None
+
+    @property
+    def user_version(self):
+        '''Value of the fts_db user_version pragma, or 0 when the query yields nothing.'''
+        return self.conn.get('PRAGMA fts_db.user_version', all=False) or 0
+
+    @user_version.setter
+    def user_version(self, val):
+        self.conn.execute(f'PRAGMA fts_db.user_version={val}')
diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py
index 45069014cc..403072d016 100644
--- a/src/calibre/library/check_library.py
+++ b/src/calibre/library/check_library.py
@@ -14,7 +14,7 @@ from polyglot.builtins import iteritems
 
 EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
 NORMALS = frozenset({'metadata.opf', 'cover.jpg'})
-IGNORE_AT_TOP_LEVEL = frozenset({'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db'})
+IGNORE_AT_TOP_LEVEL = frozenset({'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db', 'full-text-search.db'})
 
 '''
 Checks fields: