From 5916fd1f5d0efe2ad57509c447dc66915d29ef90 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 13 Jun 2020 14:51:32 +0530
Subject: [PATCH] Initial implementation of FTS on annotations

---
 resources/metadata_sqlite.sql     | 26 ++++++++++++++++++++++-
 src/calibre/db/backend.py         | 21 ++++++++++++++++++-
 src/calibre/db/cache.py           | 16 +++++++++++++++
 src/calibre/db/schema_upgrades.py | 33 +++++++++++++++++++++++++++++-
 src/calibre/db/tests/metadata.db  | Bin 252928 -> 264192 bytes
 src/calibre/db/tests/writing.py   | 14 +++++++++++--
 6 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/resources/metadata_sqlite.sql b/resources/metadata_sqlite.sql
index 84237e58c9..3f8459f74c 100644
--- a/resources/metadata_sqlite.sql
+++ b/resources/metadata_sqlite.sql
@@ -151,10 +151,34 @@ CREATE TABLE annotations ( id INTEGER PRIMARY KEY,
     annot_id TEXT NOT NULL,
     annot_type TEXT NOT NULL,
     annot_data TEXT NOT NULL,
-    searchable_text TEXT NOT NULL,
+    searchable_text TEXT NOT NULL DEFAULT "",
     UNIQUE(book, user_type, user, format, annot_type, annot_id)
 );
 
+CREATE VIRTUAL TABLE annotations_fts USING fts5(searchable_text, content = 'annotations', content_rowid = 'id', tokenize = 'unicode61 remove_diacritics 2');  -- external-content index over annotations.searchable_text
+CREATE VIRTUAL TABLE annotations_fts_stemmed USING fts5(searchable_text, content = 'annotations', content_rowid = 'id', tokenize = 'porter unicode61 remove_diacritics 2');  -- same index, tokenized with the porter stemmer
+
+CREATE TRIGGER annotations_fts_insert_trg AFTER INSERT ON annotations
+BEGIN
+    INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+    INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+END;  -- new annotation rows are added to both FTS indexes
+
+CREATE TRIGGER annotations_fts_delete_trg AFTER DELETE ON annotations
+BEGIN
+    INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+END;  -- deleted annotation rows are removed from both FTS indexes via the 'delete' command
+
+CREATE TRIGGER annotations_fts_update_trg AFTER UPDATE ON annotations
+BEGIN
+    INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+    INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+END;  -- an update is a 'delete' of the OLD text followed by an insert of the NEW text
+
+
 CREATE VIEW meta AS
 SELECT id, title,
        (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index af44ef4293..113ac36bd4 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -307,7 +307,7 @@ def save_annotations_for_book(cursor, book_id, fmt, annots_list, user_type='loca
             text = annot.get('highlighed_text') or ''
             notes = annot.get('notes') or ''
             if notes:
-                text += '0x1f\n\n' + notes
+                text += '\n0x1f\n' + notes
         else:
             continue
         data.append((book_id, fmt, user_type, user, timestamp_in_secs, aid, atype, json.dumps(annot), text))
@@ -1774,6 +1774,25 @@ class DB(object):
         for x in annotations_for_book(self.conn, book_id, fmt, user_type, user):
             yield x
 
+    def search_annotations(self,
+        fts_engine_query, use_stemming, highlight_start, highlight_end, annotation_type,
+        restrict_to_book_ids, restrict_to_user
+    ):  # highlight_start, highlight_end and restrict_to_book_ids are accepted but not used in this body
+        fts_table = 'annotations_fts_stemmed' if use_stemming else 'annotations_fts'  # pick which FTS index the MATCH runs against
+        query = 'SELECT {0}.id, {0}.book, {0}.format, {0}.user_type, {0}.user, {0}.annot_data FROM {0} '
+        query = query.format('annotations')
+        query += ' JOIN {fts_table} ON annotations.id = {fts_table}.rowid'.format(fts_table=fts_table)
+        query += ' WHERE {fts_table} MATCH ?'.format(fts_table=fts_table)  # only the table name is interpolated; values are bound
+        data = [fts_engine_query]
+        if restrict_to_user:
+            query += ' AND annotations.user_type = ? AND annotations.user = ?'
+            data += list(restrict_to_user)  # (user_type, user) pair: one value per placeholder; list(*pair) raised TypeError
+        if annotation_type:
+            query += ' AND annotations.annot_type = ? '
+            data.append(annotation_type)
+        for (rowid, book_id, fmt, user_type, user, annot_data) in self.execute(query, tuple(data)):
+            yield {'id': rowid, 'book_id': book_id, 'format': fmt, 'user_type': user_type, 'user': user, 'annotation': annot_data}  # annot_data is passed through as returned by the query
+
     def all_annotations_for_book(self, book_id):
         for (fmt, user_type, user, data) in self.execute('SELECT format, user_type, user, annot_data FROM annotations WHERE book=?', (book_id,)):
             try:
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 267673244d..4a2d5df514 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -2301,6 +2301,22 @@ class Cache(object):
     def all_annotations_for_book(self, book_id):
         return tuple(self.backend.all_annotations_for_book(book_id))
 
+    @read_api
+    def search_annotations(
+        self,
+        fts_engine_query,
+        use_stemming=True,
+        highlight_start=None,
+        highlight_end=None,
+        annotation_type=None,
+        restrict_to_book_ids=None,
+        restrict_to_user=None,
+    ):
+        return tuple(self.backend.search_annotations(  # materialize the backend generator into a tuple
+            fts_engine_query, use_stemming, highlight_start, highlight_end,
+            annotation_type, restrict_to_book_ids, restrict_to_user
+        ))
+
     @write_api
     def restore_annotations(self, book_id, annotations):
         from calibre.utils.iso8601 import parse_iso8601
diff --git a/src/calibre/db/schema_upgrades.py b/src/calibre/db/schema_upgrades.py
index d79796dbc2..bdf9a72b64 100644
--- a/src/calibre/db/schema_upgrades.py
+++ b/src/calibre/db/schema_upgrades.py
@@ -714,13 +714,44 @@ CREATE TABLE annotations ( id INTEGER PRIMARY KEY,
     annot_id TEXT NOT NULL,
     annot_type TEXT NOT NULL,
     annot_data TEXT NOT NULL,
-    searchable_text TEXT NOT NULL,
+    searchable_text TEXT NOT NULL DEFAULT "",
     UNIQUE(book, user_type, user, format, annot_type, annot_id)
 );
 
 DROP INDEX IF EXISTS annot_idx;
 CREATE INDEX annot_idx ON annotations (book);
 
+DROP TABLE IF EXISTS annotations_fts;
+DROP TABLE IF
EXISTS annotations_fts_stemmed;
+CREATE VIRTUAL TABLE annotations_fts USING fts5(searchable_text,
+    content = 'annotations', content_rowid = 'id', tokenize = 'unicode61 remove_diacritics 2');  -- external-content index over annotations.searchable_text
+CREATE VIRTUAL TABLE annotations_fts_stemmed USING fts5(searchable_text,
+    content = 'annotations', content_rowid = 'id', tokenize = 'porter unicode61 remove_diacritics 2');  -- same index, tokenized with the porter stemmer
+
+DROP TRIGGER IF EXISTS annotations_fts_insert_trg;
+CREATE TRIGGER annotations_fts_insert_trg AFTER INSERT ON annotations
+BEGIN
+    INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+    INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+END;  -- new annotation rows are added to both FTS indexes
+
+DROP TRIGGER IF EXISTS annotations_fts_delete_trg;
+CREATE TRIGGER annotations_fts_delete_trg AFTER DELETE ON annotations
+BEGIN
+    INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+END;  -- deleted annotation rows are removed from both FTS indexes via the 'delete' command
+
+DROP TRIGGER IF EXISTS annotations_fts_update_trg;
+CREATE TRIGGER annotations_fts_update_trg AFTER UPDATE ON annotations
+BEGIN
+    INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+    INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
+    INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
+END;  -- an update is a 'delete' of the OLD text followed by an insert of the NEW text
+
+
 DROP TRIGGER IF EXISTS books_delete_trg;
 CREATE TRIGGER books_delete_trg
     AFTER DELETE ON books
diff --git a/src/calibre/db/tests/metadata.db b/src/calibre/db/tests/metadata.db
index d86bd55a8c920d1473a15b1fe210a0057b03a240..d0df012f3fb90f4948779a1ec5bea85b92cfad58 100644
GIT binary patch
delta 3322
zcmdT`U2NM_6u!PUt=sfx3$S@>8NH&QBs7ebtVL6`hqQ5v)hwyfbk*P?%3?1yZR5)E zO4Tx4yW>YZt*Tfgct=9oo(jtz%0M7MNFenM#w$-Cr1CI+Hz?PMy}GWGM(hbo@paBQ zIX|Dz`A(K^9$3C|@Jgqz1po?yo$Ke-P?!jh4}ID|gfGH2a;MJAt@Oe-dLczVu+$6D=7&d#903i$Yt$Gy z4wJ8y>s{h8=g~&BrBd)FT0&Q+sK)krfJ)Rh^#wH#f*`mWVIcaJ*u4YM06DIsGn!hw zwt+D9Yw?>66r%gJR~s3wySVuoJW~Ap7V2nOIjLQuneJ}*`qL%PM<`7dx8H`|HT)1= z6mOx^XjprtxcxqQv`Bx9o_g;SbnpYp<53$)I24BK@G+`L9U@;aSg{f%XLI>ko6;L- zW?-O67nRKU86lI;OG-u&rFZ*S=9!qj(BFS?q9%avk@O%(Ya;fJN6DgcbOma~T>Bs;a zYMrh*@KUxwigG4liou_?voOv40g+z|XCF~RxUh%^V~N3_=F=g~gu_r9ag8hT3(lBC z1pcTUkC^z$%gl*9c^`s28^UwBycF7nfn6VTpm9m3Xr`~PDi|3h? z2Ips_?6e63rvsYl>1onO-C%G8Go!fnx7FVJwRO&F-~ZCr_J(%pXM@uq+5+e<+FJLe zen3927l96Q$`OS?>y2gTX=~FqyWzz2iM1Z#1mstWBVuK_A#n+j%|}~I1k}HgRPeT8Ca7Y z@bkiqpa^8|Ct}g^h#Tj=BW_R42vq-MtMxF5)BSw{anxtYUFxh>rusm2S^?{?*IDk# z1tmD(B~-Qo^_Ftyyw+2N=_{|Mdgtuu&CZFkG4}t~IrkCW_tg%%FQ>X&m|%W&6MYHw fw$fLd=s9%bJM(?0bZv23@i*Q8JJ!${A60juo5UZli9a1X?o>eW(CG?(`W5v*5Pi} zY)oJmS5{_f6W`9Yk2#)kI$s(S+w`ja%(m=ijfrgHp32iN9A=W3{&X+16c4i@11HdY z1_rmr`04*wGRsWAwx3xGWaW9F>8H1~FtKnfX8FT0m4)RnQ)BYhlPu=U+j