mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Initial implementation of FTS on annotations
This commit is contained in:
parent
cda8dffc3a
commit
5916fd1f5d
@ -151,10 +151,34 @@ CREATE TABLE annotations ( id INTEGER PRIMARY KEY,
|
|||||||
annot_id TEXT NOT NULL,
|
annot_id TEXT NOT NULL,
|
||||||
annot_type TEXT NOT NULL,
|
annot_type TEXT NOT NULL,
|
||||||
annot_data TEXT NOT NULL,
|
annot_data TEXT NOT NULL,
|
||||||
searchable_text TEXT NOT NULL,
|
searchable_text TEXT NOT NULL DEFAULT "",
|
||||||
UNIQUE(book, user_type, user, format, annot_type, annot_id)
|
UNIQUE(book, user_type, user, format, annot_type, annot_id)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE VIRTUAL TABLE annotations_fts USING fts5(searchable_text, content = 'annotations', content_rowid = 'id', tokenize = 'unicode61 remove_diacritics 2');
|
||||||
|
CREATE VIRTUAL TABLE annotations_fts_stemmed USING fts5(searchable_text, content = 'annotations', content_rowid = 'id', tokenize = 'porter unicode61 remove_diacritics 2');
|
||||||
|
|
||||||
|
CREATE TRIGGER annotations_fts_insert_trg AFTER INSERT ON annotations
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
END;
|
||||||
|
|
||||||
|
CREATE TRIGGER annotations_fts_delete_trg AFTER DELETE ON annotations
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
END;
|
||||||
|
|
||||||
|
CREATE TRIGGER annotations_fts_update_trg AFTER UPDATE ON annotations
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
END;
|
||||||
|
|
||||||
|
|
||||||
CREATE VIEW meta AS
|
CREATE VIEW meta AS
|
||||||
SELECT id, title,
|
SELECT id, title,
|
||||||
(SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
|
(SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
|
||||||
|
@ -307,7 +307,7 @@ def save_annotations_for_book(cursor, book_id, fmt, annots_list, user_type='loca
|
|||||||
text = annot.get('highlighed_text') or ''
|
text = annot.get('highlighed_text') or ''
|
||||||
notes = annot.get('notes') or ''
|
notes = annot.get('notes') or ''
|
||||||
if notes:
|
if notes:
|
||||||
text += '0x1f\n\n' + notes
|
text += '\n0x1f\n' + notes
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
data.append((book_id, fmt, user_type, user, timestamp_in_secs, aid, atype, json.dumps(annot), text))
|
data.append((book_id, fmt, user_type, user, timestamp_in_secs, aid, atype, json.dumps(annot), text))
|
||||||
@ -1774,6 +1774,25 @@ class DB(object):
|
|||||||
for x in annotations_for_book(self.conn, book_id, fmt, user_type, user):
|
for x in annotations_for_book(self.conn, book_id, fmt, user_type, user):
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
def search_annotations(self,
    fts_engine_query, use_stemming, highlight_start, highlight_end, annotation_type,
    restrict_to_book_ids, restrict_to_user
):
    """Yield annotations whose searchable text matches a full-text query.

    :param fts_engine_query: Query string bound to the ``MATCH`` operator of
        the FTS table.
    :param use_stemming: When true, search ``annotations_fts_stemmed``;
        otherwise search ``annotations_fts``.
    :param highlight_start: Accepted for interface compatibility; not used
        by this implementation.
    :param highlight_end: Accepted for interface compatibility; not used by
        this implementation.
    :param annotation_type: If truthy, only annotations whose ``annot_type``
        equals this value are returned.
    :param restrict_to_book_ids: If not ``None``, an iterable of book ids;
        rows belonging to any other book are skipped. An empty iterable
        therefore yields nothing.
    :param restrict_to_user: If truthy, a ``(user_type, user)`` pair; only
        annotations stored for that user are returned.
    :return: Generator of dicts with the keys ``id``, ``book_id``,
        ``format``, ``user_type``, ``user`` and ``annotation`` (the
        ``annot_data`` column exactly as returned by the query).
    """
    fts_table = 'annotations_fts_stemmed' if use_stemming else 'annotations_fts'
    query = 'SELECT {0}.id, {0}.book, {0}.format, {0}.user_type, {0}.user, {0}.annot_data FROM {0} '
    query = query.format('annotations')
    query += ' JOIN {fts_table} ON annotations.id = {fts_table}.rowid'.format(fts_table=fts_table)
    query += ' WHERE {fts_table} MATCH ?'.format(fts_table=fts_table)
    data = [fts_engine_query]
    if restrict_to_user:
        # Unpack explicitly so exactly two values are bound to the two
        # placeholders added below (list(*pair) would raise TypeError).
        wanted_user_type, wanted_user = restrict_to_user
        query += ' AND annotations.user_type = ? AND annotations.user = ?'
        data += [wanted_user_type, wanted_user]
    if annotation_type:
        query += ' AND annotations.annot_type = ? '
        data.append(annotation_type)
    # Materialize once so membership tests are cheap and one-shot iterables
    # (e.g. generators) are not exhausted by the first row.
    allowed_book_ids = None if restrict_to_book_ids is None else frozenset(restrict_to_book_ids)
    for (rowid, book_id, fmt, user_type, user, annot_data) in self.execute(query, tuple(data)):
        if allowed_book_ids is not None and book_id not in allowed_book_ids:
            continue
        yield {'id': rowid, 'book_id': book_id, 'format': fmt, 'user_type': user_type, 'user': user, 'annotation': annot_data}
|
||||||
|
|
||||||
def all_annotations_for_book(self, book_id):
|
def all_annotations_for_book(self, book_id):
|
||||||
for (fmt, user_type, user, data) in self.execute('SELECT format, user_type, user, annot_data FROM annotations WHERE book=?', (book_id,)):
|
for (fmt, user_type, user, data) in self.execute('SELECT format, user_type, user, annot_data FROM annotations WHERE book=?', (book_id,)):
|
||||||
try:
|
try:
|
||||||
|
@ -2301,6 +2301,22 @@ class Cache(object):
|
|||||||
def all_annotations_for_book(self, book_id):
|
def all_annotations_for_book(self, book_id):
|
||||||
return tuple(self.backend.all_annotations_for_book(book_id))
|
return tuple(self.backend.all_annotations_for_book(book_id))
|
||||||
|
|
||||||
|
@read_api
def search_annotations(
    self,
    fts_engine_query,
    use_stemming=True,
    highlight_start=None,
    highlight_end=None,
    annotation_type=None,
    restrict_to_book_ids=None,
    restrict_to_user=None,
):
    """Run a full-text search over annotations and return the matches.

    All arguments are forwarded positionally, in the order declared here,
    to ``self.backend.search_annotations``; the iterable it returns is
    materialized into a tuple before being handed back to the caller.
    """
    matches = self.backend.search_annotations(
        fts_engine_query, use_stemming, highlight_start, highlight_end,
        annotation_type, restrict_to_book_ids, restrict_to_user,
    )
    return tuple(matches)
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
def restore_annotations(self, book_id, annotations):
|
def restore_annotations(self, book_id, annotations):
|
||||||
from calibre.utils.iso8601 import parse_iso8601
|
from calibre.utils.iso8601 import parse_iso8601
|
||||||
|
@ -714,13 +714,44 @@ CREATE TABLE annotations ( id INTEGER PRIMARY KEY,
|
|||||||
annot_id TEXT NOT NULL,
|
annot_id TEXT NOT NULL,
|
||||||
annot_type TEXT NOT NULL,
|
annot_type TEXT NOT NULL,
|
||||||
annot_data TEXT NOT NULL,
|
annot_data TEXT NOT NULL,
|
||||||
searchable_text TEXT NOT NULL,
|
searchable_text TEXT NOT NULL DEFAULT "",
|
||||||
UNIQUE(book, user_type, user, format, annot_type, annot_id)
|
UNIQUE(book, user_type, user, format, annot_type, annot_id)
|
||||||
);
|
);
|
||||||
|
|
||||||
DROP INDEX IF EXISTS annot_idx;
|
DROP INDEX IF EXISTS annot_idx;
|
||||||
CREATE INDEX annot_idx ON annotations (book);
|
CREATE INDEX annot_idx ON annotations (book);
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS annotations_fts;
|
||||||
|
DROP TABLE IF EXISTS annotations_fts_stemmed;
|
||||||
|
CREATE VIRTUAL TABLE annotations_fts USING fts5(searchable_text,
|
||||||
|
content = 'annotations', content_rowid = 'id', tokenize = 'unicode61 remove_diacritics 2');
|
||||||
|
CREATE VIRTUAL TABLE annotations_fts_stemmed USING fts5(searchable_text,
|
||||||
|
content = 'annotations', content_rowid = 'id', tokenize = 'porter unicode61 remove_diacritics 2');
|
||||||
|
|
||||||
|
DROP TRIGGER IF EXISTS annotations_fts_insert_trg;
|
||||||
|
CREATE TRIGGER annotations_fts_insert_trg AFTER INSERT ON annotations
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
END;
|
||||||
|
|
||||||
|
DROP TRIGGER IF EXISTS annotations_fts_delete_trg;
|
||||||
|
CREATE TRIGGER annotations_fts_delete_trg AFTER DELETE ON annotations
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
END;
|
||||||
|
|
||||||
|
DROP TRIGGER IF EXISTS annotations_fts_update_trg;
|
||||||
|
CREATE TRIGGER annotations_fts_update_trg AFTER UPDATE ON annotations
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO annotations_fts(annotations_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
INSERT INTO annotations_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(annotations_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||||
|
INSERT INTO annotations_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||||
|
END;
|
||||||
|
|
||||||
|
|
||||||
DROP TRIGGER IF EXISTS books_delete_trg;
|
DROP TRIGGER IF EXISTS books_delete_trg;
|
||||||
CREATE TRIGGER books_delete_trg
|
CREATE TRIGGER books_delete_trg
|
||||||
AFTER DELETE ON books
|
AFTER DELETE ON books
|
||||||
|
Binary file not shown.
@ -776,9 +776,9 @@ class WritingTest(BaseTest):
|
|||||||
return kw, (ts - EPOCH).total_seconds()
|
return kw, (ts - EPOCH).total_seconds()
|
||||||
|
|
||||||
annot_list = [
|
annot_list = [
|
||||||
a(type='bookmark', title='bookmark1', seq=1),
|
a(type='bookmark', title='bookmark1 changed', seq=1),
|
||||||
a(type='highlight', highlighted_text='text1', uuid='1', seq=2),
|
a(type='highlight', highlighted_text='text1', uuid='1', seq=2),
|
||||||
a(type='highlight', highlighted_text='text2', uuid='2', seq=3, notes='notes2'),
|
a(type='highlight', highlighted_text='text2', uuid='2', seq=3, notes='notes2 some word changed'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def map_as_list(amap):
|
def map_as_list(amap):
|
||||||
@ -798,6 +798,16 @@ class WritingTest(BaseTest):
|
|||||||
cache.check_dirtied_annotations()
|
cache.check_dirtied_annotations()
|
||||||
self.assertFalse(cache.dirtied_cache)
|
self.assertFalse(cache.dirtied_cache)
|
||||||
|
|
||||||
|
# Test searching
|
||||||
|
results = cache.search_annotations('"changed"')
|
||||||
|
self.assertEqual([1, 3], [x['id'] for x in results])
|
||||||
|
results = cache.search_annotations('"changed"', annotation_type='bookmark')
|
||||||
|
self.assertEqual([1], [x['id'] for x in results])
|
||||||
|
results = cache.search_annotations('"Change"')
|
||||||
|
self.assertEqual([1, 3], [x['id'] for x in results])
|
||||||
|
results = cache.search_annotations('"change"', use_stemming=False)
|
||||||
|
self.assertEqual([], [x['id'] for x in results])
|
||||||
|
|
||||||
annot_list[0][0]['title'] = 'changed title'
|
annot_list[0][0]['title'] = 'changed title'
|
||||||
cache.set_annotations_for_book(1, 'moo', annot_list)
|
cache.set_annotations_for_book(1, 'moo', annot_list)
|
||||||
amap = cache.annotations_map_for_book(1, 'moo')
|
amap = cache.annotations_map_for_book(1, 'moo')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user