diff --git a/resources/notes_sqlite.sql b/resources/notes_sqlite.sql index 729d04f3bf..ac0143e841 100644 --- a/resources/notes_sqlite.sql +++ b/resources/notes_sqlite.sql @@ -6,16 +6,16 @@ CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY AUTOINCREMENT, UNIQUE(item, colname) ); -CREATE TABLE notes_db.resources ( id INTEGER PRIMARY KEY AUTOINCREMENT, - hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL, +CREATE TABLE notes_db.resources ( + hash TEXT NOT NULL PRIMARY KEY ON CONFLICT FAIL, name TEXT NOT NULL UNIQUE ON CONFLICT FAIL -); +) WITHOUT ROWID; CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY, note INTEGER NOT NULL, - resource INTEGER NOT NULL, + resource TEXT NOT NULL, FOREIGN KEY(note) REFERENCES notes(id), - FOREIGN KEY(resource) REFERENCES resources(id), + FOREIGN KEY(resource) REFERENCES resources(hash), UNIQUE(note, resource) ); @@ -45,7 +45,7 @@ END; CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources BEGIN - DELETE FROM notes_resources_link WHERE resource=OLD.id; + DELETE FROM notes_resources_link WHERE resource=OLD.hash; END; PRAGMA notes_db.user_version=1; diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 922536a484..fd5f0b6d84 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -972,9 +972,9 @@ class DB: def notes_for(self, field_name, item_id): return self.notes.get_note(self.conn, field_name, item_id) or '' - def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_ids) -> int: + def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int: id_val = self.tables[field].id_map[item_id] - return self.notes.set_note(self.conn, field, item_id, id_val, doc, resource_ids, searchable_text) + return self.notes.set_note(self.conn, field, item_id, id_val, doc, resource_hashes, searchable_text) def unretire_note_for(self, field, item_id) -> int: id_val = self.tables[field].id_map[item_id] @@ -983,8 +983,8 @@ class DB: def add_notes_resource(self, path_or_stream, name) -> int: return self.notes.add_resource(self.conn, path_or_stream, name) - def get_notes_resource(self, resource_id) -> Optional[dict]: - return self.notes.get_resource_data(self.conn, resource_id) + def get_notes_resource(self, resource_hash) -> Optional[dict]: + return self.notes.get_resource_data(self.conn, resource_hash) def notes_resources_used_by(self, field, item_id): conn = self.conn diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index 667c727832..5e4c0883f8 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -681,16 +681,16 @@ class Cache: return self.backend.notes_for(field, item_id) @write_api - def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_ids=()) -> int: - return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_ids) + def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int: + return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes) @write_api def add_notes_resource(self, path_or_stream_or_data, name: str) -> int: return self.backend.add_notes_resource(path_or_stream_or_data, name) @read_api - def get_notes_resource(self, resource_id) -> Optional[dict]: - return self.backend.get_notes_resource(resource_id) + def get_notes_resource(self, resource_hash) -> Optional[dict]: + return self.backend.get_notes_resource(resource_hash) @read_api def notes_resources_used_by(self, field, item_id): diff --git a/src/calibre/db/notes/connect.py b/src/calibre/db/notes/connect.py index 5c9f1c2d53..c07c2c5cb8 100644 --- a/src/calibre/db/notes/connect.py +++ b/src/calibre/db/notes/connect.py @@ -8,7 +8,7 @@ import time import xxhash from contextlib import suppress from itertools import count, repeat -from typing import Optional, Union +from typing import Optional from calibre.constants import iswindows from calibre.db import FTSQueryError @@ -103,15 +103,14 @@ class Notes: self.remove_unreferenced_resources(conn) def remove_unreferenced_resources(self, conn): - for (rhash,) in conn.get('SELECT hash FROM notes_db.resources WHERE id NOT IN (SELECT resource FROM notes_db.notes_resources_link)'): + found = False + for (rhash,) in conn.get('SELECT hash FROM notes_db.resources WHERE hash NOT IN (SELECT resource FROM notes_db.notes_resources_link)'): + found = True remove_with_retry(self.path_for_resource(conn, rhash)) - conn.execute('DELETE FROM notes_db.resources WHERE id NOT IN (SELECT resource FROM notes_db.notes_resources_link)') + if found: + conn.execute('DELETE FROM notes_db.resources WHERE hash NOT IN (SELECT resource FROM notes_db.notes_resources_link)') - def path_for_resource(self, conn, resource_hash_or_resource_id: Union[str,int]) -> str: - if isinstance(resource_hash_or_resource_id, str): - resource_hash = resource_hash_or_resource_id - else: - resource_hash = conn.get('SELECT hash FROM notes_db.resources WHERE id=?', (resource_hash_or_resource_id,), all=False) + def path_for_resource(self, conn, resource_hash: str) -> str: hashalg, digest = resource_hash.split(':', 1) prefix = digest[:2] # Cant use colons in filenames on windows safely @@ -188,7 +187,7 @@ class Notes: remove_with_retry(srcdir, is_dir=True) return note_id - def set_note(self, conn, field_name, item_id, item_value, marked_up_text='', used_resource_ids=(), searchable_text=copy_marked_up_text): + def set_note(self, conn, field_name, item_id, item_value, marked_up_text='', used_resource_hashes=(), searchable_text=copy_marked_up_text): if searchable_text is copy_marked_up_text: searchable_text = marked_up_text note_id = self.note_id_for(conn, field_name, item_id) @@ -199,13 +198,13 @@ class Notes: resources = () if old_resources: resources = conn.get( - 'SELECT hash,name FROM notes_db.resources WHERE id IN ({})'.format(','.join(repeat('?', len(old_resources)))), + 'SELECT hash,name FROM notes_db.resources WHERE hash IN ({})'.format(','.join(repeat('?', len(old_resources)))), tuple(old_resources)) self.retire_entry(field_name, item_id, item_value, resources, note_id) if old_resources: self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False) return -1 - new_resources = frozenset(used_resource_ids) + new_resources = frozenset(used_resource_hashes) resources_to_potentially_remove = old_resources - new_resources resources_to_add = new_resources - old_resources if note_id is None: @@ -232,7 +231,7 @@ class Notes: ): return { 'id': note_id, 'doc': doc, 'searchable_text': searchable_text, - 'resource_ids': frozenset(self.resources_used_by(conn, note_id)), + 'resource_hashes': frozenset(self.resources_used_by(conn, note_id)), } def rename_note(self, conn, field_name, old_item_id, new_item_id, new_item_value): @@ -245,7 +244,7 @@ class Notes: old_note = self.get_note_data(conn, field_name, old_item_id) if not old_note or not old_note['doc']: return - self.set_note(conn, field_name, new_item_id, new_item_value, old_note['doc'], old_note['resource_ids'], old_note['searchable_text']) + self.set_note(conn, field_name, new_item_id, new_item_value, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text']) def trim_retired_dir(self): items = [] @@ -290,11 +289,11 @@ class Notes: f.write(data) base_name, ext = os.path.splitext(name) c = 0 - for (resource_id, existing_name) in conn.execute('SELECT id,name FROM notes_db.resources WHERE hash=?', (resource_hash,)): + for (existing_name,) in conn.execute('SELECT name FROM notes_db.resources WHERE hash=?', (resource_hash,)): if existing_name != name and update_name: while True: try: - conn.execute('UPDATE notes_db.resources SET name=? WHERE id=?', (name, resource_id)) + conn.execute('UPDATE notes_db.resources SET name=? WHERE hash=?', (name, resource_hash)) break except apsw.ConstraintError: c += 1 @@ -303,15 +302,15 @@ class Notes: else: while True: try: - resource_id = conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?) RETURNING id', (resource_hash, name), all=False) + conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?)', (resource_hash, name), all=False) break except apsw.ConstraintError: c += 1 name = f'{base_name}-{c}{ext}' - return resource_id + return resource_hash - def get_resource_data(self, conn, resource_id) -> Optional[dict]: - for (name, resource_hash) in conn.execute('SELECT name,hash FROM notes_db.resources WHERE id=?', (resource_id,)): + def get_resource_data(self, conn, resource_hash) -> Optional[dict]: + for (name,) in conn.execute('SELECT name FROM notes_db.resources WHERE hash=?', (resource_hash,)): path = self.path_for_resource(conn, resource_hash) path = make_long_path_useable(path) os.listdir(os.path.dirname(path)) diff --git a/src/calibre/db/tests/filesystem.py b/src/calibre/db/tests/filesystem.py index 085e783427..4784236ce5 100644 --- a/src/calibre/db/tests/filesystem.py +++ b/src/calibre/db/tests/filesystem.py @@ -266,7 +266,7 @@ class FilesystemTest(BaseTest): self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read()) r1 = cache.add_notes_resource(b'res1', 'res.jpg') r2 = cache.add_notes_resource(b'res2', 'res.jpg') - cache.set_notes_for('authors', 2, 'some notes', resource_ids=(r1, r2)) + cache.set_notes_for('authors', 2, 'some notes', resource_hashes=(r1, r2)) cache.add_format(1, 'TXT', BytesIO(b'testing exim')) cache.fts_indexing_sleep_time = 0.001 cache.enable_fts() diff --git a/src/calibre/db/tests/notes.py b/src/calibre/db/tests/notes.py index 94c9fb1ccf..01ba62fc4a 100644 --- a/src/calibre/db/tests/notes.py +++ b/src/calibre/db/tests/notes.py @@ -16,13 +16,13 @@ def test_notes_api(self: 'NotesTest'): h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg') self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg') - note_id = cache.set_notes_for('authors', authors[0], doc, resource_ids=(h1, h2)) + note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2)) self.ae(cache.notes_for('authors', authors[0]), doc) self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2})) self.ae(cache.get_notes_resource(h1)['data'], b'resource1') self.ae(cache.get_notes_resource(h2)['data'], b'resource2') doc2 = 'a different note to replace the first one' - self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,))) + self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,))) self.ae(cache.notes_for('authors', authors[0]), doc2) self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1})) self.ae(cache.get_notes_resource(h1)['data'], b'resource1') @@ -32,7 +32,7 @@ def test_notes_api(self: 'NotesTest'): # check retirement h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') - self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,h2))) + self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,h2))) self.ae(-1, cache.set_notes_for('authors', authors[0], '')) self.ae(cache.notes_for('authors', authors[0]), '') self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset()) @@ -41,7 +41,7 @@ def test_notes_api(self: 'NotesTest'): h1 = cache.add_notes_resource(b'resource1', 'r1.jpg') h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') - nnote_id = cache.set_notes_for('authors', authors[1], doc, resource_ids=(h1, h2)) + nnote_id = cache.set_notes_for('authors', authors[1], doc, resource_hashes=(h1, h2)) self.assertNotEqual(note_id, nnote_id) self.ae(-1, cache.set_notes_for('authors', authors[1], '')) after = os.listdir(notes.retired_dir) @@ -63,7 +63,7 @@ def test_cache_api(self: 'NotesTest'): doc = 'simple notes for an author' h1 = cache.add_notes_resource(b'resource1', 'r1.jpg') h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') - cache.set_notes_for('authors', author_id, doc, resource_ids=(h1, h2)) + cache.set_notes_for('authors', author_id, doc, resource_hashes=(h1, h2)) # test renaming to a new author preserves notes cache.rename_items('authors', {author_id: 'renamed author'}) raid = cache.get_item_id('authors', 'renamed author') @@ -92,7 +92,7 @@ def test_cache_api(self: 'NotesTest'): tag_id = cache.get_item_id('#tags', tags[0]) h1 = cache.add_notes_resource(b'resource1t', 'r1.jpg') h2 = cache.add_notes_resource(b'resource2t', 'r1.jpg') - cache.set_notes_for('#tags', tag_id, doc, resource_ids=(h1, h2)) + cache.set_notes_for('#tags', tag_id, doc, resource_hashes=(h1, h2)) self.ae(cache.notes_for('#tags', tag_id), doc) cache.delete_custom_column('tags') cache.close()