diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index a8658bbf63..10795fe296 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -977,6 +977,13 @@ class DB:
     def get_notes_resource(self, resource_hash) -> bytes:
         return self.notes.get_resource(resource_hash)
 
+    def notes_resources_used_by(self, field, item_id):
+        ''' Yield the resources linked to the note for item_id in field; yields nothing when no such note exists '''
+        conn = self.conn
+        note_id = self.notes.note_id_for(conn, field, item_id)
+        if note_id is not None:
+            yield from self.notes.resources_used_by(conn, note_id)
+
     def initialize_fts(self, dbref):
         self.fts = None
         if not self.prefs['fts_enabled']:
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 7f465f15a8..c641da08fe 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -685,6 +685,10 @@ class Cache:
 
     def get_notes_resource(self, resource_hash) -> bytes:
         return self.backend.get_notes_resource(resource_hash)
+
+    def notes_resources_used_by(self, field, item_id):
+        ''' Return a frozenset of whatever the backend reports as used by the note for item_id in field (empty if there is no note) '''
+        return frozenset(self.backend.notes_resources_used_by(field, item_id))
     # }}}
 
     # Cache Layer API {{{
diff --git a/src/calibre/db/notes/connect.py b/src/calibre/db/notes/connect.py
index eca6054455..c16f9b817b 100644
--- a/src/calibre/db/notes/connect.py
+++ b/src/calibre/db/notes/connect.py
@@ -43,6 +43,8 @@ def remove_with_retry(x):
 
 class Notes:
 
+    max_retired_items = 256
+
     def __init__(self, backend):
         conn = backend.get_connection()
         libdir = os.path.dirname(os.path.abspath(conn.db_filename('main')))
@@ -86,11 +88,12 @@ class Notes:
         conn.executemany('''
             DELETE FROM notes_db.notes_resources_link WHERE note=? AND hash=?
         ''', tuple((note_id, x) for x in resources_to_potentially_remove))
-        for (x,) in conn.execute(
-        '''
-        SELECT value FROM (VALUES {}) AS my_values(value) WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link)
-        '''.format(','.join(repeat('(?)', len(resources_to_potentially_remove)))), resources_to_potentially_remove):
-            remove_with_retry(self.path_for_resource(x))
+        stmt = '''
+            WITH resources_table(value) AS (VALUES {})
+            SELECT value FROM resources_table WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link)
+        '''.format(','.join(repeat('(?)', len(resources_to_potentially_remove))))
+        for (x,) in conn.execute(stmt, resources_to_potentially_remove):
+            remove_with_retry(self.path_for_resource(x))
 
     def note_id_for(self, conn, field_name, item_id):
         for (ans,) in conn.execute('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
@@ -117,7 +120,7 @@ class Notes:
         if os.path.exists(path):
             dest = make_long_path_useable(os.path.join(self.retired_dir, f'{item_id}_{field_name}'))
             os.replace(path, dest)
-            self.trim_retire_dir()
+            self.trim_retired_dir()
 
     def set_note(self, conn, field_name, item_id, marked_up_text='', hashes_of_used_resources=(), searchable_text=copy_marked_up_text):
         if searchable_text is copy_marked_up_text:
@@ -134,15 +137,19 @@ class Notes:
         new_resources = frozenset(hashes_of_used_resources)
         resources_to_potentially_remove = old_resources - new_resources
         resources_to_add = new_resources - old_resources
-        inserted_id, = next(conn.execute('''
-            INSERT OR REPLACE INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id;
-        ''', (item_id, field_name, marked_up_text, searchable_text)))
+        if note_id is None:
+            note_id, = next(conn.execute('''
+                INSERT INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id;
+            ''', (item_id, field_name, marked_up_text, searchable_text)))
+        else:
+            # Restrict the update to this note's row; without the WHERE clause every note in the table would be overwritten
+            conn.execute('UPDATE notes_db.notes SET doc=?,searchable_text=? WHERE id=?', (marked_up_text, searchable_text, note_id))
         if resources_to_potentially_remove:
-            self.remove_resources(conn, inserted_id, resources_to_potentially_remove)
+            self.remove_resources(conn, note_id, resources_to_potentially_remove)
         if resources_to_add:
             conn.executemany('''
                 INSERT INTO notes_db.notes_resources_link (note,hash) VALUES (?,?);
-            ''', tuple((inserted_id, x) for x in resources_to_add))
+            ''', tuple((note_id, x) for x in resources_to_add))
         self.set_backup_for(field_name, item_id, marked_up_text, searchable_text)
         return note_id
 
diff --git a/src/calibre/db/tests/notes.py b/src/calibre/db/tests/notes.py
index d48f0112ee..c76435b004 100644
--- a/src/calibre/db/tests/notes.py
+++ b/src/calibre/db/tests/notes.py
@@ -2,18 +2,38 @@
 # License: GPLv3 Copyright: 2023, Kovid Goyal
 
 
+import os
+
 from calibre.db.tests.base import BaseTest
 
+
 class NotesTest(BaseTest):
 
     ae = BaseTest.assertEqual
 
     def test_notes(self):
-        cache = self.init_cache()
+
+        def create():
+            cache = self.init_cache()
+            cache.backend.notes.max_retired_items = 1
+            return cache, cache.backend.notes
+
+        cache, notes = create()
         authors = sorted(cache.all_field_ids('authors'))
         self.ae(cache.notes_for('authors', authors[0]), '')
         doc = 'simple notes for an author'
         h1 = cache.add_notes_resource(b'resource1')
         h2 = cache.add_notes_resource(b'resource2')
-        cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
+        note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
         self.ae(cache.notes_for('authors', authors[0]), doc)
+        self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2}))
+        self.ae(cache.get_notes_resource(h1), b'resource1')
+        self.ae(cache.get_notes_resource(h2), b'resource2')
+        doc2 = 'a different note to replace the first one'
+        self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,)))
+        self.ae(cache.notes_for('authors', authors[0]), doc2)
+        self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1}))
+        self.ae(cache.get_notes_resource(h1), b'resource1')
+        self.ae(cache.get_notes_resource(h2), b'')
+        self.assertTrue(os.path.exists(notes.path_for_resource(h1)))
+        self.assertFalse(os.path.exists(notes.path_for_resource(h2)))