diff --git a/resources/notes_sqlite.sql b/resources/notes_sqlite.sql index bc517b3e9e..9b53737b98 100644 --- a/resources/notes_sqlite.sql +++ b/resources/notes_sqlite.sql @@ -6,10 +6,17 @@ CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY, UNIQUE(item, colname) ); +CREATE TABLE notes_db.resources ( id INTEGER PRIMARY KEY, + hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL, + name TEXT NOT NULL UNIQUE ON CONFLICT FAIL +); + CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY, note INTEGER NOT NULL, - hash TEXT NOT NULL, - UNIQUE(note, hash) + resource INTEGER NOT NULL, + FOREIGN KEY(note) REFERENCES notes(id), + FOREIGN KEY(resource) REFERENCES resources(id), + UNIQUE(note, resource) ); CREATE VIRTUAL TABLE notes_db.notes_fts USING fts5(searchable_text, content = 'notes', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2'); @@ -21,7 +28,7 @@ BEGIN INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text); END; -CREATE TRIGGER notes_db.notes_db_notes_delete_trg AFTER DELETE ON notes_db.notes +CREATE TRIGGER notes_db.notes_db_notes_delete_trg BEFORE DELETE ON notes_db.notes BEGIN DELETE FROM notes_resources_link WHERE note=OLD.id; INSERT INTO notes_fts(notes_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text); @@ -36,5 +43,9 @@ BEGIN INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text); END; +CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources +BEGIN + DELETE FROM notes_resources_link WHERE resource=OLD.id; +END; PRAGMA notes_db.user_version=1; diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 10795fe296..81a90abe98 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -17,6 +17,7 @@ import sys import time import uuid from contextlib import closing, suppress +from typing import Optional from functools import partial from calibre import as_unicode, force_unicode, isbytestring, prints @@ -339,10 +340,9 @@ class Connection(apsw.Connection): # {{{ self.fts_dbpath = self.notes_dbpath = None self.setbusytimeout(self.BUSY_TIMEOUT) - self.execute('pragma cache_size=-5000') - self.execute('pragma temp_store=2') + self.execute('PRAGMA cache_size=-5000; PRAGMA temp_store=2; PRAGMA foreign_keys=ON;') - encoding = next(self.execute('pragma encoding'))[0] + encoding = next(self.execute('PRAGMA encoding'))[0] self.createcollation('PYNOCASE', partial(pynocase, encoding=encoding)) @@ -968,14 +968,14 @@ class DB: def notes_for(self, field_name, item_id): return self.notes.get_note(self.conn, field_name, item_id) or '' - def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int: - return self.notes.set_note(self.conn, field, item_id, doc, resource_hashes, searchable_text) + def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_ids) -> int: + return self.notes.set_note(self.conn, field, item_id, doc, resource_ids, searchable_text) - def add_notes_resource(self, path_or_stream) -> str: - return self.notes.add_resource(path_or_stream) + def add_notes_resource(self, path_or_stream, name) -> int: + return self.notes.add_resource(self.conn, path_or_stream, name) - def get_notes_resource(self, resource_hash) -> bytes: - return self.notes.get_resource(resource_hash) + def get_notes_resource(self, resource_id) -> Optional[dict]: + return self.notes.get_resource_data(self.conn, resource_id) def notes_resources_used_by(self, field, item_id): conn = self.conn @@ -1354,11 +1354,11 @@ class DB: @property def user_version(self): '''The user version of this database''' - return self.conn.get('pragma user_version;', all=False) + return self.conn.get('PRAGMA user_version;', all=False) @user_version.setter def user_version(self, val): - self.execute('pragma user_version=%d'%int(val)) + self.execute('PRAGMA user_version=%d'%int(val)) def initialize_database(self): metadata_sqlite = P('metadata_sqlite.sql', data=True, diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index c641da08fe..1b6b1435b2 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -20,7 +20,7 @@ from io import DEFAULT_BUFFER_SIZE, BytesIO from queue import Queue from threading import Lock from time import monotonic, sleep, time -from typing import NamedTuple, Tuple +from typing import NamedTuple, Tuple, Optional from calibre import as_unicode, detect_ncpus, isbytestring from calibre.constants import iswindows, preferred_encoding @@ -677,14 +677,14 @@ class Cache: def notes_for(self, field, item_id) -> str: return self.backend.notes_for(field, item_id) - def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int: - return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes) + def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_ids=()) -> int: + return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_ids) - def add_notes_resource(self, path_or_stream_or_data) -> str: - return self.backend.add_notes_resource(path_or_stream_or_data) + def add_notes_resource(self, path_or_stream_or_data, name: str) -> int: + return self.backend.add_notes_resource(path_or_stream_or_data, name) - def get_notes_resource(self, resource_hash) -> bytes: - return self.backend.get_notes_resource(resource_hash) + def get_notes_resource(self, resource_id) -> Optional[dict]: + return self.backend.get_notes_resource(resource_id) def notes_resources_used_by(self, field, item_id): return frozenset(self.backend.notes_resources_used_by(field, item_id)) diff --git a/src/calibre/db/notes/connect.py b/src/calibre/db/notes/connect.py index c16f9b817b..3aa65d65d1 100644 --- a/src/calibre/db/notes/connect.py +++ b/src/calibre/db/notes/connect.py @@ -1,9 +1,11 @@ #!/usr/bin/env python # License: GPLv3 Copyright: 2023, Kovid Goyal +import apsw import os import time import xxhash +from typing import Union, Optional from contextlib import suppress from itertools import repeat @@ -76,7 +78,11 @@ class Notes: SchemaUpgrade(conn, '\n'.join(triggers)) conn.notes_dbpath = dbpath - def path_for_resource(self, resource_hash: str) -> str: + def path_for_resource(self, conn, resource_hash_or_resource_id: Union[str,int]) -> str: + if isinstance(resource_hash_or_resource_id, str): + resource_hash = resource_hash_or_resource_id + else: + resource_hash = conn.get('SELECT hash FROM notes_db.resources WHERE id=?', (resource_hash_or_resource_id,), all=False) idx = resource_hash.index(':') prefix = resource_hash[idx + 1: idx + 3] return os.path.join(self.resources_dir, prefix, resource_hash) @@ -86,22 +92,21 @@ class Notes: resources_to_potentially_remove = tuple(resources_to_potentially_remove) if delete_from_link_table: conn.executemany(''' - DELETE FROM notes_db.notes_resources_link WHERE note=? AND hash=? + DELETE FROM notes_db.notes_resources_link WHERE note=? AND resource=? ''', tuple((note_id, x) for x in resources_to_potentially_remove)) stmt = ''' WITH resources_table(value) AS (VALUES {}) - SELECT value FROM resources_table WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link) + SELECT value FROM resources_table WHERE value NOT IN (SELECT resource FROM notes_db.notes_resources_link) '''.format(','.join(repeat('(?)', len(resources_to_potentially_remove)))) for (x,) in conn.execute(stmt, resources_to_potentially_remove): - remove_with_retry(self.path_for_resource(x)) + remove_with_retry(self.path_for_resource(conn, x)) def note_id_for(self, conn, field_name, item_id): - for (ans,) in conn.execute('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)): - return ans + return conn.get('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name), all=False) def resources_used_by(self, conn, note_id): if note_id is not None: - for (h,) in conn.execute('SELECT hash from notes_db.notes_resources_link WHERE note=?', (note_id,)): + for (h,) in conn.execute('SELECT resource from notes_db.notes_resources_link WHERE note=?', (note_id,)): yield h def set_backup_for(self, field_name, item_id, marked_up_text='', searchable_text=''): @@ -122,7 +127,7 @@ class Notes: os.replace(path, dest) self.trim_retired_dir() - def set_note(self, conn, field_name, item_id, marked_up_text='', hashes_of_used_resources=(), searchable_text=copy_marked_up_text): + def set_note(self, conn, field_name, item_id, marked_up_text='', used_resource_ids=(), searchable_text=copy_marked_up_text): if searchable_text is copy_marked_up_text: searchable_text = marked_up_text note_id = self.note_id_for(conn, field_name, item_id) @@ -134,27 +139,26 @@ class Notes: if old_resources: self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False) return - new_resources = frozenset(hashes_of_used_resources) + new_resources = frozenset(used_resource_ids) resources_to_potentially_remove = old_resources - new_resources resources_to_add = new_resources - old_resources if note_id is None: - note_id, = next(conn.execute(''' + note_id = conn.get(''' INSERT INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id; - ''', (item_id, field_name, marked_up_text, searchable_text))) + ''', (item_id, field_name, marked_up_text, searchable_text), all=False) else: conn.execute('UPDATE notes_db.notes SET doc=?,searchable_text=?', (marked_up_text, searchable_text)) if resources_to_potentially_remove: self.remove_resources(conn, note_id, resources_to_potentially_remove) if resources_to_add: conn.executemany(''' - INSERT INTO notes_db.notes_resources_link (note,hash) VALUES (?,?); + INSERT INTO notes_db.notes_resources_link (note,resource) VALUES (?,?); ''', tuple((note_id, x) for x in resources_to_add)) self.set_backup_for(field_name, item_id, marked_up_text, searchable_text) return note_id def get_note(self, conn, field_name, item_id): - for (doc,) in conn.execute('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)): - return doc + return conn.get('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name), all=False) def get_note_data(self, conn, field_name, item_id): for (note_id, doc, searchable_text) in conn.execute( @@ -162,7 +166,7 @@ class Notes: ): return { 'id': note_id, 'doc': doc, 'searchable_text': searchable_text, - 'resource_hashes': frozenset(self.resources_used_by(conn, note_id)), + 'resource_ids': frozenset(self.resources_used_by(conn, note_id)), } def rename_note(self, conn, field_name, old_item_id, new_item_id): @@ -175,7 +179,7 @@ class Notes: old_note = self.get_note_data(conn, field_name, old_item_id) if not old_note or not old_note['doc']: return - self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text']) + self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_ids'], old_note['searchable_text']) def trim_retired_dir(self): mpath_map = {} @@ -189,7 +193,7 @@ class Notes: for path in items[:extra]: remove_with_retry(path) - def add_resource(self, path_or_stream_or_data): + def add_resource(self, conn, path_or_stream_or_data, name): if isinstance(path_or_stream_or_data, bytes): data = path_or_stream_or_data elif isinstance(path_or_stream_or_data, str): @@ -198,7 +202,7 @@ class Notes: else: data = f.read() resource_hash = hash_data(data) - path = self.path_for_resource(resource_hash) + path = self.path_for_resource(conn, resource_hash) path = make_long_path_useable(path) exists = False try: @@ -207,21 +211,39 @@ class Notes: pass else: exists = s.st_size == len(data) - if exists: - return resource_hash + if not exists: + try: + f = open(path, 'wb') + except FileNotFoundError: + os.makedirs(os.path.dirname(path), exist_ok=True) + f = open(path, 'wb') + with f: + f.write(data) + base_name, ext = os.path.splitext(name) + c = 0 + for (resource_id, existing_name) in conn.execute('SELECT id,name FROM notes_db.resources WHERE hash=?', (resource_hash,)): + if existing_name != name: + while True: + try: + conn.execute('UPDATE notes_db.resources SET name=? WHERE id=?', (name, resource_id)) + break + except apsw.ConstraintError: + c += 1 + name = f'{base_name}-{c}{ext}' + break + else: + while True: + try: + resource_id = conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?) RETURNING id', (resource_hash, name), all=False) + break + except apsw.ConstraintError: + c += 1 + name = f'{base_name}-{c}{ext}' + return resource_id - try: - f = open(path, 'wb') - except FileNotFoundError: - os.makedirs(os.path.dirname(path), exist_ok=True) - f = open(path, 'wb') - with f: - f.write(data) - return resource_hash - - def get_resource(self, resource_hash) -> bytes: - path = self.path_for_resource(resource_hash) - path = make_long_path_useable(path) - with suppress(FileNotFoundError), open(path, 'rb') as f: - return f.read() - return b'' + def get_resource_data(self, conn, resource_id) -> Optional[dict]: + for (name, resource_hash) in conn.execute('SELECT name,hash FROM notes_db.resources WHERE id=?', (resource_id,)): + path = self.path_for_resource(conn, resource_hash) + path = make_long_path_useable(path) + with suppress(FileNotFoundError), open(path, 'rb') as f: + return {'name': name, 'data': f.read(), 'hash': resource_hash} diff --git a/src/calibre/db/tests/notes.py b/src/calibre/db/tests/notes.py index c76435b004..0a8460bcb8 100644 --- a/src/calibre/db/tests/notes.py +++ b/src/calibre/db/tests/notes.py @@ -22,18 +22,20 @@ class NotesTest(BaseTest): authors = sorted(cache.all_field_ids('authors')) self.ae(cache.notes_for('authors', authors[0]), '') doc = 'simple notes for an author' - h1 = cache.add_notes_resource(b'resource1') - h2 = cache.add_notes_resource(b'resource2') - note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2)) + h1 = cache.add_notes_resource(b'resource1', 'r1.jpg') + h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') + self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg') + self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg') + note_id = cache.set_notes_for('authors', authors[0], doc, resource_ids=(h1, h2)) self.ae(cache.notes_for('authors', authors[0]), doc) self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2})) - self.ae(cache.get_notes_resource(h1), b'resource1') - self.ae(cache.get_notes_resource(h2), b'resource2') + self.ae(cache.get_notes_resource(h1)['data'], b'resource1') + self.ae(cache.get_notes_resource(h2)['data'], b'resource2') doc2 = 'a different note to replace the first one' - self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,))) + self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,))) self.ae(cache.notes_for('authors', authors[0]), doc2) self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1})) - self.ae(cache.get_notes_resource(h1), b'resource1') - self.ae(cache.get_notes_resource(h2), b'') - self.assertTrue(os.path.exists(notes.path_for_resource(h1))) - self.assertFalse(os.path.exists(notes.path_for_resource(h2))) + self.ae(cache.get_notes_resource(h1)['data'], b'resource1') + self.ae(cache.get_notes_resource(h2), None) + self.assertTrue(os.path.exists(notes.path_for_resource(cache.backend.conn, h1))) + self.assertFalse(os.path.exists(notes.path_for_resource(cache.backend.conn, h2)))