Store a name for every resource

This commit is contained in:
Kovid Goyal 2023-08-14 09:52:27 +05:30
parent 0b18f30392
commit 35b7bd3fe8
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 101 additions and 66 deletions

View File

@ -6,10 +6,17 @@ CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY,
UNIQUE(item, colname)
);
CREATE TABLE notes_db.resources ( id INTEGER PRIMARY KEY,
hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL,
name TEXT NOT NULL UNIQUE ON CONFLICT FAIL
);
CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY,
note INTEGER NOT NULL,
hash TEXT NOT NULL,
UNIQUE(note, hash)
resource INTEGER NOT NULL,
FOREIGN KEY(note) REFERENCES notes(id),
FOREIGN KEY(resource) REFERENCES resources(id),
UNIQUE(note, resource)
);
CREATE VIRTUAL TABLE notes_db.notes_fts USING fts5(searchable_text, content = 'notes', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2');
@ -21,7 +28,7 @@ BEGIN
INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
END;
CREATE TRIGGER notes_db.notes_db_notes_delete_trg AFTER DELETE ON notes_db.notes
CREATE TRIGGER notes_db.notes_db_notes_delete_trg BEFORE DELETE ON notes_db.notes
BEGIN
DELETE FROM notes_resources_link WHERE note=OLD.id;
INSERT INTO notes_fts(notes_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
@ -36,5 +43,9 @@ BEGIN
INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
END;
CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources
BEGIN
DELETE FROM notes_resources_link WHERE resource=OLD.id;
END;
PRAGMA notes_db.user_version=1;

View File

@ -17,6 +17,7 @@ import sys
import time
import uuid
from contextlib import closing, suppress
from typing import Optional
from functools import partial
from calibre import as_unicode, force_unicode, isbytestring, prints
@ -339,10 +340,9 @@ class Connection(apsw.Connection): # {{{
self.fts_dbpath = self.notes_dbpath = None
self.setbusytimeout(self.BUSY_TIMEOUT)
self.execute('pragma cache_size=-5000')
self.execute('pragma temp_store=2')
self.execute('PRAGMA cache_size=-5000; PRAGMA temp_store=2; PRAGMA foreign_keys=ON;')
encoding = next(self.execute('pragma encoding'))[0]
encoding = next(self.execute('PRAGMA encoding'))[0]
self.createcollation('PYNOCASE', partial(pynocase,
encoding=encoding))
@ -968,14 +968,14 @@ class DB:
def notes_for(self, field_name, item_id):
return self.notes.get_note(self.conn, field_name, item_id) or ''
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int:
return self.notes.set_note(self.conn, field, item_id, doc, resource_hashes, searchable_text)
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_ids) -> int:
return self.notes.set_note(self.conn, field, item_id, doc, resource_ids, searchable_text)
def add_notes_resource(self, path_or_stream) -> str:
return self.notes.add_resource(path_or_stream)
def add_notes_resource(self, path_or_stream, name) -> int:
return self.notes.add_resource(self.conn, path_or_stream, name)
def get_notes_resource(self, resource_hash) -> bytes:
return self.notes.get_resource(resource_hash)
def get_notes_resource(self, resource_id) -> Optional[dict]:
return self.notes.get_resource_data(self.conn, resource_id)
def notes_resources_used_by(self, field, item_id):
conn = self.conn
@ -1354,11 +1354,11 @@ class DB:
@property
def user_version(self):
'''The user version of this database'''
return self.conn.get('pragma user_version;', all=False)
return self.conn.get('PRAGMA user_version;', all=False)
@user_version.setter
def user_version(self, val):
self.execute('pragma user_version=%d'%int(val))
self.execute('PRAGMA user_version=%d'%int(val))
def initialize_database(self):
metadata_sqlite = P('metadata_sqlite.sql', data=True,

View File

@ -20,7 +20,7 @@ from io import DEFAULT_BUFFER_SIZE, BytesIO
from queue import Queue
from threading import Lock
from time import monotonic, sleep, time
from typing import NamedTuple, Tuple
from typing import NamedTuple, Tuple, Optional
from calibre import as_unicode, detect_ncpus, isbytestring
from calibre.constants import iswindows, preferred_encoding
@ -677,14 +677,14 @@ class Cache:
def notes_for(self, field, item_id) -> str:
return self.backend.notes_for(field, item_id)
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int:
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes)
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_ids=()) -> int:
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_ids)
def add_notes_resource(self, path_or_stream_or_data) -> str:
return self.backend.add_notes_resource(path_or_stream_or_data)
def add_notes_resource(self, path_or_stream_or_data, name: str) -> int:
return self.backend.add_notes_resource(path_or_stream_or_data, name)
def get_notes_resource(self, resource_hash) -> bytes:
return self.backend.get_notes_resource(resource_hash)
def get_notes_resource(self, resource_id) -> Optional[dict]:
return self.backend.get_notes_resource(resource_id)
def notes_resources_used_by(self, field, item_id):
return frozenset(self.backend.notes_resources_used_by(field, item_id))

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
import apsw
import os
import time
import xxhash
from typing import Union, Optional
from contextlib import suppress
from itertools import repeat
@ -76,7 +78,11 @@ class Notes:
SchemaUpgrade(conn, '\n'.join(triggers))
conn.notes_dbpath = dbpath
def path_for_resource(self, resource_hash: str) -> str:
def path_for_resource(self, conn, resource_hash_or_resource_id: Union[str,int]) -> str:
if isinstance(resource_hash_or_resource_id, str):
resource_hash = resource_hash_or_resource_id
else:
resource_hash = conn.get('SELECT hash FROM notes_db.resources WHERE id=?', (resource_hash_or_resource_id,), all=False)
idx = resource_hash.index(':')
prefix = resource_hash[idx + 1: idx + 3]
return os.path.join(self.resources_dir, prefix, resource_hash)
@ -86,22 +92,21 @@ class Notes:
resources_to_potentially_remove = tuple(resources_to_potentially_remove)
if delete_from_link_table:
conn.executemany('''
DELETE FROM notes_db.notes_resources_link WHERE note=? AND hash=?
DELETE FROM notes_db.notes_resources_link WHERE note=? AND resource=?
''', tuple((note_id, x) for x in resources_to_potentially_remove))
stmt = '''
WITH resources_table(value) AS (VALUES {})
SELECT value FROM resources_table WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link)
SELECT value FROM resources_table WHERE value NOT IN (SELECT resource FROM notes_db.notes_resources_link)
'''.format(','.join(repeat('(?)', len(resources_to_potentially_remove))))
for (x,) in conn.execute(stmt, resources_to_potentially_remove):
remove_with_retry(self.path_for_resource(x))
remove_with_retry(self.path_for_resource(conn, x))
def note_id_for(self, conn, field_name, item_id):
for (ans,) in conn.execute('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
return ans
return conn.get('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name), all=False)
def resources_used_by(self, conn, note_id):
if note_id is not None:
for (h,) in conn.execute('SELECT hash from notes_db.notes_resources_link WHERE note=?', (note_id,)):
for (h,) in conn.execute('SELECT resource from notes_db.notes_resources_link WHERE note=?', (note_id,)):
yield h
def set_backup_for(self, field_name, item_id, marked_up_text='', searchable_text=''):
@ -122,7 +127,7 @@ class Notes:
os.replace(path, dest)
self.trim_retired_dir()
def set_note(self, conn, field_name, item_id, marked_up_text='', hashes_of_used_resources=(), searchable_text=copy_marked_up_text):
def set_note(self, conn, field_name, item_id, marked_up_text='', used_resource_ids=(), searchable_text=copy_marked_up_text):
if searchable_text is copy_marked_up_text:
searchable_text = marked_up_text
note_id = self.note_id_for(conn, field_name, item_id)
@ -134,27 +139,26 @@ class Notes:
if old_resources:
self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
return
new_resources = frozenset(hashes_of_used_resources)
new_resources = frozenset(used_resource_ids)
resources_to_potentially_remove = old_resources - new_resources
resources_to_add = new_resources - old_resources
if note_id is None:
note_id, = next(conn.execute('''
note_id = conn.get('''
INSERT INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id;
''', (item_id, field_name, marked_up_text, searchable_text)))
''', (item_id, field_name, marked_up_text, searchable_text), all=False)
else:
conn.execute('UPDATE notes_db.notes SET doc=?,searchable_text=?', (marked_up_text, searchable_text))
if resources_to_potentially_remove:
self.remove_resources(conn, note_id, resources_to_potentially_remove)
if resources_to_add:
conn.executemany('''
INSERT INTO notes_db.notes_resources_link (note,hash) VALUES (?,?);
INSERT INTO notes_db.notes_resources_link (note,resource) VALUES (?,?);
''', tuple((note_id, x) for x in resources_to_add))
self.set_backup_for(field_name, item_id, marked_up_text, searchable_text)
return note_id
def get_note(self, conn, field_name, item_id):
for (doc,) in conn.execute('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
return doc
return conn.get('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name), all=False)
def get_note_data(self, conn, field_name, item_id):
for (note_id, doc, searchable_text) in conn.execute(
@ -162,7 +166,7 @@ class Notes:
):
return {
'id': note_id, 'doc': doc, 'searchable_text': searchable_text,
'resource_hashes': frozenset(self.resources_used_by(conn, note_id)),
'resource_ids': frozenset(self.resources_used_by(conn, note_id)),
}
def rename_note(self, conn, field_name, old_item_id, new_item_id):
@ -175,7 +179,7 @@ class Notes:
old_note = self.get_note_data(conn, field_name, old_item_id)
if not old_note or not old_note['doc']:
return
self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text'])
self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_ids'], old_note['searchable_text'])
def trim_retired_dir(self):
mpath_map = {}
@ -189,7 +193,7 @@ class Notes:
for path in items[:extra]:
remove_with_retry(path)
def add_resource(self, path_or_stream_or_data):
def add_resource(self, conn, path_or_stream_or_data, name):
if isinstance(path_or_stream_or_data, bytes):
data = path_or_stream_or_data
elif isinstance(path_or_stream_or_data, str):
@ -198,7 +202,7 @@ class Notes:
else:
data = f.read()
resource_hash = hash_data(data)
path = self.path_for_resource(resource_hash)
path = self.path_for_resource(conn, resource_hash)
path = make_long_path_useable(path)
exists = False
try:
@ -207,21 +211,39 @@ class Notes:
pass
else:
exists = s.st_size == len(data)
if exists:
return resource_hash
if not exists:
try:
f = open(path, 'wb')
except FileNotFoundError:
os.makedirs(os.path.dirname(path), exist_ok=True)
f = open(path, 'wb')
with f:
f.write(data)
base_name, ext = os.path.splitext(name)
c = 0
for (resource_id, existing_name) in conn.execute('SELECT id,name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
if existing_name != name:
while True:
try:
conn.execute('UPDATE notes_db.resources SET name=? WHERE id=?', (name, resource_id))
break
except apsw.ConstraintError:
c += 1
name = f'{base_name}-{c}{ext}'
break
else:
while True:
try:
resource_id = conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?) RETURNING id', (resource_hash, name), all=False)
break
except apsw.ConstraintError:
c += 1
name = f'{base_name}-{c}{ext}'
return resource_id
try:
f = open(path, 'wb')
except FileNotFoundError:
os.makedirs(os.path.dirname(path), exist_ok=True)
f = open(path, 'wb')
with f:
f.write(data)
return resource_hash
def get_resource(self, resource_hash) -> bytes:
path = self.path_for_resource(resource_hash)
path = make_long_path_useable(path)
with suppress(FileNotFoundError), open(path, 'rb') as f:
return f.read()
return b''
def get_resource_data(self, conn, resource_id) -> Optional[dict]:
for (name, resource_hash) in conn.execute('SELECT name,hash FROM notes_db.resources WHERE id=?', (resource_id,)):
path = self.path_for_resource(conn, resource_hash)
path = make_long_path_useable(path)
with suppress(FileNotFoundError), open(path, 'rb') as f:
return {'name': name, 'data': f.read(), 'hash': resource_hash}

View File

@ -22,18 +22,20 @@ class NotesTest(BaseTest):
authors = sorted(cache.all_field_ids('authors'))
self.ae(cache.notes_for('authors', authors[0]), '')
doc = 'simple notes for an author'
h1 = cache.add_notes_resource(b'resource1')
h2 = cache.add_notes_resource(b'resource2')
note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg')
self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg')
note_id = cache.set_notes_for('authors', authors[0], doc, resource_ids=(h1, h2))
self.ae(cache.notes_for('authors', authors[0]), doc)
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2}))
self.ae(cache.get_notes_resource(h1), b'resource1')
self.ae(cache.get_notes_resource(h2), b'resource2')
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
self.ae(cache.get_notes_resource(h2)['data'], b'resource2')
doc2 = 'a different note to replace the first one'
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,)))
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,)))
self.ae(cache.notes_for('authors', authors[0]), doc2)
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1}))
self.ae(cache.get_notes_resource(h1), b'resource1')
self.ae(cache.get_notes_resource(h2), b'')
self.assertTrue(os.path.exists(notes.path_for_resource(h1)))
self.assertFalse(os.path.exists(notes.path_for_resource(h2)))
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
self.ae(cache.get_notes_resource(h2), None)
self.assertTrue(os.path.exists(notes.path_for_resource(cache.backend.conn, h1)))
self.assertFalse(os.path.exists(notes.path_for_resource(cache.backend.conn, h2)))