mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use hashes as primary key for resources
This will allow rebuild of database as the backup documents will refer to resource hashes not ids (its very difficult to restore resources with ids unchanged). Also this allows docs to be migrated between databases easily.
This commit is contained in:
parent
713f1af61e
commit
6017bafe48
@ -6,16 +6,16 @@ CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
UNIQUE(item, colname)
|
||||
);
|
||||
|
||||
CREATE TABLE notes_db.resources ( id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL,
|
||||
CREATE TABLE notes_db.resources (
|
||||
hash TEXT NOT NULL PRIMARY KEY ON CONFLICT FAIL,
|
||||
name TEXT NOT NULL UNIQUE ON CONFLICT FAIL
|
||||
);
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY,
|
||||
note INTEGER NOT NULL,
|
||||
resource INTEGER NOT NULL,
|
||||
resource TEXT NOT NULL,
|
||||
FOREIGN KEY(note) REFERENCES notes(id),
|
||||
FOREIGN KEY(resource) REFERENCES resources(id),
|
||||
FOREIGN KEY(resource) REFERENCES resources(hash),
|
||||
UNIQUE(note, resource)
|
||||
);
|
||||
|
||||
@ -45,7 +45,7 @@ END;
|
||||
|
||||
CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources
|
||||
BEGIN
|
||||
DELETE FROM notes_resources_link WHERE resource=OLD.id;
|
||||
DELETE FROM notes_resources_link WHERE resource=OLD.hash;
|
||||
END;
|
||||
|
||||
PRAGMA notes_db.user_version=1;
|
||||
|
@ -972,9 +972,9 @@ class DB:
|
||||
def notes_for(self, field_name, item_id):
|
||||
return self.notes.get_note(self.conn, field_name, item_id) or ''
|
||||
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_ids) -> int:
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int:
|
||||
id_val = self.tables[field].id_map[item_id]
|
||||
return self.notes.set_note(self.conn, field, item_id, id_val, doc, resource_ids, searchable_text)
|
||||
return self.notes.set_note(self.conn, field, item_id, id_val, doc, resource_hashes, searchable_text)
|
||||
|
||||
def unretire_note_for(self, field, item_id) -> int:
|
||||
id_val = self.tables[field].id_map[item_id]
|
||||
@ -983,8 +983,8 @@ class DB:
|
||||
def add_notes_resource(self, path_or_stream, name) -> int:
|
||||
return self.notes.add_resource(self.conn, path_or_stream, name)
|
||||
|
||||
def get_notes_resource(self, resource_id) -> Optional[dict]:
|
||||
return self.notes.get_resource_data(self.conn, resource_id)
|
||||
def get_notes_resource(self, resource_hash) -> Optional[dict]:
|
||||
return self.notes.get_resource_data(self.conn, resource_hash)
|
||||
|
||||
def notes_resources_used_by(self, field, item_id):
|
||||
conn = self.conn
|
||||
|
@ -681,16 +681,16 @@ class Cache:
|
||||
return self.backend.notes_for(field, item_id)
|
||||
|
||||
@write_api
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_ids=()) -> int:
|
||||
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_ids)
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int:
|
||||
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes)
|
||||
|
||||
@write_api
|
||||
def add_notes_resource(self, path_or_stream_or_data, name: str) -> int:
|
||||
return self.backend.add_notes_resource(path_or_stream_or_data, name)
|
||||
|
||||
@read_api
|
||||
def get_notes_resource(self, resource_id) -> Optional[dict]:
|
||||
return self.backend.get_notes_resource(resource_id)
|
||||
def get_notes_resource(self, resource_hash) -> Optional[dict]:
|
||||
return self.backend.get_notes_resource(resource_hash)
|
||||
|
||||
@read_api
|
||||
def notes_resources_used_by(self, field, item_id):
|
||||
|
@ -8,7 +8,7 @@ import time
|
||||
import xxhash
|
||||
from contextlib import suppress
|
||||
from itertools import count, repeat
|
||||
from typing import Optional, Union
|
||||
from typing import Optional
|
||||
|
||||
from calibre.constants import iswindows
|
||||
from calibre.db import FTSQueryError
|
||||
@ -103,15 +103,14 @@ class Notes:
|
||||
self.remove_unreferenced_resources(conn)
|
||||
|
||||
def remove_unreferenced_resources(self, conn):
|
||||
for (rhash,) in conn.get('SELECT hash FROM notes_db.resources WHERE id NOT IN (SELECT resource FROM notes_db.notes_resources_link)'):
|
||||
found = False
|
||||
for (rhash,) in conn.get('SELECT hash FROM notes_db.resources WHERE hash NOT IN (SELECT resource FROM notes_db.notes_resources_link)'):
|
||||
found = True
|
||||
remove_with_retry(self.path_for_resource(conn, rhash))
|
||||
conn.execute('DELETE FROM notes_db.resources WHERE id NOT IN (SELECT resource FROM notes_db.notes_resources_link)')
|
||||
if found:
|
||||
conn.execute('DELETE FROM notes_db.resources WHERE hash NOT IN (SELECT resource FROM notes_db.notes_resources_link)')
|
||||
|
||||
def path_for_resource(self, conn, resource_hash_or_resource_id: Union[str,int]) -> str:
|
||||
if isinstance(resource_hash_or_resource_id, str):
|
||||
resource_hash = resource_hash_or_resource_id
|
||||
else:
|
||||
resource_hash = conn.get('SELECT hash FROM notes_db.resources WHERE id=?', (resource_hash_or_resource_id,), all=False)
|
||||
def path_for_resource(self, conn, resource_hash: str) -> str:
|
||||
hashalg, digest = resource_hash.split(':', 1)
|
||||
prefix = digest[:2]
|
||||
# Cant use colons in filenames on windows safely
|
||||
@ -188,7 +187,7 @@ class Notes:
|
||||
remove_with_retry(srcdir, is_dir=True)
|
||||
return note_id
|
||||
|
||||
def set_note(self, conn, field_name, item_id, item_value, marked_up_text='', used_resource_ids=(), searchable_text=copy_marked_up_text):
|
||||
def set_note(self, conn, field_name, item_id, item_value, marked_up_text='', used_resource_hashes=(), searchable_text=copy_marked_up_text):
|
||||
if searchable_text is copy_marked_up_text:
|
||||
searchable_text = marked_up_text
|
||||
note_id = self.note_id_for(conn, field_name, item_id)
|
||||
@ -199,13 +198,13 @@ class Notes:
|
||||
resources = ()
|
||||
if old_resources:
|
||||
resources = conn.get(
|
||||
'SELECT hash,name FROM notes_db.resources WHERE id IN ({})'.format(','.join(repeat('?', len(old_resources)))),
|
||||
'SELECT hash,name FROM notes_db.resources WHERE hash IN ({})'.format(','.join(repeat('?', len(old_resources)))),
|
||||
tuple(old_resources))
|
||||
self.retire_entry(field_name, item_id, item_value, resources, note_id)
|
||||
if old_resources:
|
||||
self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
|
||||
return -1
|
||||
new_resources = frozenset(used_resource_ids)
|
||||
new_resources = frozenset(used_resource_hashes)
|
||||
resources_to_potentially_remove = old_resources - new_resources
|
||||
resources_to_add = new_resources - old_resources
|
||||
if note_id is None:
|
||||
@ -232,7 +231,7 @@ class Notes:
|
||||
):
|
||||
return {
|
||||
'id': note_id, 'doc': doc, 'searchable_text': searchable_text,
|
||||
'resource_ids': frozenset(self.resources_used_by(conn, note_id)),
|
||||
'resource_hashes': frozenset(self.resources_used_by(conn, note_id)),
|
||||
}
|
||||
|
||||
def rename_note(self, conn, field_name, old_item_id, new_item_id, new_item_value):
|
||||
@ -245,7 +244,7 @@ class Notes:
|
||||
old_note = self.get_note_data(conn, field_name, old_item_id)
|
||||
if not old_note or not old_note['doc']:
|
||||
return
|
||||
self.set_note(conn, field_name, new_item_id, new_item_value, old_note['doc'], old_note['resource_ids'], old_note['searchable_text'])
|
||||
self.set_note(conn, field_name, new_item_id, new_item_value, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text'])
|
||||
|
||||
def trim_retired_dir(self):
|
||||
items = []
|
||||
@ -290,11 +289,11 @@ class Notes:
|
||||
f.write(data)
|
||||
base_name, ext = os.path.splitext(name)
|
||||
c = 0
|
||||
for (resource_id, existing_name) in conn.execute('SELECT id,name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
||||
for (existing_name,) in conn.execute('SELECT name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
||||
if existing_name != name and update_name:
|
||||
while True:
|
||||
try:
|
||||
conn.execute('UPDATE notes_db.resources SET name=? WHERE id=?', (name, resource_id))
|
||||
conn.execute('UPDATE notes_db.resources SET name=? WHERE hash=?', (name, resource_hash))
|
||||
break
|
||||
except apsw.ConstraintError:
|
||||
c += 1
|
||||
@ -303,15 +302,15 @@ class Notes:
|
||||
else:
|
||||
while True:
|
||||
try:
|
||||
resource_id = conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?) RETURNING id', (resource_hash, name), all=False)
|
||||
conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?)', (resource_hash, name), all=False)
|
||||
break
|
||||
except apsw.ConstraintError:
|
||||
c += 1
|
||||
name = f'{base_name}-{c}{ext}'
|
||||
return resource_id
|
||||
return resource_hash
|
||||
|
||||
def get_resource_data(self, conn, resource_id) -> Optional[dict]:
|
||||
for (name, resource_hash) in conn.execute('SELECT name,hash FROM notes_db.resources WHERE id=?', (resource_id,)):
|
||||
def get_resource_data(self, conn, resource_hash) -> Optional[dict]:
|
||||
for (name,) in conn.execute('SELECT name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
||||
path = self.path_for_resource(conn, resource_hash)
|
||||
path = make_long_path_useable(path)
|
||||
os.listdir(os.path.dirname(path))
|
||||
|
@ -266,7 +266,7 @@ class FilesystemTest(BaseTest):
|
||||
self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
|
||||
r1 = cache.add_notes_resource(b'res1', 'res.jpg')
|
||||
r2 = cache.add_notes_resource(b'res2', 'res.jpg')
|
||||
cache.set_notes_for('authors', 2, 'some notes', resource_ids=(r1, r2))
|
||||
cache.set_notes_for('authors', 2, 'some notes', resource_hashes=(r1, r2))
|
||||
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
|
||||
cache.fts_indexing_sleep_time = 0.001
|
||||
cache.enable_fts()
|
||||
|
@ -16,13 +16,13 @@ def test_notes_api(self: 'NotesTest'):
|
||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||
self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg')
|
||||
self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg')
|
||||
note_id = cache.set_notes_for('authors', authors[0], doc, resource_ids=(h1, h2))
|
||||
note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
|
||||
self.ae(cache.notes_for('authors', authors[0]), doc)
|
||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2}))
|
||||
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
||||
self.ae(cache.get_notes_resource(h2)['data'], b'resource2')
|
||||
doc2 = 'a different note to replace the first one'
|
||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,)))
|
||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,)))
|
||||
self.ae(cache.notes_for('authors', authors[0]), doc2)
|
||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1}))
|
||||
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
||||
@ -32,7 +32,7 @@ def test_notes_api(self: 'NotesTest'):
|
||||
|
||||
# check retirement
|
||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,h2)))
|
||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,h2)))
|
||||
self.ae(-1, cache.set_notes_for('authors', authors[0], ''))
|
||||
self.ae(cache.notes_for('authors', authors[0]), '')
|
||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset())
|
||||
@ -41,7 +41,7 @@ def test_notes_api(self: 'NotesTest'):
|
||||
|
||||
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||
nnote_id = cache.set_notes_for('authors', authors[1], doc, resource_ids=(h1, h2))
|
||||
nnote_id = cache.set_notes_for('authors', authors[1], doc, resource_hashes=(h1, h2))
|
||||
self.assertNotEqual(note_id, nnote_id)
|
||||
self.ae(-1, cache.set_notes_for('authors', authors[1], ''))
|
||||
after = os.listdir(notes.retired_dir)
|
||||
@ -63,7 +63,7 @@ def test_cache_api(self: 'NotesTest'):
|
||||
doc = 'simple notes for an author'
|
||||
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||
cache.set_notes_for('authors', author_id, doc, resource_ids=(h1, h2))
|
||||
cache.set_notes_for('authors', author_id, doc, resource_hashes=(h1, h2))
|
||||
# test renaming to a new author preserves notes
|
||||
cache.rename_items('authors', {author_id: 'renamed author'})
|
||||
raid = cache.get_item_id('authors', 'renamed author')
|
||||
@ -92,7 +92,7 @@ def test_cache_api(self: 'NotesTest'):
|
||||
tag_id = cache.get_item_id('#tags', tags[0])
|
||||
h1 = cache.add_notes_resource(b'resource1t', 'r1.jpg')
|
||||
h2 = cache.add_notes_resource(b'resource2t', 'r1.jpg')
|
||||
cache.set_notes_for('#tags', tag_id, doc, resource_ids=(h1, h2))
|
||||
cache.set_notes_for('#tags', tag_id, doc, resource_hashes=(h1, h2))
|
||||
self.ae(cache.notes_for('#tags', tag_id), doc)
|
||||
cache.delete_custom_column('tags')
|
||||
cache.close()
|
||||
|
Loading…
x
Reference in New Issue
Block a user