mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use hashes as primary key for resources
This will allow rebuild of database as the backup documents will refer to resource hashes not ids (its very difficult to restore resources with ids unchanged). Also this allows docs to be migrated between databases easily.
This commit is contained in:
parent
713f1af61e
commit
6017bafe48
@ -6,16 +6,16 @@ CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|||||||
UNIQUE(item, colname)
|
UNIQUE(item, colname)
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE notes_db.resources ( id INTEGER PRIMARY KEY AUTOINCREMENT,
|
CREATE TABLE notes_db.resources (
|
||||||
hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL,
|
hash TEXT NOT NULL PRIMARY KEY ON CONFLICT FAIL,
|
||||||
name TEXT NOT NULL UNIQUE ON CONFLICT FAIL
|
name TEXT NOT NULL UNIQUE ON CONFLICT FAIL
|
||||||
);
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY,
|
CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY,
|
||||||
note INTEGER NOT NULL,
|
note INTEGER NOT NULL,
|
||||||
resource INTEGER NOT NULL,
|
resource TEXT NOT NULL,
|
||||||
FOREIGN KEY(note) REFERENCES notes(id),
|
FOREIGN KEY(note) REFERENCES notes(id),
|
||||||
FOREIGN KEY(resource) REFERENCES resources(id),
|
FOREIGN KEY(resource) REFERENCES resources(hash),
|
||||||
UNIQUE(note, resource)
|
UNIQUE(note, resource)
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -45,7 +45,7 @@ END;
|
|||||||
|
|
||||||
CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources
|
CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources
|
||||||
BEGIN
|
BEGIN
|
||||||
DELETE FROM notes_resources_link WHERE resource=OLD.id;
|
DELETE FROM notes_resources_link WHERE resource=OLD.hash;
|
||||||
END;
|
END;
|
||||||
|
|
||||||
PRAGMA notes_db.user_version=1;
|
PRAGMA notes_db.user_version=1;
|
||||||
|
@ -972,9 +972,9 @@ class DB:
|
|||||||
def notes_for(self, field_name, item_id):
|
def notes_for(self, field_name, item_id):
|
||||||
return self.notes.get_note(self.conn, field_name, item_id) or ''
|
return self.notes.get_note(self.conn, field_name, item_id) or ''
|
||||||
|
|
||||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_ids) -> int:
|
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int:
|
||||||
id_val = self.tables[field].id_map[item_id]
|
id_val = self.tables[field].id_map[item_id]
|
||||||
return self.notes.set_note(self.conn, field, item_id, id_val, doc, resource_ids, searchable_text)
|
return self.notes.set_note(self.conn, field, item_id, id_val, doc, resource_hashes, searchable_text)
|
||||||
|
|
||||||
def unretire_note_for(self, field, item_id) -> int:
|
def unretire_note_for(self, field, item_id) -> int:
|
||||||
id_val = self.tables[field].id_map[item_id]
|
id_val = self.tables[field].id_map[item_id]
|
||||||
@ -983,8 +983,8 @@ class DB:
|
|||||||
def add_notes_resource(self, path_or_stream, name) -> int:
|
def add_notes_resource(self, path_or_stream, name) -> int:
|
||||||
return self.notes.add_resource(self.conn, path_or_stream, name)
|
return self.notes.add_resource(self.conn, path_or_stream, name)
|
||||||
|
|
||||||
def get_notes_resource(self, resource_id) -> Optional[dict]:
|
def get_notes_resource(self, resource_hash) -> Optional[dict]:
|
||||||
return self.notes.get_resource_data(self.conn, resource_id)
|
return self.notes.get_resource_data(self.conn, resource_hash)
|
||||||
|
|
||||||
def notes_resources_used_by(self, field, item_id):
|
def notes_resources_used_by(self, field, item_id):
|
||||||
conn = self.conn
|
conn = self.conn
|
||||||
|
@ -681,16 +681,16 @@ class Cache:
|
|||||||
return self.backend.notes_for(field, item_id)
|
return self.backend.notes_for(field, item_id)
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_ids=()) -> int:
|
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int:
|
||||||
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_ids)
|
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes)
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
def add_notes_resource(self, path_or_stream_or_data, name: str) -> int:
|
def add_notes_resource(self, path_or_stream_or_data, name: str) -> int:
|
||||||
return self.backend.add_notes_resource(path_or_stream_or_data, name)
|
return self.backend.add_notes_resource(path_or_stream_or_data, name)
|
||||||
|
|
||||||
@read_api
|
@read_api
|
||||||
def get_notes_resource(self, resource_id) -> Optional[dict]:
|
def get_notes_resource(self, resource_hash) -> Optional[dict]:
|
||||||
return self.backend.get_notes_resource(resource_id)
|
return self.backend.get_notes_resource(resource_hash)
|
||||||
|
|
||||||
@read_api
|
@read_api
|
||||||
def notes_resources_used_by(self, field, item_id):
|
def notes_resources_used_by(self, field, item_id):
|
||||||
|
@ -8,7 +8,7 @@ import time
|
|||||||
import xxhash
|
import xxhash
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from itertools import count, repeat
|
from itertools import count, repeat
|
||||||
from typing import Optional, Union
|
from typing import Optional
|
||||||
|
|
||||||
from calibre.constants import iswindows
|
from calibre.constants import iswindows
|
||||||
from calibre.db import FTSQueryError
|
from calibre.db import FTSQueryError
|
||||||
@ -103,15 +103,14 @@ class Notes:
|
|||||||
self.remove_unreferenced_resources(conn)
|
self.remove_unreferenced_resources(conn)
|
||||||
|
|
||||||
def remove_unreferenced_resources(self, conn):
|
def remove_unreferenced_resources(self, conn):
|
||||||
for (rhash,) in conn.get('SELECT hash FROM notes_db.resources WHERE id NOT IN (SELECT resource FROM notes_db.notes_resources_link)'):
|
found = False
|
||||||
|
for (rhash,) in conn.get('SELECT hash FROM notes_db.resources WHERE hash NOT IN (SELECT resource FROM notes_db.notes_resources_link)'):
|
||||||
|
found = True
|
||||||
remove_with_retry(self.path_for_resource(conn, rhash))
|
remove_with_retry(self.path_for_resource(conn, rhash))
|
||||||
conn.execute('DELETE FROM notes_db.resources WHERE id NOT IN (SELECT resource FROM notes_db.notes_resources_link)')
|
if found:
|
||||||
|
conn.execute('DELETE FROM notes_db.resources WHERE hash NOT IN (SELECT resource FROM notes_db.notes_resources_link)')
|
||||||
|
|
||||||
def path_for_resource(self, conn, resource_hash_or_resource_id: Union[str,int]) -> str:
|
def path_for_resource(self, conn, resource_hash: str) -> str:
|
||||||
if isinstance(resource_hash_or_resource_id, str):
|
|
||||||
resource_hash = resource_hash_or_resource_id
|
|
||||||
else:
|
|
||||||
resource_hash = conn.get('SELECT hash FROM notes_db.resources WHERE id=?', (resource_hash_or_resource_id,), all=False)
|
|
||||||
hashalg, digest = resource_hash.split(':', 1)
|
hashalg, digest = resource_hash.split(':', 1)
|
||||||
prefix = digest[:2]
|
prefix = digest[:2]
|
||||||
# Cant use colons in filenames on windows safely
|
# Cant use colons in filenames on windows safely
|
||||||
@ -188,7 +187,7 @@ class Notes:
|
|||||||
remove_with_retry(srcdir, is_dir=True)
|
remove_with_retry(srcdir, is_dir=True)
|
||||||
return note_id
|
return note_id
|
||||||
|
|
||||||
def set_note(self, conn, field_name, item_id, item_value, marked_up_text='', used_resource_ids=(), searchable_text=copy_marked_up_text):
|
def set_note(self, conn, field_name, item_id, item_value, marked_up_text='', used_resource_hashes=(), searchable_text=copy_marked_up_text):
|
||||||
if searchable_text is copy_marked_up_text:
|
if searchable_text is copy_marked_up_text:
|
||||||
searchable_text = marked_up_text
|
searchable_text = marked_up_text
|
||||||
note_id = self.note_id_for(conn, field_name, item_id)
|
note_id = self.note_id_for(conn, field_name, item_id)
|
||||||
@ -199,13 +198,13 @@ class Notes:
|
|||||||
resources = ()
|
resources = ()
|
||||||
if old_resources:
|
if old_resources:
|
||||||
resources = conn.get(
|
resources = conn.get(
|
||||||
'SELECT hash,name FROM notes_db.resources WHERE id IN ({})'.format(','.join(repeat('?', len(old_resources)))),
|
'SELECT hash,name FROM notes_db.resources WHERE hash IN ({})'.format(','.join(repeat('?', len(old_resources)))),
|
||||||
tuple(old_resources))
|
tuple(old_resources))
|
||||||
self.retire_entry(field_name, item_id, item_value, resources, note_id)
|
self.retire_entry(field_name, item_id, item_value, resources, note_id)
|
||||||
if old_resources:
|
if old_resources:
|
||||||
self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
|
self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
|
||||||
return -1
|
return -1
|
||||||
new_resources = frozenset(used_resource_ids)
|
new_resources = frozenset(used_resource_hashes)
|
||||||
resources_to_potentially_remove = old_resources - new_resources
|
resources_to_potentially_remove = old_resources - new_resources
|
||||||
resources_to_add = new_resources - old_resources
|
resources_to_add = new_resources - old_resources
|
||||||
if note_id is None:
|
if note_id is None:
|
||||||
@ -232,7 +231,7 @@ class Notes:
|
|||||||
):
|
):
|
||||||
return {
|
return {
|
||||||
'id': note_id, 'doc': doc, 'searchable_text': searchable_text,
|
'id': note_id, 'doc': doc, 'searchable_text': searchable_text,
|
||||||
'resource_ids': frozenset(self.resources_used_by(conn, note_id)),
|
'resource_hashes': frozenset(self.resources_used_by(conn, note_id)),
|
||||||
}
|
}
|
||||||
|
|
||||||
def rename_note(self, conn, field_name, old_item_id, new_item_id, new_item_value):
|
def rename_note(self, conn, field_name, old_item_id, new_item_id, new_item_value):
|
||||||
@ -245,7 +244,7 @@ class Notes:
|
|||||||
old_note = self.get_note_data(conn, field_name, old_item_id)
|
old_note = self.get_note_data(conn, field_name, old_item_id)
|
||||||
if not old_note or not old_note['doc']:
|
if not old_note or not old_note['doc']:
|
||||||
return
|
return
|
||||||
self.set_note(conn, field_name, new_item_id, new_item_value, old_note['doc'], old_note['resource_ids'], old_note['searchable_text'])
|
self.set_note(conn, field_name, new_item_id, new_item_value, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text'])
|
||||||
|
|
||||||
def trim_retired_dir(self):
|
def trim_retired_dir(self):
|
||||||
items = []
|
items = []
|
||||||
@ -290,11 +289,11 @@ class Notes:
|
|||||||
f.write(data)
|
f.write(data)
|
||||||
base_name, ext = os.path.splitext(name)
|
base_name, ext = os.path.splitext(name)
|
||||||
c = 0
|
c = 0
|
||||||
for (resource_id, existing_name) in conn.execute('SELECT id,name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
for (existing_name,) in conn.execute('SELECT name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
||||||
if existing_name != name and update_name:
|
if existing_name != name and update_name:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
conn.execute('UPDATE notes_db.resources SET name=? WHERE id=?', (name, resource_id))
|
conn.execute('UPDATE notes_db.resources SET name=? WHERE hash=?', (name, resource_hash))
|
||||||
break
|
break
|
||||||
except apsw.ConstraintError:
|
except apsw.ConstraintError:
|
||||||
c += 1
|
c += 1
|
||||||
@ -303,15 +302,15 @@ class Notes:
|
|||||||
else:
|
else:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
resource_id = conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?) RETURNING id', (resource_hash, name), all=False)
|
conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?)', (resource_hash, name), all=False)
|
||||||
break
|
break
|
||||||
except apsw.ConstraintError:
|
except apsw.ConstraintError:
|
||||||
c += 1
|
c += 1
|
||||||
name = f'{base_name}-{c}{ext}'
|
name = f'{base_name}-{c}{ext}'
|
||||||
return resource_id
|
return resource_hash
|
||||||
|
|
||||||
def get_resource_data(self, conn, resource_id) -> Optional[dict]:
|
def get_resource_data(self, conn, resource_hash) -> Optional[dict]:
|
||||||
for (name, resource_hash) in conn.execute('SELECT name,hash FROM notes_db.resources WHERE id=?', (resource_id,)):
|
for (name,) in conn.execute('SELECT name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
||||||
path = self.path_for_resource(conn, resource_hash)
|
path = self.path_for_resource(conn, resource_hash)
|
||||||
path = make_long_path_useable(path)
|
path = make_long_path_useable(path)
|
||||||
os.listdir(os.path.dirname(path))
|
os.listdir(os.path.dirname(path))
|
||||||
|
@ -266,7 +266,7 @@ class FilesystemTest(BaseTest):
|
|||||||
self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
|
self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
|
||||||
r1 = cache.add_notes_resource(b'res1', 'res.jpg')
|
r1 = cache.add_notes_resource(b'res1', 'res.jpg')
|
||||||
r2 = cache.add_notes_resource(b'res2', 'res.jpg')
|
r2 = cache.add_notes_resource(b'res2', 'res.jpg')
|
||||||
cache.set_notes_for('authors', 2, 'some notes', resource_ids=(r1, r2))
|
cache.set_notes_for('authors', 2, 'some notes', resource_hashes=(r1, r2))
|
||||||
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
|
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
|
||||||
cache.fts_indexing_sleep_time = 0.001
|
cache.fts_indexing_sleep_time = 0.001
|
||||||
cache.enable_fts()
|
cache.enable_fts()
|
||||||
|
@ -16,13 +16,13 @@ def test_notes_api(self: 'NotesTest'):
|
|||||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||||
self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg')
|
self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg')
|
||||||
self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg')
|
self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg')
|
||||||
note_id = cache.set_notes_for('authors', authors[0], doc, resource_ids=(h1, h2))
|
note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
|
||||||
self.ae(cache.notes_for('authors', authors[0]), doc)
|
self.ae(cache.notes_for('authors', authors[0]), doc)
|
||||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2}))
|
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2}))
|
||||||
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
||||||
self.ae(cache.get_notes_resource(h2)['data'], b'resource2')
|
self.ae(cache.get_notes_resource(h2)['data'], b'resource2')
|
||||||
doc2 = 'a different note to replace the first one'
|
doc2 = 'a different note to replace the first one'
|
||||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,)))
|
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,)))
|
||||||
self.ae(cache.notes_for('authors', authors[0]), doc2)
|
self.ae(cache.notes_for('authors', authors[0]), doc2)
|
||||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1}))
|
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1}))
|
||||||
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
||||||
@ -32,7 +32,7 @@ def test_notes_api(self: 'NotesTest'):
|
|||||||
|
|
||||||
# check retirement
|
# check retirement
|
||||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,h2)))
|
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,h2)))
|
||||||
self.ae(-1, cache.set_notes_for('authors', authors[0], ''))
|
self.ae(-1, cache.set_notes_for('authors', authors[0], ''))
|
||||||
self.ae(cache.notes_for('authors', authors[0]), '')
|
self.ae(cache.notes_for('authors', authors[0]), '')
|
||||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset())
|
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset())
|
||||||
@ -41,7 +41,7 @@ def test_notes_api(self: 'NotesTest'):
|
|||||||
|
|
||||||
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
||||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||||
nnote_id = cache.set_notes_for('authors', authors[1], doc, resource_ids=(h1, h2))
|
nnote_id = cache.set_notes_for('authors', authors[1], doc, resource_hashes=(h1, h2))
|
||||||
self.assertNotEqual(note_id, nnote_id)
|
self.assertNotEqual(note_id, nnote_id)
|
||||||
self.ae(-1, cache.set_notes_for('authors', authors[1], ''))
|
self.ae(-1, cache.set_notes_for('authors', authors[1], ''))
|
||||||
after = os.listdir(notes.retired_dir)
|
after = os.listdir(notes.retired_dir)
|
||||||
@ -63,7 +63,7 @@ def test_cache_api(self: 'NotesTest'):
|
|||||||
doc = 'simple notes for an author'
|
doc = 'simple notes for an author'
|
||||||
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
||||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||||
cache.set_notes_for('authors', author_id, doc, resource_ids=(h1, h2))
|
cache.set_notes_for('authors', author_id, doc, resource_hashes=(h1, h2))
|
||||||
# test renaming to a new author preserves notes
|
# test renaming to a new author preserves notes
|
||||||
cache.rename_items('authors', {author_id: 'renamed author'})
|
cache.rename_items('authors', {author_id: 'renamed author'})
|
||||||
raid = cache.get_item_id('authors', 'renamed author')
|
raid = cache.get_item_id('authors', 'renamed author')
|
||||||
@ -92,7 +92,7 @@ def test_cache_api(self: 'NotesTest'):
|
|||||||
tag_id = cache.get_item_id('#tags', tags[0])
|
tag_id = cache.get_item_id('#tags', tags[0])
|
||||||
h1 = cache.add_notes_resource(b'resource1t', 'r1.jpg')
|
h1 = cache.add_notes_resource(b'resource1t', 'r1.jpg')
|
||||||
h2 = cache.add_notes_resource(b'resource2t', 'r1.jpg')
|
h2 = cache.add_notes_resource(b'resource2t', 'r1.jpg')
|
||||||
cache.set_notes_for('#tags', tag_id, doc, resource_ids=(h1, h2))
|
cache.set_notes_for('#tags', tag_id, doc, resource_hashes=(h1, h2))
|
||||||
self.ae(cache.notes_for('#tags', tag_id), doc)
|
self.ae(cache.notes_for('#tags', tag_id), doc)
|
||||||
cache.delete_custom_column('tags')
|
cache.delete_custom_column('tags')
|
||||||
cache.close()
|
cache.close()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user