mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Store a name for every resource
This commit is contained in:
parent
0b18f30392
commit
35b7bd3fe8
@ -6,10 +6,17 @@ CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY,
|
||||
UNIQUE(item, colname)
|
||||
);
|
||||
|
||||
CREATE TABLE notes_db.resources ( id INTEGER PRIMARY KEY,
|
||||
hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL,
|
||||
name TEXT NOT NULL UNIQUE ON CONFLICT FAIL
|
||||
);
|
||||
|
||||
CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY,
|
||||
note INTEGER NOT NULL,
|
||||
hash TEXT NOT NULL,
|
||||
UNIQUE(note, hash)
|
||||
resource INTEGER NOT NULL,
|
||||
FOREIGN KEY(note) REFERENCES notes(id),
|
||||
FOREIGN KEY(resource) REFERENCES resources(id),
|
||||
UNIQUE(note, resource)
|
||||
);
|
||||
|
||||
CREATE VIRTUAL TABLE notes_db.notes_fts USING fts5(searchable_text, content = 'notes', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2');
|
||||
@ -21,7 +28,7 @@ BEGIN
|
||||
INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER notes_db.notes_db_notes_delete_trg AFTER DELETE ON notes_db.notes
|
||||
CREATE TRIGGER notes_db.notes_db_notes_delete_trg BEFORE DELETE ON notes_db.notes
|
||||
BEGIN
|
||||
DELETE FROM notes_resources_link WHERE note=OLD.id;
|
||||
INSERT INTO notes_fts(notes_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text);
|
||||
@ -36,5 +43,9 @@ BEGIN
|
||||
INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER notes_db.notes_db_resources_delete_trg BEFORE DELETE ON notes_db.resources
|
||||
BEGIN
|
||||
DELETE FROM notes_resources_link WHERE resource=OLD.id;
|
||||
END;
|
||||
|
||||
PRAGMA notes_db.user_version=1;
|
||||
|
@ -17,6 +17,7 @@ import sys
|
||||
import time
|
||||
import uuid
|
||||
from contextlib import closing, suppress
|
||||
from typing import Optional
|
||||
from functools import partial
|
||||
|
||||
from calibre import as_unicode, force_unicode, isbytestring, prints
|
||||
@ -339,10 +340,9 @@ class Connection(apsw.Connection): # {{{
|
||||
self.fts_dbpath = self.notes_dbpath = None
|
||||
|
||||
self.setbusytimeout(self.BUSY_TIMEOUT)
|
||||
self.execute('pragma cache_size=-5000')
|
||||
self.execute('pragma temp_store=2')
|
||||
self.execute('PRAGMA cache_size=-5000; PRAGMA temp_store=2; PRAGMA foreign_keys=ON;')
|
||||
|
||||
encoding = next(self.execute('pragma encoding'))[0]
|
||||
encoding = next(self.execute('PRAGMA encoding'))[0]
|
||||
self.createcollation('PYNOCASE', partial(pynocase,
|
||||
encoding=encoding))
|
||||
|
||||
@ -968,14 +968,14 @@ class DB:
|
||||
def notes_for(self, field_name, item_id):
|
||||
return self.notes.get_note(self.conn, field_name, item_id) or ''
|
||||
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int:
|
||||
return self.notes.set_note(self.conn, field, item_id, doc, resource_hashes, searchable_text)
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_ids) -> int:
|
||||
return self.notes.set_note(self.conn, field, item_id, doc, resource_ids, searchable_text)
|
||||
|
||||
def add_notes_resource(self, path_or_stream) -> str:
|
||||
return self.notes.add_resource(path_or_stream)
|
||||
def add_notes_resource(self, path_or_stream, name) -> int:
|
||||
return self.notes.add_resource(self.conn, path_or_stream, name)
|
||||
|
||||
def get_notes_resource(self, resource_hash) -> bytes:
|
||||
return self.notes.get_resource(resource_hash)
|
||||
def get_notes_resource(self, resource_id) -> Optional[dict]:
|
||||
return self.notes.get_resource_data(self.conn, resource_id)
|
||||
|
||||
def notes_resources_used_by(self, field, item_id):
|
||||
conn = self.conn
|
||||
@ -1354,11 +1354,11 @@ class DB:
|
||||
@property
|
||||
def user_version(self):
|
||||
'''The user version of this database'''
|
||||
return self.conn.get('pragma user_version;', all=False)
|
||||
return self.conn.get('PRAGMA user_version;', all=False)
|
||||
|
||||
@user_version.setter
|
||||
def user_version(self, val):
|
||||
self.execute('pragma user_version=%d'%int(val))
|
||||
self.execute('PRAGMA user_version=%d'%int(val))
|
||||
|
||||
def initialize_database(self):
|
||||
metadata_sqlite = P('metadata_sqlite.sql', data=True,
|
||||
|
@ -20,7 +20,7 @@ from io import DEFAULT_BUFFER_SIZE, BytesIO
|
||||
from queue import Queue
|
||||
from threading import Lock
|
||||
from time import monotonic, sleep, time
|
||||
from typing import NamedTuple, Tuple
|
||||
from typing import NamedTuple, Tuple, Optional
|
||||
|
||||
from calibre import as_unicode, detect_ncpus, isbytestring
|
||||
from calibre.constants import iswindows, preferred_encoding
|
||||
@ -677,14 +677,14 @@ class Cache:
|
||||
def notes_for(self, field, item_id) -> str:
|
||||
return self.backend.notes_for(field, item_id)
|
||||
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int:
|
||||
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes)
|
||||
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_ids=()) -> int:
|
||||
return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_ids)
|
||||
|
||||
def add_notes_resource(self, path_or_stream_or_data) -> str:
|
||||
return self.backend.add_notes_resource(path_or_stream_or_data)
|
||||
def add_notes_resource(self, path_or_stream_or_data, name: str) -> int:
|
||||
return self.backend.add_notes_resource(path_or_stream_or_data, name)
|
||||
|
||||
def get_notes_resource(self, resource_hash) -> bytes:
|
||||
return self.backend.get_notes_resource(resource_hash)
|
||||
def get_notes_resource(self, resource_id) -> Optional[dict]:
|
||||
return self.backend.get_notes_resource(resource_id)
|
||||
|
||||
def notes_resources_used_by(self, field, item_id):
|
||||
return frozenset(self.backend.notes_resources_used_by(field, item_id))
|
||||
|
@ -1,9 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
import apsw
|
||||
import os
|
||||
import time
|
||||
import xxhash
|
||||
from typing import Union, Optional
|
||||
from contextlib import suppress
|
||||
from itertools import repeat
|
||||
|
||||
@ -76,7 +78,11 @@ class Notes:
|
||||
SchemaUpgrade(conn, '\n'.join(triggers))
|
||||
conn.notes_dbpath = dbpath
|
||||
|
||||
def path_for_resource(self, resource_hash: str) -> str:
|
||||
def path_for_resource(self, conn, resource_hash_or_resource_id: Union[str,int]) -> str:
|
||||
if isinstance(resource_hash_or_resource_id, str):
|
||||
resource_hash = resource_hash_or_resource_id
|
||||
else:
|
||||
resource_hash = conn.get('SELECT hash FROM notes_db.resources WHERE id=?', (resource_hash_or_resource_id,), all=False)
|
||||
idx = resource_hash.index(':')
|
||||
prefix = resource_hash[idx + 1: idx + 3]
|
||||
return os.path.join(self.resources_dir, prefix, resource_hash)
|
||||
@ -86,22 +92,21 @@ class Notes:
|
||||
resources_to_potentially_remove = tuple(resources_to_potentially_remove)
|
||||
if delete_from_link_table:
|
||||
conn.executemany('''
|
||||
DELETE FROM notes_db.notes_resources_link WHERE note=? AND hash=?
|
||||
DELETE FROM notes_db.notes_resources_link WHERE note=? AND resource=?
|
||||
''', tuple((note_id, x) for x in resources_to_potentially_remove))
|
||||
stmt = '''
|
||||
WITH resources_table(value) AS (VALUES {})
|
||||
SELECT value FROM resources_table WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link)
|
||||
SELECT value FROM resources_table WHERE value NOT IN (SELECT resource FROM notes_db.notes_resources_link)
|
||||
'''.format(','.join(repeat('(?)', len(resources_to_potentially_remove))))
|
||||
for (x,) in conn.execute(stmt, resources_to_potentially_remove):
|
||||
remove_with_retry(self.path_for_resource(x))
|
||||
remove_with_retry(self.path_for_resource(conn, x))
|
||||
|
||||
def note_id_for(self, conn, field_name, item_id):
|
||||
for (ans,) in conn.execute('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
|
||||
return ans
|
||||
return conn.get('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name), all=False)
|
||||
|
||||
def resources_used_by(self, conn, note_id):
|
||||
if note_id is not None:
|
||||
for (h,) in conn.execute('SELECT hash from notes_db.notes_resources_link WHERE note=?', (note_id,)):
|
||||
for (h,) in conn.execute('SELECT resource from notes_db.notes_resources_link WHERE note=?', (note_id,)):
|
||||
yield h
|
||||
|
||||
def set_backup_for(self, field_name, item_id, marked_up_text='', searchable_text=''):
|
||||
@ -122,7 +127,7 @@ class Notes:
|
||||
os.replace(path, dest)
|
||||
self.trim_retired_dir()
|
||||
|
||||
def set_note(self, conn, field_name, item_id, marked_up_text='', hashes_of_used_resources=(), searchable_text=copy_marked_up_text):
|
||||
def set_note(self, conn, field_name, item_id, marked_up_text='', used_resource_ids=(), searchable_text=copy_marked_up_text):
|
||||
if searchable_text is copy_marked_up_text:
|
||||
searchable_text = marked_up_text
|
||||
note_id = self.note_id_for(conn, field_name, item_id)
|
||||
@ -134,27 +139,26 @@ class Notes:
|
||||
if old_resources:
|
||||
self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
|
||||
return
|
||||
new_resources = frozenset(hashes_of_used_resources)
|
||||
new_resources = frozenset(used_resource_ids)
|
||||
resources_to_potentially_remove = old_resources - new_resources
|
||||
resources_to_add = new_resources - old_resources
|
||||
if note_id is None:
|
||||
note_id, = next(conn.execute('''
|
||||
note_id = conn.get('''
|
||||
INSERT INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id;
|
||||
''', (item_id, field_name, marked_up_text, searchable_text)))
|
||||
''', (item_id, field_name, marked_up_text, searchable_text), all=False)
|
||||
else:
|
||||
conn.execute('UPDATE notes_db.notes SET doc=?,searchable_text=?', (marked_up_text, searchable_text))
|
||||
if resources_to_potentially_remove:
|
||||
self.remove_resources(conn, note_id, resources_to_potentially_remove)
|
||||
if resources_to_add:
|
||||
conn.executemany('''
|
||||
INSERT INTO notes_db.notes_resources_link (note,hash) VALUES (?,?);
|
||||
INSERT INTO notes_db.notes_resources_link (note,resource) VALUES (?,?);
|
||||
''', tuple((note_id, x) for x in resources_to_add))
|
||||
self.set_backup_for(field_name, item_id, marked_up_text, searchable_text)
|
||||
return note_id
|
||||
|
||||
def get_note(self, conn, field_name, item_id):
|
||||
for (doc,) in conn.execute('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
|
||||
return doc
|
||||
return conn.get('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name), all=False)
|
||||
|
||||
def get_note_data(self, conn, field_name, item_id):
|
||||
for (note_id, doc, searchable_text) in conn.execute(
|
||||
@ -162,7 +166,7 @@ class Notes:
|
||||
):
|
||||
return {
|
||||
'id': note_id, 'doc': doc, 'searchable_text': searchable_text,
|
||||
'resource_hashes': frozenset(self.resources_used_by(conn, note_id)),
|
||||
'resource_ids': frozenset(self.resources_used_by(conn, note_id)),
|
||||
}
|
||||
|
||||
def rename_note(self, conn, field_name, old_item_id, new_item_id):
|
||||
@ -175,7 +179,7 @@ class Notes:
|
||||
old_note = self.get_note_data(conn, field_name, old_item_id)
|
||||
if not old_note or not old_note['doc']:
|
||||
return
|
||||
self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text'])
|
||||
self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_ids'], old_note['searchable_text'])
|
||||
|
||||
def trim_retired_dir(self):
|
||||
mpath_map = {}
|
||||
@ -189,7 +193,7 @@ class Notes:
|
||||
for path in items[:extra]:
|
||||
remove_with_retry(path)
|
||||
|
||||
def add_resource(self, path_or_stream_or_data):
|
||||
def add_resource(self, conn, path_or_stream_or_data, name):
|
||||
if isinstance(path_or_stream_or_data, bytes):
|
||||
data = path_or_stream_or_data
|
||||
elif isinstance(path_or_stream_or_data, str):
|
||||
@ -198,7 +202,7 @@ class Notes:
|
||||
else:
|
||||
data = f.read()
|
||||
resource_hash = hash_data(data)
|
||||
path = self.path_for_resource(resource_hash)
|
||||
path = self.path_for_resource(conn, resource_hash)
|
||||
path = make_long_path_useable(path)
|
||||
exists = False
|
||||
try:
|
||||
@ -207,21 +211,39 @@ class Notes:
|
||||
pass
|
||||
else:
|
||||
exists = s.st_size == len(data)
|
||||
if exists:
|
||||
return resource_hash
|
||||
if not exists:
|
||||
try:
|
||||
f = open(path, 'wb')
|
||||
except FileNotFoundError:
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
f = open(path, 'wb')
|
||||
with f:
|
||||
f.write(data)
|
||||
base_name, ext = os.path.splitext(name)
|
||||
c = 0
|
||||
for (resource_id, existing_name) in conn.execute('SELECT id,name FROM notes_db.resources WHERE hash=?', (resource_hash,)):
|
||||
if existing_name != name:
|
||||
while True:
|
||||
try:
|
||||
conn.execute('UPDATE notes_db.resources SET name=? WHERE id=?', (name, resource_id))
|
||||
break
|
||||
except apsw.ConstraintError:
|
||||
c += 1
|
||||
name = f'{base_name}-{c}{ext}'
|
||||
break
|
||||
else:
|
||||
while True:
|
||||
try:
|
||||
resource_id = conn.get('INSERT INTO notes_db.resources (hash,name) VALUES (?,?) RETURNING id', (resource_hash, name), all=False)
|
||||
break
|
||||
except apsw.ConstraintError:
|
||||
c += 1
|
||||
name = f'{base_name}-{c}{ext}'
|
||||
return resource_id
|
||||
|
||||
try:
|
||||
f = open(path, 'wb')
|
||||
except FileNotFoundError:
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
f = open(path, 'wb')
|
||||
with f:
|
||||
f.write(data)
|
||||
return resource_hash
|
||||
|
||||
def get_resource(self, resource_hash) -> bytes:
|
||||
path = self.path_for_resource(resource_hash)
|
||||
path = make_long_path_useable(path)
|
||||
with suppress(FileNotFoundError), open(path, 'rb') as f:
|
||||
return f.read()
|
||||
return b''
|
||||
def get_resource_data(self, conn, resource_id) -> Optional[dict]:
|
||||
for (name, resource_hash) in conn.execute('SELECT name,hash FROM notes_db.resources WHERE id=?', (resource_id,)):
|
||||
path = self.path_for_resource(conn, resource_hash)
|
||||
path = make_long_path_useable(path)
|
||||
with suppress(FileNotFoundError), open(path, 'rb') as f:
|
||||
return {'name': name, 'data': f.read(), 'hash': resource_hash}
|
||||
|
@ -22,18 +22,20 @@ class NotesTest(BaseTest):
|
||||
authors = sorted(cache.all_field_ids('authors'))
|
||||
self.ae(cache.notes_for('authors', authors[0]), '')
|
||||
doc = 'simple notes for an author'
|
||||
h1 = cache.add_notes_resource(b'resource1')
|
||||
h2 = cache.add_notes_resource(b'resource2')
|
||||
note_id = cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
|
||||
h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
|
||||
h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
|
||||
self.ae(cache.get_notes_resource(h1)['name'], 'r1.jpg')
|
||||
self.ae(cache.get_notes_resource(h2)['name'], 'r1-1.jpg')
|
||||
note_id = cache.set_notes_for('authors', authors[0], doc, resource_ids=(h1, h2))
|
||||
self.ae(cache.notes_for('authors', authors[0]), doc)
|
||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1, h2}))
|
||||
self.ae(cache.get_notes_resource(h1), b'resource1')
|
||||
self.ae(cache.get_notes_resource(h2), b'resource2')
|
||||
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
||||
self.ae(cache.get_notes_resource(h2)['data'], b'resource2')
|
||||
doc2 = 'a different note to replace the first one'
|
||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_hashes=(h1,)))
|
||||
self.ae(note_id, cache.set_notes_for('authors', authors[0], doc2, resource_ids=(h1,)))
|
||||
self.ae(cache.notes_for('authors', authors[0]), doc2)
|
||||
self.ae(cache.notes_resources_used_by('authors', authors[0]), frozenset({h1}))
|
||||
self.ae(cache.get_notes_resource(h1), b'resource1')
|
||||
self.ae(cache.get_notes_resource(h2), b'')
|
||||
self.assertTrue(os.path.exists(notes.path_for_resource(h1)))
|
||||
self.assertFalse(os.path.exists(notes.path_for_resource(h2)))
|
||||
self.ae(cache.get_notes_resource(h1)['data'], b'resource1')
|
||||
self.ae(cache.get_notes_resource(h2), None)
|
||||
self.assertTrue(os.path.exists(notes.path_for_resource(cache.backend.conn, h1)))
|
||||
self.assertFalse(os.path.exists(notes.path_for_resource(cache.backend.conn, h2)))
|
||||
|
Loading…
x
Reference in New Issue
Block a user