mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
More work on notes
This commit is contained in:
parent
7eefd96970
commit
3aee660656
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
set -xe
|
set -xe
|
||||||
|
|
||||||
pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools uchardet libstemmer poppler tk podofo
|
pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools python-xxhash uchardet libstemmer poppler tk podofo
|
||||||
|
|
||||||
useradd -m ci
|
useradd -m ci
|
||||||
chown -R ci:users $GITHUB_WORKSPACE
|
chown -R ci:users $GITHUB_WORKSPACE
|
||||||
|
@ -954,14 +954,29 @@ class DB:
|
|||||||
if link_table_name and link_col_name:
|
if link_table_name and link_col_name:
|
||||||
self.executemany(f'DELETE FROM {link_table_name} WHERE {link_col_name}=?', bindings)
|
self.executemany(f'DELETE FROM {link_table_name} WHERE {link_col_name}=?', bindings)
|
||||||
self.executemany(f'DELETE FROM {table_name} WHERE id=?', bindings)
|
self.executemany(f'DELETE FROM {table_name} WHERE id=?', bindings)
|
||||||
|
for item_id in items:
|
||||||
|
self.notes.set_note(self.conn, field_name, item_id)
|
||||||
|
|
||||||
def rename_category_item(self, field_name, table_name, link_table_name, link_col_name, old_item_id, new_item_id):
|
def rename_category_item(self, field_name, table_name, link_table_name, link_col_name, old_item_id, new_item_id):
|
||||||
|
self.notes.rename_note(self.conn, field_name, old_item_id, new_item_id)
|
||||||
# For custom series this means that the series index can
|
# For custom series this means that the series index can
|
||||||
# potentially have duplicates/be incorrect, but there is no way to
|
# potentially have duplicates/be incorrect, but there is no way to
|
||||||
# handle that in this context.
|
# handle that in this context.
|
||||||
self.execute(f'UPDATE {link_table_name} SET {link_col_name}=? WHERE {link_col_name}=?; DELETE FROM {table_name} WHERE id=?',
|
self.execute(f'UPDATE {link_table_name} SET {link_col_name}=? WHERE {link_col_name}=?; DELETE FROM {table_name} WHERE id=?',
|
||||||
(new_item_id, old_item_id, old_item_id))
|
(new_item_id, old_item_id, old_item_id))
|
||||||
|
|
||||||
|
def notes_for(self, field_name, item_id):
    '''
    Return the notes document stored for the item ``item_id`` of the field
    ``field_name``, or an empty string when the notes store returns nothing.
    '''
    doc = self.notes.get_note(self.conn, field_name, item_id)
    return doc or ''
|
||||||
|
|
||||||
|
def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int:
    '''
    Store ``doc`` as the notes for the item ``item_id`` of ``field`` by
    delegating to the notes store, and return whatever it returns.

    Note that the notes store takes the resource hashes before the
    searchable text, the reverse of this method's parameter order.
    '''
    store = self.notes
    connection = self.conn
    return store.set_note(connection, field, item_id, doc, resource_hashes, searchable_text)
|
||||||
|
|
||||||
|
def add_notes_resource(self, path_or_stream) -> str:
    '''
    Hand ``path_or_stream`` to the notes store to be added as a resource
    and return the resource hash it reports.
    '''
    resource_hash = self.notes.add_resource(path_or_stream)
    return resource_hash
|
||||||
|
|
||||||
|
def get_notes_resource(self, resource_hash) -> bytes:
    '''
    Return the data the notes store holds for ``resource_hash``.
    '''
    data = self.notes.get_resource(resource_hash)
    return data
|
||||||
|
|
||||||
def initialize_fts(self, dbref):
|
def initialize_fts(self, dbref):
|
||||||
self.fts = None
|
self.fts = None
|
||||||
if not self.prefs['fts_enabled']:
|
if not self.prefs['fts_enabled']:
|
||||||
|
@ -38,6 +38,7 @@ from calibre.db.listeners import EventDispatcher, EventType
|
|||||||
from calibre.db.locking import (
|
from calibre.db.locking import (
|
||||||
DowngradeLockError, LockingError, SafeReadLock, create_locks, try_lock,
|
DowngradeLockError, LockingError, SafeReadLock, create_locks, try_lock,
|
||||||
)
|
)
|
||||||
|
from calibre.db.notes.connect import copy_marked_up_text
|
||||||
from calibre.db.search import Search
|
from calibre.db.search import Search
|
||||||
from calibre.db.tables import VirtualTable
|
from calibre.db.tables import VirtualTable
|
||||||
from calibre.db.utils import type_safe_sort_key_function
|
from calibre.db.utils import type_safe_sort_key_function
|
||||||
@ -672,6 +673,20 @@ class Cache:
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
# Notes API {{{
|
||||||
|
def notes_for(self, field, item_id) -> str:
    '''
    Return the notes document for the item ``item_id`` of ``field``, as
    reported by the backend.
    '''
    backend = self.backend
    return backend.notes_for(field, item_id)
|
||||||
|
|
||||||
|
def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int:
    '''
    Set the notes document for the item ``item_id`` of ``field`` via the
    backend and return the backend's result.

    ``searchable_text`` defaults to the copy_marked_up_text sentinel, which
    is passed through to the backend unchanged.
    '''
    # NOTE(review): this method carries no @write_api decorator, unlike the
    # Cache Layer API that follows -- confirm whether locking is needed here.
    backend = self.backend
    return backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes)
|
||||||
|
|
||||||
|
def add_notes_resource(self, path_or_stream_or_data) -> str:
    '''
    Add a resource for use in notes via the backend and return the
    resource hash the backend reports.
    '''
    # NOTE(review): this method carries no @write_api decorator, unlike the
    # Cache Layer API that follows -- confirm whether locking is needed here.
    resource_hash = self.backend.add_notes_resource(path_or_stream_or_data)
    return resource_hash
|
||||||
|
|
||||||
|
def get_notes_resource(self, resource_hash) -> bytes:
    '''
    Return the data the backend holds for ``resource_hash``.
    '''
    data = self.backend.get_notes_resource(resource_hash)
    return data
|
||||||
|
# }}}
|
||||||
|
|
||||||
# Cache Layer API {{{
|
# Cache Layer API {{{
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
|
@ -2,12 +2,44 @@
|
|||||||
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
import xxhash
|
||||||
|
from contextlib import suppress
|
||||||
|
from itertools import repeat
|
||||||
|
|
||||||
from calibre.constants import iswindows
|
from calibre.constants import iswindows
|
||||||
|
from calibre.utils.copy_files import WINDOWS_SLEEP_FOR_RETRY_TIME
|
||||||
|
from calibre.utils.filenames import make_long_path_useable
|
||||||
|
|
||||||
from ..constants import NOTES_DIR_NAME
|
from ..constants import NOTES_DIR_NAME
|
||||||
from .schema_upgrade import SchemaUpgrade
|
from .schema_upgrade import SchemaUpgrade
|
||||||
|
|
||||||
|
if iswindows:
|
||||||
|
from calibre_extensions import winutil
|
||||||
|
|
||||||
|
class cmt(str):
    '''
    Marker subclass of str. Its single instance, copy_marked_up_text, is
    recognised by identity rather than by value.
    '''


# Sentinel default for searchable_text: use the marked up text itself
copy_marked_up_text = cmt()
# Bytes written between the marked up text and the searchable text in a backup file
SEP = b'\0\x1c\0'
|
||||||
|
|
||||||
|
|
||||||
|
def hash_data(data: bytes) -> str:
    '''
    Return the resource hash for ``data``: the hex digest computed by
    xxhash.xxh3_64_hexdigest, prefixed with ``xxh64:``.
    '''
    # NOTE(review): the prefix reads xxh64 although the digest comes from
    # xxh3_64 -- confirm the label is intentional.
    digest = xxhash.xxh3_64_hexdigest(data)
    return f'xxh64:{digest}'
|
||||||
|
|
||||||
|
|
||||||
|
def remove_with_retry(x):
    '''
    Delete the file at ``x``, treating an already missing file as success.

    On Windows, if the deletion fails with a sharing violation, sleep for
    WINDOWS_SLEEP_FOR_RETRY_TIME and try exactly once more. Any other
    OSError from the first attempt is ignored, as before.
    '''
    x = make_long_path_useable(x)
    try:
        os.remove(x)
    except FileNotFoundError:
        return
    except OSError as e:
        if iswindows and e.winerror == winutil.ERROR_SHARING_VIOLATION:
            time.sleep(WINDOWS_SLEEP_FOR_RETRY_TIME)
            # The file may have been deleted by someone else while we slept;
            # that is the same success case as on the first attempt.
            with suppress(FileNotFoundError):
                os.remove(x)
|
||||||
|
|
||||||
|
|
||||||
class Notes:
|
class Notes:
|
||||||
|
|
||||||
@ -15,13 +47,18 @@ class Notes:
|
|||||||
conn = backend.get_connection()
|
conn = backend.get_connection()
|
||||||
libdir = os.path.dirname(os.path.abspath(conn.db_filename('main')))
|
libdir = os.path.dirname(os.path.abspath(conn.db_filename('main')))
|
||||||
notes_dir = os.path.join(libdir, NOTES_DIR_NAME)
|
notes_dir = os.path.join(libdir, NOTES_DIR_NAME)
|
||||||
|
self.resources_dir = os.path.join(notes_dir, 'resources')
|
||||||
|
self.backup_dir = os.path.join(notes_dir, 'backup')
|
||||||
|
self.retired_dir = os.path.join(notes_dir, 'retired')
|
||||||
if not os.path.exists(notes_dir):
|
if not os.path.exists(notes_dir):
|
||||||
os.makedirs(notes_dir, exist_ok=True)
|
os.makedirs(notes_dir, exist_ok=True)
|
||||||
if iswindows:
|
if iswindows:
|
||||||
import calibre_extensions.winutil as winutil
|
|
||||||
winutil.set_file_attributes(notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
|
winutil.set_file_attributes(notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
|
||||||
dbpath = os.path.join(notes_dir, 'notes.db')
|
dbpath = os.path.join(notes_dir, 'notes.db')
|
||||||
conn.execute("ATTACH DATABASE ? AS notes_db", (dbpath,))
|
conn.execute("ATTACH DATABASE ? AS notes_db", (dbpath,))
|
||||||
|
os.makedirs(self.resources_dir, exist_ok=True)
|
||||||
|
os.makedirs(self.backup_dir, exist_ok=True)
|
||||||
|
os.makedirs(self.retired_dir, exist_ok=True)
|
||||||
self.allowed_fields = set()
|
self.allowed_fields = set()
|
||||||
triggers = []
|
triggers = []
|
||||||
for table in backend.tables.values():
|
for table in backend.tables.values():
|
||||||
@ -36,3 +73,149 @@ class Notes:
|
|||||||
)
|
)
|
||||||
SchemaUpgrade(conn, '\n'.join(triggers))
|
SchemaUpgrade(conn, '\n'.join(triggers))
|
||||||
conn.notes_dbpath = dbpath
|
conn.notes_dbpath = dbpath
|
||||||
|
|
||||||
|
def path_for_resource(self, resource_hash: str) -> str:
    '''
    Return the path inside the resources directory for ``resource_hash``.

    Resources are grouped into sub-directories named after the first two
    characters that follow the ``:`` in the hash. Raises ValueError when
    the hash contains no ``:``.
    '''
    start = resource_hash.index(':') + 1
    bucket = resource_hash[start:start + 2]
    return os.path.join(self.resources_dir, bucket, resource_hash)
|
||||||
|
|
||||||
|
def remove_resources(self, conn, note_id, resources_to_potentially_remove, delete_from_link_table=True):
    '''
    Delete the files of resources that no note links to any more.

    :param conn: Database connection on which the notes_db tables are queried.
    :param note_id: Id of the note the resources were used by.
    :param resources_to_potentially_remove: Iterable of resource hashes to check.
    :param delete_from_link_table: When True, first delete the rows linking
        ``note_id`` to each of the hashes.

    Only hashes that do not appear in notes_resources_link (after the
    optional deletion above) have their files removed.
    '''
    if not isinstance(resources_to_potentially_remove, tuple):
        resources_to_potentially_remove = tuple(resources_to_potentially_remove)
    if not resources_to_potentially_remove:
        # Nothing to do, and an empty VALUES list would not be valid SQL
        return
    if delete_from_link_table:
        conn.executemany('''
            DELETE FROM notes_db.notes_resources_link WHERE note=? AND hash=?
            ''', tuple((note_id, x) for x in resources_to_potentially_remove))
    # One (?) placeholder row per candidate hash
    placeholders = ','.join(repeat('(?)', len(resources_to_potentially_remove)))
    for (x,) in conn.execute(
        '''
        SELECT value FROM (VALUES {}) AS my_values(value) WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link)
        '''.format(placeholders), resources_to_potentially_remove):
        remove_with_retry(self.path_for_resource(x))
|
||||||
|
|
||||||
|
def note_id_for(self, conn, field_name, item_id):
    '''
    Return the id of the note stored for the item ``item_id`` of
    ``field_name``, or None when the query yields no row.
    '''
    rows = conn.execute('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name))
    for (found,) in rows:
        # Only the first matching row is of interest
        return found
    return None
|
||||||
|
|
||||||
|
def resources_used_by(self, conn, note_id):
    '''
    Generate the hashes of the resources linked to the note ``note_id``.
    Yields nothing, and runs no query, when ``note_id`` is None.
    '''
    if note_id is None:
        return
    rows = conn.execute('SELECT hash from notes_db.notes_resources_link WHERE note=?', (note_id,))
    yield from (resource_hash for (resource_hash,) in rows)
|
||||||
|
|
||||||
|
def set_backup_for(self, field_name, item_id, marked_up_text='', searchable_text=''):
    '''
    Write or retire the backup file for the note of the item ``item_id``
    of ``field_name``.

    With non-empty ``marked_up_text`` the backup file is (re)written as the
    UTF-8 encoded marked up text, then SEP, then the UTF-8 encoded
    searchable text. The containing directory is created on demand.

    With empty ``marked_up_text`` an existing backup file is moved into the
    retired directory, after which that directory is trimmed.
    '''
    path = make_long_path_useable(os.path.join(self.backup_dir, field_name, str(item_id)))
    if marked_up_text:
        try:
            f = open(path, 'wb')
        except FileNotFoundError:
            # The per-field directory does not exist yet
            os.makedirs(os.path.dirname(path), exist_ok=True)
            f = open(path, 'wb')
        with f:
            f.write(marked_up_text.encode('utf-8'))
            f.write(SEP)
            f.write(searchable_text.encode('utf-8'))
    else:
        if os.path.exists(path):
            dest = make_long_path_useable(os.path.join(self.retired_dir, f'{item_id}_{field_name}'))
            os.replace(path, dest)
            # The method is named trim_retired_dir; the previous spelling
            # trim_retire_dir raised AttributeError
            self.trim_retired_dir()
|
||||||
|
|
||||||
|
def set_note(self, conn, field_name, item_id, marked_up_text='', hashes_of_used_resources=(), searchable_text=copy_marked_up_text):
    '''
    Create, replace or delete the note for the item ``item_id`` of
    ``field_name``.

    An empty ``marked_up_text`` deletes an existing note, retires its
    backup file and removes the files of resources it used that are no
    longer linked to any note. None is returned in that case.

    Otherwise the note row is written, the resource links are brought in
    line with ``hashes_of_used_resources``, the backup file is rewritten
    and the id of the written note row is returned.

    :param searchable_text: Text stored for searching. When left at its
        default, the marked up text is used.
    '''
    if searchable_text is copy_marked_up_text:
        searchable_text = marked_up_text
    note_id = self.note_id_for(conn, field_name, item_id)
    old_resources = frozenset(self.resources_used_by(conn, note_id))
    if not marked_up_text:
        if note_id is not None:
            conn.execute('DELETE FROM notes_db.notes WHERE id=?', (note_id,))
            self.set_backup_for(field_name, item_id)
            if old_resources:
                self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
        return
    new_resources = frozenset(hashes_of_used_resources)
    resources_to_potentially_remove = old_resources - new_resources
    resources_to_add = new_resources - old_resources
    inserted_id, = next(conn.execute('''
        INSERT OR REPLACE INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id;
        ''', (item_id, field_name, marked_up_text, searchable_text)))
    if resources_to_potentially_remove:
        self.remove_resources(conn, inserted_id, resources_to_potentially_remove)
    if resources_to_add:
        conn.executemany('''
            INSERT INTO notes_db.notes_resources_link (note,hash) VALUES (?,?);
            ''', tuple((inserted_id, x) for x in resources_to_add))
    self.set_backup_for(field_name, item_id, marked_up_text, searchable_text)
    # Report the row that was actually written. note_id is the id looked up
    # before the write and is None when the note did not exist yet.
    return inserted_id
|
||||||
|
|
||||||
|
def get_note(self, conn, field_name, item_id):
    '''
    Return the ``doc`` column of the note stored for the item ``item_id``
    of ``field_name``, or None when the query yields no row.
    '''
    rows = conn.execute('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name))
    for (text,) in rows:
        # Only the first matching row is of interest
        return text
    return None
|
||||||
|
|
||||||
|
def get_note_data(self, conn, field_name, item_id):
    '''
    Return a dict describing the note stored for the item ``item_id`` of
    ``field_name``, or None when the query yields no row.

    The dict has the keys ``id``, ``doc``, ``searchable_text`` and
    ``resource_hashes``, the last being a frozenset of the hashes of the
    resources linked to the note.
    '''
    rows = conn.execute(
        'SELECT id,doc,searchable_text FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)
    )
    for row in rows:
        note_id, doc, searchable_text = row
        data = {'id': note_id, 'doc': doc, 'searchable_text': searchable_text}
        data['resource_hashes'] = frozenset(self.resources_used_by(conn, note_id))
        return data
    return None
|
||||||
|
|
||||||
|
def rename_note(self, conn, field_name, old_item_id, new_item_id):
    '''
    Copy the note of ``old_item_id`` to ``new_item_id`` within
    ``field_name``.

    Nothing happens when the old item has no note, when the new item
    already has a non-empty note, or when the old note has no document.
    '''
    if self.note_id_for(conn, field_name, old_item_id) is None:
        return
    if self.get_note(conn, field_name, new_item_id):
        # Never overwrite a note the destination already has
        return
    source = self.get_note_data(conn, field_name, old_item_id)
    doc = source['doc'] if source else None
    if doc:
        self.set_note(conn, field_name, new_item_id, doc, source['resource_hashes'], source['searchable_text'])
|
||||||
|
|
||||||
|
def trim_retired_dir(self):
    '''
    Keep at most ``self.max_retired_items`` entries in the retired
    directory, removing the ones with the oldest modification times first.
    '''
    # NOTE(review): max_retired_items is not set in the code visible here --
    # confirm it is defined on the class.
    mpath_map = {}
    items = []
    # Use the iterator as a context manager so the directory handle is
    # closed promptly, even if stat() raises part way through
    with os.scandir(self.retired_dir) as entries:
        for d in entries:
            mpath_map[d.path] = d.stat(follow_symlinks=False).st_mtime_ns
            items.append(d.path)
    extra = len(items) - self.max_retired_items
    if extra > 0:
        # Oldest first, so that the slice below holds the entries to drop
        items.sort(key=mpath_map.__getitem__)
        for path in items[:extra]:
            remove_with_retry(path)
|
||||||
|
|
||||||
|
def add_resource(self, path_or_stream_or_data):
    '''
    Store a resource in the resources directory and return its hash.

    :param path_or_stream_or_data: The resource as bytes, as a str path to
        a file to read, or as an object with a ``read()`` method returning
        the bytes.

    The file is not rewritten when a file of the same size already exists
    at the path derived from the hash.
    '''
    if isinstance(path_or_stream_or_data, bytes):
        data = path_or_stream_or_data
    elif isinstance(path_or_stream_or_data, str):
        with open(path_or_stream_or_data, 'rb') as f:
            data = f.read()
    else:
        # Read from the stream that was passed in; the previous code read
        # from the unbound name f here
        data = path_or_stream_or_data.read()
    resource_hash = hash_data(data)
    path = self.path_for_resource(resource_hash)
    path = make_long_path_useable(path)
    exists = False
    try:
        s = os.stat(path, follow_symlinks=False)
    except OSError:
        pass
    else:
        exists = s.st_size == len(data)
    if exists:
        return resource_hash

    try:
        f = open(path, 'wb')
    except FileNotFoundError:
        # The sub-directory for this hash prefix does not exist yet
        os.makedirs(os.path.dirname(path), exist_ok=True)
        f = open(path, 'wb')
    with f:
        f.write(data)
    return resource_hash
|
||||||
|
|
||||||
|
def get_resource(self, resource_hash) -> bytes:
    '''
    Return the stored data for ``resource_hash``, or empty bytes when no
    file exists for it.
    '''
    path = make_long_path_useable(self.path_for_resource(resource_hash))
    try:
        with open(path, 'rb') as f:
            return f.read()
    except FileNotFoundError:
        return b''
|
||||||
|
@ -306,6 +306,3 @@ class FilesystemTest(BaseTest):
|
|||||||
c(r(match_type='not_startswith', query='IGnored.', action='add'), r(query='ignored.md')),
|
c(r(match_type='not_startswith', query='IGnored.', action='add'), r(query='ignored.md')),
|
||||||
):
|
):
|
||||||
q(['added.epub non-book.other'.split()], find_books_in_directory('', True, compiled_rules=rules, listdir_impl=lambda x: files))
|
q(['added.epub non-book.other'.split()], find_books_in_directory('', True, compiled_rules=rules, listdir_impl=lambda x: files))
|
||||||
|
|
||||||
def test_notes_operations(self):
|
|
||||||
cache = self.init_cache()
|
|
||||||
|
19
src/calibre/db/tests/notes.py
Normal file
19
src/calibre/db/tests/notes.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.db.tests.base import BaseTest
|
||||||
|
|
||||||
|
class NotesTest(BaseTest):
    '''Tests for the notes API of the cache.'''

    ae = BaseTest.assertEqual

    def test_notes(self):
        '''An item starts without notes and returns the document set for it.'''
        cache = self.init_cache()
        authors = sorted(cache.all_field_ids('authors'))
        first_author = authors[0]
        # No notes have been stored yet
        self.ae(cache.notes_for('authors', first_author), '')
        doc = 'simple notes for an author'
        h1, h2 = (cache.add_notes_resource(data) for data in (b'resource1', b'resource2'))
        cache.set_notes_for('authors', first_author, doc, resource_hashes=(h1, h2))
        self.ae(cache.notes_for('authors', first_author), doc)
|
Loading…
x
Reference in New Issue
Block a user