Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-07 18:24:30 -04:00)

commit 3aee660656 (parent 7eefd96970)
More work on notes
@@ -5,7 +5,7 @@
set -xe

-pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools uchardet libstemmer poppler tk podofo
+pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools python-xxhash uchardet libstemmer poppler tk podofo

useradd -m ci
chown -R ci:users $GITHUB_WORKSPACE
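The only change here is the new python-xxhash dependency; it backs the content hashing that the notes code below uses for resource files. A minimal sketch of what the package provides (assuming python-xxhash is installed):

import xxhash

# xxh3_64_hexdigest() maps arbitrary bytes to a 16-character hex digest;
# the notes code tags it with an algorithm prefix so hashes stay self-describing.
digest = xxhash.xxh3_64_hexdigest(b'resource1')
print('xxh64:' + digest)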
@@ -954,14 +954,29 @@ class DB:
        if link_table_name and link_col_name:
            self.executemany(f'DELETE FROM {link_table_name} WHERE {link_col_name}=?', bindings)
        self.executemany(f'DELETE FROM {table_name} WHERE id=?', bindings)
        for item_id in items:
            self.notes.set_note(self.conn, field_name, item_id)

    def rename_category_item(self, field_name, table_name, link_table_name, link_col_name, old_item_id, new_item_id):
        self.notes.rename_note(self.conn, field_name, old_item_id, new_item_id)
        # For custom series this means that the series index can
        # potentially have duplicates/be incorrect, but there is no way to
        # handle that in this context.
        self.execute(f'UPDATE {link_table_name} SET {link_col_name}=? WHERE {link_col_name}=?; DELETE FROM {table_name} WHERE id=?',
                     (new_item_id, old_item_id, old_item_id))

    def notes_for(self, field_name, item_id):
        return self.notes.get_note(self.conn, field_name, item_id) or ''

    def set_notes_for(self, field, item_id, doc: str, searchable_text: str, resource_hashes) -> int:
        return self.notes.set_note(self.conn, field, item_id, doc, resource_hashes, searchable_text)

    def add_notes_resource(self, path_or_stream) -> str:
        return self.notes.add_resource(path_or_stream)

    def get_notes_resource(self, resource_hash) -> bytes:
        return self.notes.get_resource(resource_hash)

    def initialize_fts(self, dbref):
        self.fts = None
        if not self.prefs['fts_enabled']:
@@ -38,6 +38,7 @@ from calibre.db.listeners import EventDispatcher, EventType
from calibre.db.locking import (
    DowngradeLockError, LockingError, SafeReadLock, create_locks, try_lock,
)
from calibre.db.notes.connect import copy_marked_up_text
from calibre.db.search import Search
from calibre.db.tables import VirtualTable
from calibre.db.utils import type_safe_sort_key_function

@@ -672,6 +673,20 @@ class Cache:
    # }}}

    # Notes API {{{
    def notes_for(self, field, item_id) -> str:
        return self.backend.notes_for(field, item_id)

    def set_notes_for(self, field, item_id, doc: str, searchable_text: str = copy_marked_up_text, resource_hashes=()) -> int:
        return self.backend.set_notes_for(field, item_id, doc, searchable_text, resource_hashes)

    def add_notes_resource(self, path_or_stream_or_data) -> str:
        return self.backend.add_notes_resource(path_or_stream_or_data)

    def get_notes_resource(self, resource_hash) -> bytes:
        return self.backend.get_notes_resource(resource_hash)
    # }}}

    # Cache Layer API {{{

    @write_api
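A rough usage sketch of the new Cache-level notes API, assuming cache is an open Cache instance and author_id is a valid item id for the authors field (the new test file at the end of this commit exercises the same flow):

# Store two binary resources; each call returns a content hash such as 'xxh64:...'.
h1 = cache.add_notes_resource(b'cover scan')
h2 = cache.add_notes_resource(b'signature image')

# Save the note document and record which resources it references.
cache.set_notes_for('authors', author_id, '<p>met at a signing</p>', resource_hashes=(h1, h2))

# Read it back; an item without a note returns the empty string.
print(cache.notes_for('authors', author_id))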
@@ -2,12 +2,44 @@
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>

import os
import time
import xxhash
from contextlib import suppress
from itertools import repeat

from calibre.constants import iswindows
from calibre.utils.copy_files import WINDOWS_SLEEP_FOR_RETRY_TIME
from calibre.utils.filenames import make_long_path_useable

from ..constants import NOTES_DIR_NAME
from .schema_upgrade import SchemaUpgrade

if iswindows:
    from calibre_extensions import winutil


class cmt(str):
    pass


copy_marked_up_text = cmt()
SEP = b'\0\x1c\0'


def hash_data(data: bytes) -> str:
    return 'xxh64:' + xxhash.xxh3_64_hexdigest(data)


def remove_with_retry(x):
    x = make_long_path_useable(x)
    try:
        os.remove(x)
    except FileNotFoundError:
        return
    except OSError as e:
        if iswindows and e.winerror == winutil.ERROR_SHARING_VIOLATION:
            time.sleep(WINDOWS_SLEEP_FOR_RETRY_TIME)
            os.remove(x)


class Notes:
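The cmt subclass exists only so copy_marked_up_text can act as a unique sentinel default: set_note() compares against it by identity and copies the marked-up text into searchable_text when the caller passes nothing, while an explicit empty string still means "no searchable text". A standalone sketch of the same pattern, with illustrative names:

class _Sentinel(str):
    pass

COPY = _Sentinel()

def store(doc, searchable_text=COPY):
    # Identity check: only the default triggers the copy; an explicit '' does not.
    if searchable_text is COPY:
        searchable_text = doc
    return searchable_text

assert store('hello') == 'hello'
assert store('hello', '') == ''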
@@ -15,13 +47,18 @@ class Notes:
        conn = backend.get_connection()
        libdir = os.path.dirname(os.path.abspath(conn.db_filename('main')))
        notes_dir = os.path.join(libdir, NOTES_DIR_NAME)
        self.resources_dir = os.path.join(notes_dir, 'resources')
        self.backup_dir = os.path.join(notes_dir, 'backup')
        self.retired_dir = os.path.join(notes_dir, 'retired')
        if not os.path.exists(notes_dir):
            os.makedirs(notes_dir, exist_ok=True)
            if iswindows:
                import calibre_extensions.winutil as winutil
                winutil.set_file_attributes(notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
        dbpath = os.path.join(notes_dir, 'notes.db')
        conn.execute("ATTACH DATABASE ? AS notes_db", (dbpath,))
        os.makedirs(self.resources_dir, exist_ok=True)
        os.makedirs(self.backup_dir, exist_ok=True)
        os.makedirs(self.retired_dir, exist_ok=True)
        self.allowed_fields = set()
        triggers = []
        for table in backend.tables.values():
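Notes live in a separate notes.db inside the library's notes directory, attached to the existing library connection, so queries can address notes_db.* tables alongside the main metadata tables without opening a second connection. A minimal standalone sketch of the same SQLite mechanism (using the stdlib sqlite3 module rather than calibre's apsw wrapper, with throwaway file names):

import sqlite3

conn = sqlite3.connect('metadata_example.db')   # stands in for the main library database
conn.execute('ATTACH DATABASE ? AS notes_db', ('notes_example.db',))
conn.execute('CREATE TABLE IF NOT EXISTS notes_db.notes (id INTEGER PRIMARY KEY, item, colname, doc, searchable_text)')
# Statements on the one connection can now reach both databases.
conn.execute('INSERT INTO notes_db.notes (item, colname, doc, searchable_text) VALUES (?,?,?,?)',
             (1, 'authors', '<p>example</p>', 'example'))
print(conn.execute('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (1, 'authors')).fetchone())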
@@ -36,3 +73,149 @@ class Notes:
            )
        SchemaUpgrade(conn, '\n'.join(triggers))
        conn.notes_dbpath = dbpath

    def path_for_resource(self, resource_hash: str) -> str:
        idx = resource_hash.index(':')
        prefix = resource_hash[idx + 1: idx + 3]
        return os.path.join(self.resources_dir, prefix, resource_hash)

    def remove_resources(self, conn, note_id, resources_to_potentially_remove, delete_from_link_table=True):
        if not isinstance(resources_to_potentially_remove, tuple):
            resources_to_potentially_remove = tuple(resources_to_potentially_remove)
        if delete_from_link_table:
            conn.executemany('''
                DELETE FROM notes_db.notes_resources_link WHERE note=? AND hash=?
            ''', tuple((note_id, x) for x in resources_to_potentially_remove))
        for (x,) in conn.execute(
            '''
            SELECT value FROM (VALUES {}) AS my_values(value) WHERE value NOT IN (SELECT hash FROM notes_db.notes_resources_link)
            '''.format(','.join(repeat('(?)', len(resources_to_potentially_remove)))), resources_to_potentially_remove):
            remove_with_retry(self.path_for_resource(x))

    def note_id_for(self, conn, field_name, item_id):
        for (ans,) in conn.execute('SELECT id FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
            return ans

    def resources_used_by(self, conn, note_id):
        if note_id is not None:
            for (h,) in conn.execute('SELECT hash from notes_db.notes_resources_link WHERE note=?', (note_id,)):
                yield h
    def set_backup_for(self, field_name, item_id, marked_up_text='', searchable_text=''):
        path = make_long_path_useable(os.path.join(self.backup_dir, field_name, str(item_id)))
        if marked_up_text:
            try:
                f = open(path, 'wb')
            except FileNotFoundError:
                os.makedirs(os.path.dirname(path), exist_ok=True)
                f = open(path, 'wb')
            with f:
                f.write(marked_up_text.encode('utf-8'))
                f.write(SEP)
                f.write(searchable_text.encode('utf-8'))
        else:
            if os.path.exists(path):
                dest = make_long_path_useable(os.path.join(self.retired_dir, f'{item_id}_{field_name}'))
                os.replace(path, dest)
                self.trim_retired_dir()
    def set_note(self, conn, field_name, item_id, marked_up_text='', hashes_of_used_resources=(), searchable_text=copy_marked_up_text):
        if searchable_text is copy_marked_up_text:
            searchable_text = marked_up_text
        note_id = self.note_id_for(conn, field_name, item_id)
        old_resources = frozenset(self.resources_used_by(conn, note_id))
        if not marked_up_text:
            if note_id is not None:
                conn.execute('DELETE FROM notes_db.notes WHERE id=?', (note_id,))
                self.set_backup_for(field_name, item_id)
                if old_resources:
                    self.remove_resources(conn, note_id, old_resources, delete_from_link_table=False)
            return
        new_resources = frozenset(hashes_of_used_resources)
        resources_to_potentially_remove = old_resources - new_resources
        resources_to_add = new_resources - old_resources
        inserted_id, = next(conn.execute('''
            INSERT OR REPLACE INTO notes_db.notes (item,colname,doc,searchable_text) VALUES (?,?,?,?) RETURNING id;
        ''', (item_id, field_name, marked_up_text, searchable_text)))
        if resources_to_potentially_remove:
            self.remove_resources(conn, inserted_id, resources_to_potentially_remove)
        if resources_to_add:
            conn.executemany('''
                INSERT INTO notes_db.notes_resources_link (note,hash) VALUES (?,?);
            ''', tuple((inserted_id, x) for x in resources_to_add))
        self.set_backup_for(field_name, item_id, marked_up_text, searchable_text)
        return note_id

    def get_note(self, conn, field_name, item_id):
        for (doc,) in conn.execute('SELECT doc FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)):
            return doc

    def get_note_data(self, conn, field_name, item_id):
        for (note_id, doc, searchable_text) in conn.execute(
            'SELECT id,doc,searchable_text FROM notes_db.notes WHERE item=? AND colname=?', (item_id, field_name)
        ):
            return {
                'id': note_id, 'doc': doc, 'searchable_text': searchable_text,
                'resource_hashes': frozenset(self.resources_used_by(conn, note_id)),
            }

    def rename_note(self, conn, field_name, old_item_id, new_item_id):
        note_id = self.note_id_for(conn, field_name, old_item_id)
        if note_id is None:
            return
        new_note = self.get_note(conn, field_name, new_item_id)
        if new_note:
            return
        old_note = self.get_note_data(conn, field_name, old_item_id)
        if not old_note or not old_note['doc']:
            return
        self.set_note(conn, field_name, new_item_id, old_note['doc'], old_note['resource_hashes'], old_note['searchable_text'])

    def trim_retired_dir(self):
        mpath_map = {}
        items = []
        for d in os.scandir(self.retired_dir):
            mpath_map[d.path] = d.stat(follow_symlinks=False).st_mtime_ns
            items.append(d.path)
        extra = len(items) - self.max_retired_items
        if extra > 0:
            items.sort(key=mpath_map.__getitem__)
            for path in items[:extra]:
                remove_with_retry(path)
    def add_resource(self, path_or_stream_or_data):
        if isinstance(path_or_stream_or_data, bytes):
            data = path_or_stream_or_data
        elif isinstance(path_or_stream_or_data, str):
            with open(path_or_stream_or_data, 'rb') as f:
                data = f.read()
        else:
            data = path_or_stream_or_data.read()
        resource_hash = hash_data(data)
        path = self.path_for_resource(resource_hash)
        path = make_long_path_useable(path)
        exists = False
        try:
            s = os.stat(path, follow_symlinks=False)
        except OSError:
            pass
        else:
            exists = s.st_size == len(data)
        if exists:
            return resource_hash

        try:
            f = open(path, 'wb')
        except FileNotFoundError:
            os.makedirs(os.path.dirname(path), exist_ok=True)
            f = open(path, 'wb')
        with f:
            f.write(data)
        return resource_hash

    def get_resource(self, resource_hash) -> bytes:
        path = self.path_for_resource(resource_hash)
        path = make_long_path_useable(path)
        with suppress(FileNotFoundError), open(path, 'rb') as f:
            return f.read()
        return b''
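Taken together, hash_data() and path_for_resource() give a content-addressed store: the algorithm tag is skipped and the first two hex characters of the digest become a sharding subdirectory under resources/. A quick sketch of the resulting layout, with an illustrative resources directory and assuming python-xxhash:

import os
import xxhash

resources_dir = 'notes_dir/resources'  # illustrative; the real directory lives inside the library folder

def hash_data(data: bytes) -> str:
    return 'xxh64:' + xxhash.xxh3_64_hexdigest(data)

def path_for_resource(resource_hash: str) -> str:
    idx = resource_hash.index(':')
    prefix = resource_hash[idx + 1: idx + 3]  # first two hex characters after the tag
    return os.path.join(resources_dir, prefix, resource_hash)

h = hash_data(b'resource1')
print(path_for_resource(h))  # e.g. notes_dir/resources/<two hex chars>/xxh64:<digest>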
@@ -306,6 +306,3 @@ class FilesystemTest(BaseTest):
            c(r(match_type='not_startswith', query='IGnored.', action='add'), r(query='ignored.md')),
        ):
            q(['added.epub non-book.other'.split()], find_books_in_directory('', True, compiled_rules=rules, listdir_impl=lambda x: files))
-
-    def test_notes_operations(self):
-        cache = self.init_cache()
src/calibre/db/tests/notes.py (new file, 19 lines)
@@ -0,0 +1,19 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>


from calibre.db.tests.base import BaseTest


class NotesTest(BaseTest):

    ae = BaseTest.assertEqual

    def test_notes(self):
        cache = self.init_cache()
        authors = sorted(cache.all_field_ids('authors'))
        self.ae(cache.notes_for('authors', authors[0]), '')
        doc = 'simple notes for an author'
        h1 = cache.add_notes_resource(b'resource1')
        h2 = cache.add_notes_resource(b'resource2')
        cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
        self.ae(cache.notes_for('authors', authors[0]), doc)