From 7eefd9697049f05336e6d70402a97814953de030 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 6 Aug 2023 17:50:03 +0530 Subject: [PATCH] Start work on per category item notes --- resources/notes_sqlite.sql | 40 ++++++++++++++++++++++++++ src/calibre/db/backend.py | 33 +++++++++++++++------ src/calibre/db/constants.py | 1 + src/calibre/db/notes/connect.py | 38 ++++++++++++++++++++++++ src/calibre/db/notes/schema_upgrade.py | 38 ++++++++++++++++++++++++ src/calibre/db/tables.py | 35 ++++++++++------------ src/calibre/db/tests/filesystem.py | 3 ++ src/calibre/db/tests/legacy.py | 2 ++ src/calibre/library/check_library.py | 6 ++-- 9 files changed, 166 insertions(+), 30 deletions(-) create mode 100644 resources/notes_sqlite.sql create mode 100644 src/calibre/db/notes/connect.py create mode 100644 src/calibre/db/notes/schema_upgrade.py diff --git a/resources/notes_sqlite.sql b/resources/notes_sqlite.sql new file mode 100644 index 0000000000..bc517b3e9e --- /dev/null +++ b/resources/notes_sqlite.sql @@ -0,0 +1,40 @@ +CREATE TABLE notes_db.notes ( id INTEGER PRIMARY KEY, + item INTEGER NOT NULL, + colname TEXT NOT NULL COLLATE NOCASE, + doc TEXT NOT NULL DEFAULT '', + searchable_text TEXT NOT NULL DEFAULT '', + UNIQUE(item, colname) +); + +CREATE TABLE notes_db.notes_resources_link ( id INTEGER PRIMARY KEY, + note INTEGER NOT NULL, + hash TEXT NOT NULL, + UNIQUE(note, hash) +); + +CREATE VIRTUAL TABLE notes_db.notes_fts USING fts5(searchable_text, content = 'notes', content_rowid = 'id', tokenize = 'calibre remove_diacritics 2'); +CREATE VIRTUAL TABLE notes_db.notes_fts_stemmed USING fts5(searchable_text, content = 'notes', content_rowid = 'id', tokenize = 'porter calibre remove_diacritics 2'); + +CREATE TRIGGER notes_db.notes_fts_insert_trg AFTER INSERT ON notes_db.notes +BEGIN + INSERT INTO notes_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text); + INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text); +END; + +CREATE TRIGGER notes_db.notes_db_notes_delete_trg AFTER DELETE ON notes_db.notes + BEGIN + DELETE FROM notes_resources_link WHERE note=OLD.id; + INSERT INTO notes_fts(notes_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text); + INSERT INTO notes_fts_stemmed(notes_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text); + END; + +CREATE TRIGGER notes_db.notes_fts_update_trg AFTER UPDATE ON notes_db.notes +BEGIN + INSERT INTO notes_fts(notes_fts, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text); + INSERT INTO notes_fts(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text); + INSERT INTO notes_fts_stemmed(notes_fts_stemmed, rowid, searchable_text) VALUES('delete', OLD.id, OLD.searchable_text); + INSERT INTO notes_fts_stemmed(rowid, searchable_text) VALUES (NEW.id, NEW.searchable_text); +END; + + +PRAGMA notes_db.user_version=1; diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 35d08d7062..c8323ceb66 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -27,7 +27,7 @@ from calibre.db import SPOOL_SIZE, FTSQueryError from calibre.db.annotations import annot_db_data, unicode_normalize from calibre.db.constants import ( BOOK_ID_PATH_TEMPLATE, COVER_FILE_NAME, DEFAULT_TRASH_EXPIRY_TIME_SECONDS, - METADATA_FILE_NAME, TRASH_DIR_NAME, TrashEntry, + METADATA_FILE_NAME, NOTES_DIR_NAME, TRASH_DIR_NAME, TrashEntry, ) from calibre.db.errors import NoSuchFormat from calibre.db.schema_upgrades import SchemaUpgrade @@ -41,14 +41,13 @@ from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile from calibre.utils import pickle_binary_string, unpickle_binary_string from calibre.utils.config import from_json, prefs, to_json, tweaks from calibre.utils.copy_files import ( - copy_files, copy_tree, rename_files, - windows_check_if_files_in_use, + copy_files, copy_tree, rename_files, windows_check_if_files_in_use, ) from calibre.utils.date import EPOCH, parse_date, utcfromtimestamp, utcnow from calibre.utils.filenames import ( ascii_filename, atomic_rename, copyfile_using_links, copytree_using_links, - hardlink_file, is_case_sensitive, is_fat_filesystem, make_long_path_useable, - remove_dir_if_empty, samefile, get_long_path_name + get_long_path_name, hardlink_file, is_case_sensitive, is_fat_filesystem, + make_long_path_useable, remove_dir_if_empty, samefile, ) from calibre.utils.formatter_functions import ( compile_user_template_functions, formatter_functions, load_user_template_functions, @@ -337,7 +336,7 @@ class Connection(apsw.Connection): # {{{ set_ui_language(get_lang()) super().__init__(path) plugins.load_apsw_extension(self, 'sqlite_extension') - self.fts_dbpath = None + self.fts_dbpath = self.notes_dbpath = None self.setbusytimeout(self.BUSY_TIMEOUT) self.execute('pragma cache_size=-5000') @@ -509,6 +508,7 @@ class DB: self.ensure_trash_dir(during_init=True) if load_user_formatter_functions: set_global_state(self) + self.initialize_notes() @property def last_expired_trash_at(self) -> float: @@ -945,6 +945,23 @@ class DB: # }}} + def initialize_notes(self): + from .notes.connect import Notes + self.notes = Notes(self) + + def delete_category_items(self, field_name, table_name, items, link_table_name='', link_col_name=''): + bindings = tuple((x,) for x in items) + if link_table_name and link_col_name: + self.executemany(f'DELETE FROM {link_table_name} WHERE {link_col_name}=?', bindings) + self.executemany(f'DELETE FROM {table_name} WHERE id=?', bindings) + + def rename_category_item(self, field_name, table_name, link_table_name, link_col_name, old_item_id, new_item_id): + # For custom series this means that the series index can + # potentially have duplicates/be incorrect, but there is no way to + # handle that in this context. + self.execute(f'UPDATE {link_table_name} SET {link_col_name}=? WHERE {link_col_name}=?; DELETE FROM {table_name} WHERE id=?', + (new_item_id, old_item_id, old_item_id)) + def initialize_fts(self, dbref): self.fts = None if not self.prefs['fts_enabled']: @@ -2022,7 +2039,7 @@ class DB: os.makedirs(os.path.join(tdir, 'f'), exist_ok=True) if iswindows: import calibre_extensions.winutil as winutil - winutil.set_file_attributes(tdir, getattr(winutil, 'FILE_ATTRIBUTE_HIDDEN', 2) | getattr(winutil, 'FILE_ATTRIBUTE_NOT_CONTENT_INDEXED', 8192)) + winutil.set_file_attributes(tdir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED) if time.time() - self.last_expired_trash_at >= 3600: self.expire_old_trash(during_init=during_init) @@ -2454,7 +2471,7 @@ class DB: def get_top_level_move_items(self, all_paths): items = set(os.listdir(self.library_path)) paths = set(all_paths) - paths.update({'metadata.db', 'full-text-search.db', 'metadata_db_prefs_backup.json'}) + paths.update({'metadata.db', 'full-text-search.db', 'metadata_db_prefs_backup.json', NOTES_DIR_NAME}) path_map = {x:x for x in paths} if not self.is_case_sensitive: for x in items: diff --git a/src/calibre/db/constants.py b/src/calibre/db/constants.py index c443a52b7d..07cda782a8 100644 --- a/src/calibre/db/constants.py +++ b/src/calibre/db/constants.py @@ -8,6 +8,7 @@ COVER_FILE_NAME = 'cover.jpg' METADATA_FILE_NAME = 'metadata.opf' DEFAULT_TRASH_EXPIRY_TIME_SECONDS = 14 * 86400 TRASH_DIR_NAME = '.caltrash' +NOTES_DIR_NAME = '.notes' DATA_DIR_NAME = 'data' DATA_FILE_PATTERN = f'{DATA_DIR_NAME}/**/*' BOOK_ID_PATH_TEMPLATE = ' ({})' diff --git a/src/calibre/db/notes/connect.py b/src/calibre/db/notes/connect.py new file mode 100644 index 0000000000..99bb33c4aa --- /dev/null +++ b/src/calibre/db/notes/connect.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# License: GPLv3 Copyright: 2023, Kovid Goyal + +import os + +from calibre.constants import iswindows + +from ..constants import NOTES_DIR_NAME +from .schema_upgrade import SchemaUpgrade + + +class Notes: + + def __init__(self, backend): + conn = backend.get_connection() + libdir = os.path.dirname(os.path.abspath(conn.db_filename('main'))) + notes_dir = os.path.join(libdir, NOTES_DIR_NAME) + if not os.path.exists(notes_dir): + os.makedirs(notes_dir, exist_ok=True) + if iswindows: + import calibre_extensions.winutil as winutil + winutil.set_file_attributes(notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED) + dbpath = os.path.join(notes_dir, 'notes.db') + conn.execute("ATTACH DATABASE ? AS notes_db", (dbpath,)) + self.allowed_fields = set() + triggers = [] + for table in backend.tables.values(): + m = table.metadata + if not table.supports_notes or m.get('datatype') == 'rating': + continue + self.allowed_fields.add(table.name) + triggers.append( + f'CREATE TEMP TRIGGER IF NOT EXISTS notes_db_{table.name.replace("#", "_")}_deleted_trigger AFTER DELETE ON main.{m["table"]} BEGIN\n' + f" DELETE FROM notes WHERE colname = '{table.name}' AND item = OLD.id;\n" + 'END;' + ) + SchemaUpgrade(conn, '\n'.join(triggers)) + conn.notes_dbpath = dbpath diff --git a/src/calibre/db/notes/schema_upgrade.py b/src/calibre/db/notes/schema_upgrade.py new file mode 100644 index 0000000000..cd19854b06 --- /dev/null +++ b/src/calibre/db/notes/schema_upgrade.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# License: GPLv3 Copyright: 2023, Kovid Goyal + +from calibre.utils.resources import get_path as P + + +class SchemaUpgrade: + + def __init__(self, conn, triggers_sql): + self.conn = conn + conn.execute('BEGIN EXCLUSIVE TRANSACTION') + try: + if self.user_version == 0: + notes_sqlite = P('notes_sqlite.sql', data=True, allow_user_override=False).decode('utf-8') + conn.execute(notes_sqlite) + while True: + uv = self.user_version + meth = getattr(self, f'upgrade_version_{uv}', None) + if meth is None: + break + print(f'Upgrading Notes database to version {uv+1}...') + meth() + self.user_version = uv + 1 + conn.execute(triggers_sql) + except (Exception, BaseException): + conn.execute('ROLLBACK') + raise + else: + conn.execute('COMMIT') + self.conn = None + + @property + def user_version(self): + return self.conn.get('PRAGMA notes_db.user_version', all=False) or 0 + + @user_version.setter + def user_version(self, val): + self.conn.execute(f'PRAGMA notes_db.user_version={val}') diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 47e7c66f20..e0bd6840cb 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -52,6 +52,8 @@ null = object() class Table: + supports_notes = False + def __init__(self, name, metadata, link_table=None): self.name, self.metadata = name, metadata self.sort_alpha = metadata.get('is_multiple', False) and metadata.get('display', {}).get('sort_alpha', False) @@ -199,6 +201,7 @@ class ManyToOneTable(Table): ''' table_type = MANY_ONE + supports_notes = True def read(self, db): self.id_map = {} @@ -253,8 +256,7 @@ class ManyToOneTable(Table): db.executemany('UPDATE {0} SET {1}=? WHERE {1}=?'.format( self.link_table, self.metadata['link_column']), tuple((main_id, x) for x in v)) - db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']), - tuple((x,) for x in v)) + db.delete_category_items(self.name, self.metadata['table'], v) def remove_books(self, book_ids, db): clean = set() @@ -316,9 +318,7 @@ class ManyToOneTable(Table): for book_id in book_ids: self.book_col_map.pop(book_id, None) affected_books.update(book_ids) - item_ids = tuple((x,) for x in item_ids) - db.executemany('DELETE FROM {} WHERE {}=?'.format(self.link_table, self.metadata['link_column']), item_ids) - db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']), item_ids) + db.delete_category_items(self.name, self.metadata['table'], item_ids, self.link_table, self.metadata['link_column']) return affected_books def rename_item(self, item_id, new_name, db): @@ -339,11 +339,7 @@ class ManyToOneTable(Table): for book_id in books: self.book_col_map[book_id] = existing_item self.col_book_map[existing_item].update(books) - # For custom series this means that the series index can - # potentially have duplicates/be incorrect, but there is no way to - # handle that in this context. - db.execute('UPDATE {0} SET {1}=? WHERE {1}=?; DELETE FROM {2} WHERE id=?'.format( - self.link_table, lcol, table), (existing_item, item_id, item_id)) + db.rename_category_item(self.name, table, self.link_table, lcol, item_id, existing_item) return affected_books, new_id def set_links(self, link_map, db): @@ -358,6 +354,8 @@ class ManyToOneTable(Table): class RatingTable(ManyToOneTable): + supports_notes = False + def read_id_maps(self, db): ManyToOneTable.read_id_maps(self, db) # Ensure there are no records with rating=0 in the table. These should @@ -420,9 +418,7 @@ class ManyToManyTable(ManyToOneTable): if self.id_map.pop(item_id, null) is not null: clean.add(item_id) if clean and self.do_clean_on_remove: - db.executemany( - 'DELETE FROM {} WHERE id=?'.format(self.metadata['table']), - [(x,) for x in clean]) + db.delete_category_items(self.name, self.metadata['table'], clean) return clean def remove_items(self, item_ids, db, restrict_to_book_ids=None): @@ -464,9 +460,7 @@ class ManyToManyTable(ManyToOneTable): for book_id in book_ids: self.book_col_map[book_id] = tuple(x for x in self.book_col_map.get(book_id, ()) if x != item_id) affected_books.update(book_ids) - item_ids = tuple((x,) for x in item_ids) - db.executemany('DELETE FROM {} WHERE {}=?'.format(self.link_table, self.metadata['link_column']), item_ids) - db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']), item_ids) + db.delete_category_items(self.name, self.metadata['table'], item_ids, self.link_table, self.metadata['link_column']) return affected_books def rename_item(self, item_id, new_name, db): @@ -492,8 +486,7 @@ class ManyToManyTable(ManyToOneTable): self.col_book_map[existing_item].update(books) db.executemany(f'DELETE FROM {self.link_table} WHERE book=? AND {lcol}=?', [ (book_id, existing_item) for book_id in books]) - db.execute('UPDATE {0} SET {1}=? WHERE {1}=?; DELETE FROM {2} WHERE id=?'.format( - self.link_table, lcol, table), (existing_item, item_id, item_id)) + db.rename_category_item(self.name, table, self.link_table, lcol, item_id, existing_item) return affected_books, new_id def fix_case_duplicates(self, db): @@ -531,8 +524,7 @@ class ManyToManyTable(ManyToOneTable): db.executemany( 'INSERT INTO {} (book,{}) VALUES (?,?)'.format(self.link_table, self.metadata['link_column']), tuple((book_id, x) for x in vals)) - db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']), - tuple((x,) for x in v)) + db.delete_category_items(self.name, self.metadata['table'], v) class AuthorsTable(ManyToManyTable): @@ -590,6 +582,7 @@ class AuthorsTable(ManyToManyTable): class FormatsTable(ManyToManyTable): do_clean_on_remove = False + supports_notes = False def read_id_maps(self, db): pass @@ -675,6 +668,8 @@ class FormatsTable(ManyToManyTable): class IdentifiersTable(ManyToManyTable): + supports_notes = False + def read_id_maps(self, db): pass diff --git a/src/calibre/db/tests/filesystem.py b/src/calibre/db/tests/filesystem.py index 9e2bce4a6a..a07a635e58 100644 --- a/src/calibre/db/tests/filesystem.py +++ b/src/calibre/db/tests/filesystem.py @@ -306,3 +306,6 @@ class FilesystemTest(BaseTest): c(r(match_type='not_startswith', query='IGnored.', action='add'), r(query='ignored.md')), ): q(['added.epub non-book.other'.split()], find_books_in_directory('', True, compiled_rules=rules, listdir_impl=lambda x: files)) + + def test_notes_operations(self): + cache = self.init_cache() diff --git a/src/calibre/db/tests/legacy.py b/src/calibre/db/tests/legacy.py index 1a7b3b9e81..8526003079 100644 --- a/src/calibre/db/tests/legacy.py +++ b/src/calibre/db/tests/legacy.py @@ -274,6 +274,8 @@ class LegacyTest(BaseTest): def f(x, y): # get_top_level_move_items is broken in the old db on case-insensitive file systems x.discard('metadata_db_prefs_backup.json') y.pop('full-text-search.db', None) + x.discard('.notes') + y.pop('.notes', None) return x, y self.assertEqual(f(*db.get_top_level_move_items()), f(*ndb.get_top_level_move_items())) d1, d2 = BytesIO(), BytesIO() diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index a7262d3ae6..cc94864249 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -13,7 +13,7 @@ import traceback from calibre import isbytestring from calibre.constants import filesystem_encoding from calibre.db.constants import ( - COVER_FILE_NAME, DATA_DIR_NAME, METADATA_FILE_NAME, TRASH_DIR_NAME, + COVER_FILE_NAME, DATA_DIR_NAME, METADATA_FILE_NAME, TRASH_DIR_NAME, NOTES_DIR_NAME, ) from calibre.ebooks import BOOK_EXTENSIONS from calibre.utils.localization import _ @@ -21,7 +21,9 @@ from polyglot.builtins import iteritems EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS) NORMALS = frozenset({METADATA_FILE_NAME, COVER_FILE_NAME, DATA_DIR_NAME}) -IGNORE_AT_TOP_LEVEL = frozenset({'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db', 'full-text-search.db', TRASH_DIR_NAME}) +IGNORE_AT_TOP_LEVEL = frozenset({ + 'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db', 'full-text-search.db', TRASH_DIR_NAME, NOTES_DIR_NAME +}) ''' Checks fields: