diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index a636d9d503..86ba120e94 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -1890,11 +1890,12 @@ class DB: def iter_extra_files(self, book_id, book_path, formats_field, yield_paths=False, pattern=''): known_files = {COVER_FILE_NAME, METADATA_FILE_NAME} - for fmt in formats_field.for_book(book_id, default_value=()): - fname = formats_field.format_fname(book_id, fmt) - fpath = self.format_abspath(book_id, fmt, fname, book_path, do_file_rename=False) - if fpath: - known_files.add(os.path.basename(fpath)) + if '/' not in pattern: + for fmt in formats_field.for_book(book_id, default_value=()): + fname = formats_field.format_fname(book_id, fmt) + fpath = self.format_abspath(book_id, fmt, fname, book_path, do_file_rename=False) + if fpath: + known_files.add(os.path.basename(fpath)) full_book_path = os.path.abspath(os.path.join(self.library_path, book_path)) if pattern: from pathlib import Path @@ -1913,9 +1914,9 @@ class DB: relpath = os.path.relpath(path, full_book_path) relpath = relpath.replace(os.sep, '/') if relpath not in known_files: - mtime = os.path.getmtime(path) + stat = os.stat(path) if yield_paths: - yield relpath, path, mtime + yield relpath, path, stat else: try: src = open(path, 'rb') @@ -1924,7 +1925,7 @@ class DB: time.sleep(1) src = open(path, 'rb') with src: - yield relpath, src, mtime + yield relpath, src, stat def add_extra_file(self, relpath, stream, book_path, replace=True, auto_rename=False): bookdir = os.path.join(self.library_path, book_path) diff --git a/src/calibre/db/backup.py b/src/calibre/db/backup.py index d6747e74b0..2437d7169d 100644 --- a/src/calibre/db/backup.py +++ b/src/calibre/db/backup.py @@ -68,6 +68,7 @@ class MetadataBackup(Thread): if self.stop_running.is_set() or self.db.is_closed: return traceback.print_exc() + try: book_id = self.db.get_a_dirtied_book() if book_id is None: diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index 37802a3387..30a4faaaee 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -24,7 +24,8 @@ from time import monotonic, sleep, time from calibre import as_unicode, detect_ncpus, isbytestring from calibre.constants import iswindows, preferred_encoding from calibre.customize.ui import ( - run_plugins_on_import, run_plugins_on_postadd, run_plugins_on_postimport, run_plugins_on_postdelete, + run_plugins_on_import, run_plugins_on_postadd, run_plugins_on_postdelete, + run_plugins_on_postimport, ) from calibre.db import SPOOL_SIZE, _get_next_series_num_for_list from calibre.db.annotations import merge_annotations @@ -154,6 +155,7 @@ class Cache: self.formatter_template_cache = {} self.dirtied_cache = {} self.link_maps_cache = {} + self.extra_files_cache = {} self.vls_for_books_cache = None self.vls_for_books_lib_in_process = None self.vls_cache_lock = Lock() @@ -273,6 +275,13 @@ class Cache: self.vls_for_books_cache = None self.vls_for_books_lib_in_process = None + @write_api + def clear_extra_files_cache(self, book_id=None): + if book_id is None: + self.extra_files_cache = {} + else: + self.extra_files_cache.pop(book_id, None) + @read_api def last_modified(self): return self.backend.last_modified() @@ -291,8 +300,9 @@ class Cache: self.format_metadata_cache.clear() if search_cache: self._clear_search_caches(book_ids) - self.clear_link_map_cache(book_ids) + self._clear_link_map_cache(book_ids) + @write_api def clear_link_map_cache(self, book_ids=None): if book_ids is None: self.link_maps_cache = {} @@ -560,7 +570,6 @@ class Cache: has_more = do_one() except Exception: if self.backend.fts_enabled: - import traceback traceback.print_exc() sleep(self.fts_indexing_sleep_time) @@ -1491,7 +1500,7 @@ class Cache: if update_path and do_path_update: self._update_path(dirtied, mark_as_dirtied=False) self._mark_as_dirty(dirtied) - self.clear_link_map_cache(dirtied) + self._clear_link_map_cache(dirtied) self.event_dispatcher(EventType.metadata_changed, name, dirtied) return dirtied @@ -1507,7 +1516,7 @@ class Cache: self.format_metadata_cache.pop(book_id, None) if mark_as_dirtied: self._mark_as_dirty(book_ids) - self.clear_link_map_cache(book_ids) + self._clear_link_map_cache(book_ids) @read_api def get_a_dirtied_book(self): @@ -1540,7 +1549,6 @@ class Cache: except: # This almost certainly means that the book has been deleted while # the backup operation sat in the queue. - import traceback traceback.print_exc() return mi, sequence @@ -2063,7 +2071,6 @@ class Cache: raw = metadata_to_opf(mi) self.backend.write_backup(path, raw) except Exception: - import traceback traceback.print_exc() self.backend.remove_books(path_map, permanent=permanent) for field in itervalues(self.fields): @@ -2187,7 +2194,7 @@ class Cache: for book_id in moved_books: self._set_field(f.index_field.name, {book_id:self._get_next_series_num_for(self._fast_field_for(f, book_id), field=field)}) self._mark_as_dirty(affected_books) - self.clear_link_map_cache(affected_books) + self._clear_link_map_cache(affected_books) self.event_dispatcher(EventType.items_renamed, field, affected_books, id_map) return affected_books, id_map @@ -2207,7 +2214,7 @@ class Cache: self._set_field(field.index_field.name, {bid:1.0 for bid in affected_books}) else: self._mark_as_dirty(affected_books) - self.clear_link_map_cache(affected_books) + self._clear_link_map_cache(affected_books) self.event_dispatcher(EventType.items_removed, field, affected_books, item_ids) return affected_books @@ -2342,7 +2349,7 @@ class Cache: self._set_field('author_sort', val_map) if changed_books: self._mark_as_dirty(changed_books) - self.clear_link_map_cache(changed_books) + self._clear_link_map_cache(changed_books) return changed_books @write_api @@ -2353,7 +2360,7 @@ class Cache: changed_books |= self._books_for_field('authors', author_id) if changed_books: self._mark_as_dirty(changed_books) - self.clear_link_map_cache(changed_books) + self._clear_link_map_cache(changed_books) return changed_books @read_api @@ -2453,7 +2460,7 @@ class Cache: changed_books |= self._books_for_field(field, id_) if changed_books: self._mark_as_dirty(changed_books) - self.clear_link_map_cache(changed_books) + self._clear_link_map_cache(changed_books) return changed_books @read_api @@ -2577,7 +2584,6 @@ class Cache: if progress is not None: progress(item_name, item_count, total) except Exception: - import traceback traceback.print_exc() all_paths = {self._field_for('path', book_id).partition('/')[0] for book_id in self._all_book_ids()} @@ -2666,7 +2672,6 @@ class Cache: try: plugin.run(self) except Exception: - import traceback traceback.print_exc() self._shutdown_fts(stage=2) with self.write_lock: @@ -2966,9 +2971,9 @@ class Cache: bp = self.field_for('path', book_id) extra_files[book_id] = ef = {} if bp: - for (relpath, fobj, mtime) in self.backend.iter_extra_files(book_id, bp, self.fields['formats']): + for (relpath, fobj, stat_result) in self.backend.iter_extra_files(book_id, bp, self.fields['formats']): key = f'{key_prefix}:{book_id}:.|{relpath}' - with exporter.start_file(key, mtime=mtime) as dest: + with exporter.start_file(key, mtime=stat_result.st_mtime) as dest: shutil.copyfileobj(fobj, dest) ef[relpath] = key exporter.set_metadata(library_key, metadata) @@ -3070,6 +3075,7 @@ class Cache: added = {} for relpath, stream_or_path in map_of_relpath_to_stream_or_path.items(): added[relpath] = bool(self.backend.add_extra_file(relpath, stream_or_path, path, replace, auto_rename)) + self._clear_extra_files_cache(book_id) return added @write_api @@ -3083,21 +3089,36 @@ class Cache: book_path = self._field_for('path', src_id) if book_path: book_path = book_path.replace('/', os.sep) - for (relpath, file_path, mtime) in self.backend.iter_extra_files( + for (relpath, file_path, stat_result) in self.backend.iter_extra_files( src_id, book_path, self.fields['formats'], yield_paths=True): added.add(self.backend.add_extra_file(relpath, file_path, path, replace=replace, auto_rename=True)) + self._clear_extra_files_cache(dest_id) return added @read_api - def list_extra_files_matching(self, book_id, pattern=''): - ' List extra data files matching the specified pattern. Empty pattern matches all. Recursive globbing with ** is supported. ' - path = self._field_for('path', book_id) - ans = {} - if path: - book_path = path.replace('/', os.sep) - for (relpath, file_path, mtime) in self.backend.iter_extra_files( - book_id, book_path, self.fields['formats'], yield_paths=True, pattern=pattern): - ans[relpath] = file_path + def list_extra_files(self, book_id, use_cache=False, pattern=''): + ''' + Get information about extra files in the book's directory. + + :param book_id: the database book id for the book + :param pattern: the pattern of filenames to search for. Empty pattern matches all extra files. Patterns must use / as separator. + Use the DATA_FILE_PATTERN constant to match files inside the data directory. + + :return: A tuple of all extra files matching the specified pattern. Each element of the tuple is (relpath, file_path, stat_result) + where relpath is the relative path of the file to the book directory using / as a separator. + stat_result is the result of calling os.stat() on the file. + ''' + key = book_id, pattern + ans = self.extra_files_cache.get(key) + if ans is None or not use_cache: + ans = [] + path = self._field_for('path', book_id) + if path: + for (relpath, file_path, stat_result) in self.backend.iter_extra_files( + book_id, path, self.fields['formats'], yield_paths=True, pattern=pattern + ): + ans.append((relpath, file_path, stat_result)) + self.extra_files_cache[key] = ans = tuple(ans) return ans @read_api diff --git a/src/calibre/db/cli/cmd_export.py b/src/calibre/db/cli/cmd_export.py index 20032dd5e4..66e463d290 100644 --- a/src/calibre/db/cli/cmd_export.py +++ b/src/calibre/db/cli/cmd_export.py @@ -6,6 +6,7 @@ import os from calibre.db.cli import integers_from_string from calibre.db.errors import NoSuchFormat +from calibre.db.constants import DATA_FILE_PATTERN from calibre.library.save_to_disk import ( config, do_save_book_to_disk, get_formats, sanitize_args ) @@ -26,7 +27,7 @@ def implementation(db, notify_changes, action, *args): mi = db.get_metadata(book_id) plugboards = db.pref('plugboards', {}) formats = get_formats(db.formats(book_id), formats) - extra_files_for_export = tuple(db.list_extra_files_matching(book_id, 'data/**/*')) + extra_files_for_export = tuple(relpath for (relpath, file_path, stat_result) in db.list_extra_files(book_id, pattern=DATA_FILE_PATTERN)) plugboards['extra_files_for_export'] = extra_files_for_export return mi, plugboards, formats, db.library_id, db.pref( 'user_template_functions', [] diff --git a/src/calibre/db/constants.py b/src/calibre/db/constants.py index 821027b61b..c443a52b7d 100644 --- a/src/calibre/db/constants.py +++ b/src/calibre/db/constants.py @@ -9,6 +9,7 @@ METADATA_FILE_NAME = 'metadata.opf' DEFAULT_TRASH_EXPIRY_TIME_SECONDS = 14 * 86400 TRASH_DIR_NAME = '.caltrash' DATA_DIR_NAME = 'data' +DATA_FILE_PATTERN = f'{DATA_DIR_NAME}/**/*' BOOK_ID_PATH_TEMPLATE = ' ({})' diff --git a/src/calibre/db/copy_to_library.py b/src/calibre/db/copy_to_library.py index e0f7395185..98dbe81d83 100644 --- a/src/calibre/db/copy_to_library.py +++ b/src/calibre/db/copy_to_library.py @@ -79,7 +79,9 @@ def copy_one_book( mi.timestamp = now() format_map = {} fmts = list(db.formats(book_id, verify_formats=False)) - extra_file_map = db.list_extra_files_matching(book_id) + extra_file_map = {} + for (relpath, file_path, stat_result) in db.list_extra_files(book_id): + extra_file_map[relpath] = file_path for fmt in fmts: path = db.format_abspath(book_id, fmt) if path: @@ -112,7 +114,7 @@ def copy_one_book( preserve_uuid=preserve_uuid, run_hooks=False)[0][0] bp = db.field_for('path', book_id) if bp: - for (relpath, src_path, mtime) in db.backend.iter_extra_files(book_id, bp, db.fields['formats'], yield_paths=True): + for (relpath, src_path, stat_result) in db.backend.iter_extra_files(book_id, bp, db.fields['formats'], yield_paths=True): nbp = newdb.field_for('path', new_book_id) if nbp: newdb.backend.add_extra_file(relpath, src_path, nbp) diff --git a/src/calibre/db/tests/add_remove.py b/src/calibre/db/tests/add_remove.py index ed0325e6f0..636ad8f330 100644 --- a/src/calibre/db/tests/add_remove.py +++ b/src/calibre/db/tests/add_remove.py @@ -410,7 +410,7 @@ class AddRemoveTest(BaseTest): self.assertFalse(os.path.exists(os.path.join(bookdir, 'sub', 'recurse'))) def clear_extra_files(book_id): - for file_path in dest_db.list_extra_files_matching(book_id).values(): + for (relpath, file_path, stat_result) in dest_db.list_extra_files(book_id): os.remove(file_path) assert_does_not_have_extra_files(1) @@ -468,7 +468,7 @@ class AddRemoveTest(BaseTest): def extra_files_for(book_id): ans = {} - for relpath, file_path in db.list_extra_files_matching(book_id).items(): + for relpath, file_path, stat_result in db.list_extra_files(book_id): with open(file_path) as f: ans[relpath] = f.read() return ans diff --git a/src/calibre/db/tests/filesystem.py b/src/calibre/db/tests/filesystem.py index db55ccac8d..83bf8917cc 100644 --- a/src/calibre/db/tests/filesystem.py +++ b/src/calibre/db/tests/filesystem.py @@ -103,6 +103,11 @@ class FilesystemTest(BaseTest): # test only formats being changed init_cache() + ef = set() + for (relpath, file_path, stat_result) in cache.list_extra_files(1): + ef.add(relpath) + self.assertTrue(os.path.exists(file_path)) + self.assertEqual(ef, {'a.side', 'subdir/a.fmt1'}) fname = cache.fields['formats'].table.fname_map[1]['FMT1'] cache.fields['formats'].table.fname_map[1]['FMT1'] = 'some thing else' cache.fields['formats'].table.fname_map[1]['FMT2'] = fname.upper() @@ -224,8 +229,8 @@ class FilesystemTest(BaseTest): os.mkdir(os.path.join(bookdir, 'sub')) with open(os.path.join(bookdir, 'sub', 'recurse'), 'w') as f: f.write('recurse') - self.assertEqual(set(cache.list_extra_files_matching(1, 'sub/**/*')), {'sub/recurse'}) - self.assertEqual(set(cache.list_extra_files_matching(1, '')), {'exf', 'sub/recurse'}) + self.assertEqual({relpath for (relpath, _, _) in cache.list_extra_files(1, pattern='sub/**/*')}, {'sub/recurse'}) + self.assertEqual({relpath for (relpath, _, _) in cache.list_extra_files(1)}, {'exf', 'sub/recurse'}) for part_size in (1 << 30, 100, 1): with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir: exporter = Exporter(tdir, part_size=part_size) diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py index ccffb79227..83f839534f 100644 --- a/src/calibre/db/tests/writing.py +++ b/src/calibre/db/tests/writing.py @@ -381,7 +381,7 @@ class WritingTest(BaseTest): def read_all_extra_files(book_id=1): ans = {} bp = cache.field_for('path', book_id) - for (relpath, fobj, mtime) in cache.backend.iter_extra_files(book_id, bp, cache.fields['formats']): + for (relpath, fobj, stat_result) in cache.backend.iter_extra_files(book_id, bp, cache.fields['formats']): ans[relpath] = fobj.read() return ans diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index 46f5025eb3..9d9cc6ed5a 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -329,6 +329,10 @@ class ChooseLibraryAction(InterfaceAction): None, None), attr='action_restore_database') ac.triggered.connect(self.restore_database, type=Qt.ConnectionType.QueuedConnection) + ac = self.create_action(spec=(_('Clear extra files cache'), 'lt.png', + None, None), + attr='action_clear_extra_files_cache') + ac.triggered.connect(self.clear_extra_files_cache, type=Qt.ConnectionType.QueuedConnection) self.maintenance_menu.addAction(ac) self.choose_menu.addMenu(self.maintenance_menu) @@ -649,6 +653,10 @@ class ChooseLibraryAction(InterfaceAction): if restore_database(db, self.gui): self.gui.library_moved(db.library_path) + def clear_extra_files_cache(self): + db = self.gui.library_view.model().db + db.new_api.clear_extra_files_cache() + def check_library(self): from calibre.gui2.dialogs.check_library import CheckLibraryDialog, DBCheck self.gui.library_view.save_state() diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 2c74390189..b1986854b9 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -273,7 +273,9 @@ class ViewAction(InterfaceAction): if not self._view_check(len(rows), max_=10, skip_dialog_name='open-folder-many-check'): return for i, row in enumerate(rows): - path = self.gui.library_view.model().db.abspath(row.row()) + db = self.gui.library_view.model().db + db.new_api.clear_extra_files_cache(self.gui.library_view.model().id(row)) + path = db.abspath(row.row()) open_local_file(path) if ismacos and i < len(rows) - 1: time.sleep(0.1) # Finder cannot handle multiple folder opens @@ -283,7 +285,9 @@ class ViewAction(InterfaceAction): open_local_file(path) def view_data_folder_for_id(self, id_): - path = self.gui.library_view.model().db.abspath(id_, index_is_id=True) + db = self.gui.library_view.model().db + db.new_api.clear_extra_files_cache(id_) + path = db.abspath(id_, index_is_id=True) open_local_file(os.path.join(path, DATA_DIR_NAME)) def view_book(self, triggered): diff --git a/src/calibre/gui2/save.py b/src/calibre/gui2/save.py index 9976a42a5e..83c512f764 100644 --- a/src/calibre/gui2/save.py +++ b/src/calibre/gui2/save.py @@ -16,6 +16,7 @@ from calibre import force_unicode, prints from calibre.constants import DEBUG from calibre.customize.ui import can_set_metadata from calibre.db.errors import NoSuchFormat +from calibre.db.constants import DATA_FILE_PATTERN from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.gui2 import error_dialog, gprefs, open_local_file, warning_dialog @@ -213,7 +214,9 @@ class Saver(QObject): extra_files = {} if self.opts.save_extra_files: - extra_files = self.db.new_api.list_extra_files_matching(int(book_id), 'data/**/*') + extra_files = {} + for (relpath, file_path, stat_result) in self.db.new_api.list_extra_files(int(book_id), pattern=DATA_FILE_PATTERN): + extra_files[relpath] = file_path if not fmts and not self.opts.write_opf and not self.opts.save_cover and not extra_files: return