From 36780c1b59cfb884e7498d44a5396170994feb0c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 10 Dec 2015 14:12:25 +0530 Subject: [PATCH] Speed up library move by using hardlinks instead of file copies when moving to a location on the same filesystem --- src/calibre/db/backend.py | 25 +++++++++++++------------ src/calibre/db/cache.py | 14 ++++++++++---- src/calibre/db/legacy.py | 1 - src/calibre/db/tests/legacy.py | 2 +- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 924bbafe52..28be8c18a7 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -28,7 +28,8 @@ from calibre.utils.config import to_json, from_json, prefs, tweaks from calibre.utils.date import utcfromtimestamp, parse_date from calibre.utils.filenames import ( is_case_sensitive, samefile, hardlink_file, ascii_filename, - WindowsAtomicFolderMove, atomic_rename, remove_dir_if_empty) + WindowsAtomicFolderMove, atomic_rename, remove_dir_if_empty, + copytree_using_links, copyfile_using_links) from calibre.utils.magick.draw import save_cover_data_to from calibre.utils.formatter_functions import load_user_template_functions from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, @@ -1666,41 +1667,39 @@ class DB(object): items = items.intersection(paths) return items, path_map - def move_library_to(self, all_paths, newloc, progress=lambda x: x): + def move_library_to(self, all_paths, newloc, progress=(lambda item_name, item_count, total: None), abort=None): if not os.path.exists(newloc): os.makedirs(newloc) old_dirs, old_files = set(), set() items, path_map = self.get_top_level_move_items(all_paths) - for x in items: + total = len(items) + 1 + for i, x in enumerate(items): + if abort is not None and abort.is_set(): + return src = os.path.join(self.library_path, x) dest = os.path.join(newloc, path_map[x]) if os.path.isdir(src): if os.path.exists(dest): shutil.rmtree(dest) - shutil.copytree(src, dest) + copytree_using_links(src, dest, dest_is_parent=False) old_dirs.add(src) else: if os.path.exists(dest): os.remove(dest) - shutil.copyfile(src, dest) + copyfile_using_links(src, dest, dest_is_dir=False) old_files.add(src) x = path_map[x] if not isinstance(x, unicode): x = x.decode(filesystem_encoding, 'replace') - progress(x) + progress(x, i+1, total) dbpath = os.path.join(newloc, os.path.basename(self.dbpath)) - opath, odir = self.dbpath, self.library_path + odir = self.library_path self.conn.close() self.library_path, self.dbpath = newloc, dbpath if self._conn is not None: self._conn.close() self._conn = None - self.conn - try: - os.unlink(opath) - except: - pass for loc in old_dirs: try: shutil.rmtree(loc) @@ -1717,6 +1716,8 @@ class DB(object): os.rmdir(odir) except EnvironmentError: pass + self.conn # Connect to the moved metadata.db + progress(_('Completed'), total, total) def restore_book(self, book_id, path, formats): self.execute('UPDATE books SET path=? WHERE id=?', (path.replace(os.sep, '/'), book_id)) diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index 19e0d8524d..e639d406f8 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -1938,11 +1938,17 @@ class Cache(object): return self.backend.get_top_level_move_items(all_paths) @write_api - def move_library_to(self, newloc, progress=None): - if progress is None: - progress = lambda x:x + def move_library_to(self, newloc, progress=None, abort=None): + def progress_callback(item_name, item_count, total): + try: + if progress is not None: + progress(item_name, item_count, total) + except Exception: + import traceback + traceback.print_exc() + all_paths = {self._field_for('path', book_id).partition('/')[0] for book_id in self._all_book_ids()} - self.backend.move_library_to(all_paths, newloc, progress=progress) + self.backend.move_library_to(all_paths, newloc, progress=progress_callback, abort=abort) @read_api def saved_search_names(self): diff --git a/src/calibre/db/legacy.py b/src/calibre/db/legacy.py index 90280dfbcb..384bd45cea 100644 --- a/src/calibre/db/legacy.py +++ b/src/calibre/db/legacy.py @@ -917,7 +917,6 @@ for meth in ('get_next_series_num_for', 'has_book', 'author_sort_from_authors'): return func setattr(LibraryDatabase, meth, MT(getter(meth))) -LibraryDatabase.move_library_to = MT(lambda self, newloc, progress=None:self.new_api.move_library_to(newloc, progress=progress)) LibraryDatabase.saved_search_names = MT(lambda self:self.new_api.saved_search_names()) LibraryDatabase.saved_search_lookup = MT(lambda self, x:self.new_api.saved_search_lookup(x)) LibraryDatabase.saved_search_set_all = MT(lambda self, smap:self.new_api.saved_search_set_all(smap)) diff --git a/src/calibre/db/tests/legacy.py b/src/calibre/db/tests/legacy.py index 17cf8ae57b..0ef135a227 100644 --- a/src/calibre/db/tests/legacy.py +++ b/src/calibre/db/tests/legacy.py @@ -417,6 +417,7 @@ class LegacyTest(BaseTest): 'books_in_old_database', 'sizeof_old_database', # unused 'migrate_old', # no longer supported 'remove_unused_series', # superseded by clean API + 'move_library_to', # API changed, no code uses old API # Internal API 'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name', @@ -802,4 +803,3 @@ class LegacyTest(BaseTest): )) db.close() # }}} -