From 66c5a1072bee017de327951303751cda6e931211 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 6 Aug 2013 15:52:27 +0530 Subject: [PATCH] newdb: Workaround windows spinning the event loop when deleting files Windows, being deigned by the geniuses that it is, spins the event loop while deleting files to the recycle bin, and there exists *no other way* to move files to the Recycle Bin, since the Recycle Bin format is not documented or stable. So we move only files out of the library in the thread calling delete_books(). The files are moved to the Recycle Bin in a worker thread. This has two advantages: 1) Faster deletes, since the main thread does not have to wait on the Recycle Bin (some windows' installs are so badly messed up that moving a single file to the Bin takes seconds) 2) Restoring deleted files from the bin will not restore them inside the calibre library folder, where they become orphed. They will be restored elsewhere. Disadvantages: 1) If the user deletes a lot of books and quits calibre, they might not be finished deleting on quit, this can probably be mitigated by popping up a warning at shutdown --- src/calibre/db/backend.py | 45 +++++++---- src/calibre/db/delete_service.py | 116 +++++++++++++++++++++++++++++ src/calibre/db/tests/add_remove.py | 17 ++++- src/calibre/gui2/ui.py | 2 + 4 files changed, 162 insertions(+), 18 deletions(-) create mode 100644 src/calibre/db/delete_service.py diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 05a8887576..871d704ce1 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' # Imports {{{ -import os, shutil, uuid, json, glob, time, cPickle, hashlib +import os, shutil, uuid, json, glob, time, cPickle, hashlib, errno from functools import partial import apsw @@ -19,6 +19,7 @@ from calibre.constants import (iswindows, filesystem_encoding, from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile from calibre.db import SPOOL_SIZE from calibre.db.schema_upgrades import SchemaUpgrade +from calibre.db.delete_service import delete_service from calibre.db.errors import NoSuchFormat from calibre.library.field_metadata import FieldMetadata from calibre.ebooks.metadata import title_sort, author_to_author_sort @@ -28,7 +29,6 @@ from calibre.utils.date import utcfromtimestamp, parse_date from calibre.utils.filenames import ( is_case_sensitive, samefile, hardlink_file, ascii_filename, WindowsAtomicFolderMove, atomic_rename) from calibre.utils.magick.draw import save_cover_data_to -from calibre.utils.recycle_bin import delete_tree, delete_file from calibre.utils.formatter_functions import load_user_template_functions from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, PathTable, @@ -1035,9 +1035,18 @@ class DB(object): path = os.path.normcase(path).lower() return path - def rmtree(self, path, permanent=False): - if not self.normpath(self.library_path).startswith(self.normpath(path)): - delete_tree(path, permanent=permanent) + def is_deletable(self, path): + return path and not self.normpath(self.library_path).startswith(self.normpath(path)) + + def rmtree(self, path): + if self.is_deletable(path): + try: + shutil.rmtree(path) + except: + import traceback + traceback.print_exc() + time.sleep(1) # In case something has temporarily locked a file + shutil.rmtree(path) def construct_path_name(self, book_id, title, author): ''' @@ -1170,7 +1179,7 @@ class DB(object): path = self.format_abspath(book_id, fmt, fname, path) if path is not None: try: - delete_file(path) + delete_service().delete_files((path,), self.library_path) except: import traceback traceback.print_exc() @@ -1360,10 +1369,10 @@ class DB(object): if os.path.exists(spath) and not samefile(spath, tpath): if wam is not None: wam.delete_originals() - self.rmtree(spath, permanent=True) + self.rmtree(spath) parent = os.path.dirname(spath) if len(os.listdir(parent)) == 0: - self.rmtree(parent, permanent=True) + self.rmtree(parent) finally: if wam is not None: wam.close_handles() @@ -1404,16 +1413,20 @@ class DB(object): return f.read() def remove_books(self, path_map, permanent=False): - for book_id, path in path_map.iteritems(): - if path: - path = os.path.join(self.library_path, path) - if os.path.exists(path): - self.rmtree(path, permanent=permanent) - parent = os.path.dirname(path) - if len(os.listdir(parent)) == 0: - self.rmtree(parent, permanent=permanent) self.conn.executemany( 'DELETE FROM books WHERE id=?', [(x,) for x in path_map]) + paths = {os.path.join(self.library_path, x) for x in path_map.itervalues() if x} + paths = {x for x in paths if os.path.exists(x) and self.is_deletable(x)} + if permanent: + for path in paths: + self.rmtree(path) + try: + os.rmdir(os.path.dirname(path)) + except OSError as e: + if e.errno != errno.ENOTEMPTY: + raise + else: + delete_service().delete_books(paths, self.library_path) def add_custom_data(self, name, val_map, delete_first): if delete_first: diff --git a/src/calibre/db/delete_service.py b/src/calibre/db/delete_service.py new file mode 100644 index 0000000000..a79afc7d4b --- /dev/null +++ b/src/calibre/db/delete_service.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +import os, tempfile, shutil, errno, time +from threading import Thread +from Queue import Queue + +from calibre.utils.recycle_bin import delete_tree, delete_file + +class DeleteService(Thread): + + ''' Provide a blocking file delete implementation with support for the + recycle bin. On windows, deleting files to the recycle bin spins the event + loop, which can cause locking errors in the main thread. We get around this + by only moving the files/folders to be deleted out of the library in the + main thread, they are deleted to recycle bin in a separate worker thread. + + This has the added advantage that doing a restore from the recycle bin wont + cause metadata.db and the file system to get out of sync. Also, deleting + becomes much faster, since in the common case, the move is done by a simple + os.rename(). The downside is that if the user quits calibre while a long + move to recycle bin is happening, the files may not all be deleted.''' + + daemon = True + + def __init__(self): + Thread.__init__(self) + self.requests = Queue() + + def shutdown(self, timeout=20): + self.requests.put(None) + self.join(timeout) + + def create_staging(self, library_path): + base_path = os.path.dirname(library_path) + base = os.path.basename(library_path) + try: + return tempfile.mkdtemp(prefix=base+' deleted ', dir=base_path) + except OSError: + return tempfile.mkdtemp(prefix=base+' deleted ') + + def delete_books(self, paths, library_path): + tdir = self.create_staging(library_path) + self.queue_paths(tdir, paths, delete_empty_parent=True) + + def queue_paths(self, tdir, paths, delete_empty_parent=True): + for path in paths: + if os.path.exists(path): + try: + shutil.move(path, tdir) + except EnvironmentError: + # Wait a little in case something has locked a file + time.sleep(1) + shutil.move(path, tdir) + if delete_empty_parent: + parent = os.path.dirname(path) + try: + os.rmdir(parent) + except OSError as e: + if e.errno != errno.ENOTEMPTY: + raise + self.requests.put(os.path.join(tdir, os.path.basename(path))) + + def delete_files(self, paths, library_path): + tdir = self.create_staging(library_path) + self.queue_paths(tdir, paths, delete_empty_parent=False) + + def run(self): + while True: + x = self.requests.get() + try: + if x is None: + break + try: + self.do_delete(x) + except: + import traceback + traceback.print_exc() + finally: + self.requests.task_done() + + def wait(self): + 'Blocks until all pending deletes have completed' + self.requests.join() + + def do_delete(self, x): + if os.path.isdir(x): + delete_tree(x) + else: + delete_file(x) + try: + os.rmdir(os.path.dirname(x)) + except OSError as e: + if e.errno != errno.ENOTEMPTY: + raise + +__ds = None +def delete_service(): + global __ds + if __ds is None: + __ds = DeleteService() + __ds.start() + return __ds + +def shutdown(timeout=20): + global __ds + if __ds is not None: + __ds.shutdown(timeout) + __ds = None + + diff --git a/src/calibre/db/tests/add_remove.py b/src/calibre/db/tests/add_remove.py index 5cbac5deca..fb44d0e62c 100644 --- a/src/calibre/db/tests/add_remove.py +++ b/src/calibre/db/tests/add_remove.py @@ -209,6 +209,7 @@ class AddRemoveTest(BaseTest): def test_remove_books(self): # {{{ 'Test removal of books' + cl = self.cloned_library cache = self.init_cache() af, ae, at = self.assertFalse, self.assertEqual, self.assertTrue authors = cache.fields['authors'].table @@ -233,7 +234,7 @@ class AddRemoveTest(BaseTest): item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two'] cache.remove_books((1,), permanent=True) for x in (fmtpath, bookpath, authorpath): - af(os.path.exists(x)) + af(os.path.exists(x), 'The file %s exists, when it should not' % x) for c in (cache, self.init_cache()): table = c.fields['authors'].table self.assertNotIn(1, c.all_book_ids()) @@ -252,6 +253,19 @@ class AddRemoveTest(BaseTest): self.assertFalse(table.book_col_map) self.assertFalse(table.col_book_map) + # Test the delete service + from calibre.db.delete_service import delete_service + cache = self.init_cache(cl) + # Check that files are removed + fmtpath = cache.format_abspath(1, 'FMT1') + bookpath = os.path.dirname(fmtpath) + authorpath = os.path.dirname(bookpath) + item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two'] + cache.remove_books((1,)) + delete_service().wait() + for x in (fmtpath, bookpath, authorpath): + af(os.path.exists(x), 'The file %s exists, when it should not' % x) + # }}} def test_original_fmt(self): # {{{ @@ -271,4 +285,3 @@ class AddRemoveTest(BaseTest): af(db.has_format(1, 'ORIGINAL_FMT1')) ae(set(fmts), set(db.formats(1, verify_formats=False))) # }}} - diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 8e34c2a84f..90eac209b2 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -854,6 +854,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ pass except KeyboardInterrupt: pass + from calibre.db.delete_service import shutdown + shutdown() time.sleep(2) self.istores.join() self.hide_windows()