newdb: Workaround windows spinning the event loop when deleting files

Windows, being deigned by the geniuses that it is, spins the event loop
while deleting files to the recycle bin, and there exists *no other way* to
move files to the Recycle Bin, since the Recycle Bin format is not
documented or stable.

So we move only files out of the library in the thread calling
delete_books(). The files are moved to the Recycle Bin in a worker
thread. This has two advantages:

1) Faster deletes, since the main thread does not have to wait on the
Recycle Bin (some windows' installs are so badly messed up that moving a
single file to the Bin takes seconds)

2) Restoring deleted files from the bin will not restore them inside the
calibre library folder, where they become orphed. They will be restored
elsewhere.

Disadvantages:

1) If the user deletes a lot of books and quits calibre, they might not
be finished deleting on quit, this can probably be mitigated by popping
up a warning at shutdown
This commit is contained in:
Kovid Goyal 2013-08-06 15:52:27 +05:30
parent 7b7f5c81dc
commit 66c5a1072b
4 changed files with 162 additions and 18 deletions

View File

@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
# Imports {{{ # Imports {{{
import os, shutil, uuid, json, glob, time, cPickle, hashlib import os, shutil, uuid, json, glob, time, cPickle, hashlib, errno
from functools import partial from functools import partial
import apsw import apsw
@ -19,6 +19,7 @@ from calibre.constants import (iswindows, filesystem_encoding,
from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
from calibre.db import SPOOL_SIZE from calibre.db import SPOOL_SIZE
from calibre.db.schema_upgrades import SchemaUpgrade from calibre.db.schema_upgrades import SchemaUpgrade
from calibre.db.delete_service import delete_service
from calibre.db.errors import NoSuchFormat from calibre.db.errors import NoSuchFormat
from calibre.library.field_metadata import FieldMetadata from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
@ -28,7 +29,6 @@ from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.utils.filenames import ( from calibre.utils.filenames import (
is_case_sensitive, samefile, hardlink_file, ascii_filename, WindowsAtomicFolderMove, atomic_rename) is_case_sensitive, samefile, hardlink_file, ascii_filename, WindowsAtomicFolderMove, atomic_rename)
from calibre.utils.magick.draw import save_cover_data_to from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.recycle_bin import delete_tree, delete_file
from calibre.utils.formatter_functions import load_user_template_functions from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, PathTable, SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, PathTable,
@ -1035,9 +1035,18 @@ class DB(object):
path = os.path.normcase(path).lower() path = os.path.normcase(path).lower()
return path return path
def rmtree(self, path, permanent=False): def is_deletable(self, path):
if not self.normpath(self.library_path).startswith(self.normpath(path)): return path and not self.normpath(self.library_path).startswith(self.normpath(path))
delete_tree(path, permanent=permanent)
def rmtree(self, path):
if self.is_deletable(path):
try:
shutil.rmtree(path)
except:
import traceback
traceback.print_exc()
time.sleep(1) # In case something has temporarily locked a file
shutil.rmtree(path)
def construct_path_name(self, book_id, title, author): def construct_path_name(self, book_id, title, author):
''' '''
@ -1170,7 +1179,7 @@ class DB(object):
path = self.format_abspath(book_id, fmt, fname, path) path = self.format_abspath(book_id, fmt, fname, path)
if path is not None: if path is not None:
try: try:
delete_file(path) delete_service().delete_files((path,), self.library_path)
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -1360,10 +1369,10 @@ class DB(object):
if os.path.exists(spath) and not samefile(spath, tpath): if os.path.exists(spath) and not samefile(spath, tpath):
if wam is not None: if wam is not None:
wam.delete_originals() wam.delete_originals()
self.rmtree(spath, permanent=True) self.rmtree(spath)
parent = os.path.dirname(spath) parent = os.path.dirname(spath)
if len(os.listdir(parent)) == 0: if len(os.listdir(parent)) == 0:
self.rmtree(parent, permanent=True) self.rmtree(parent)
finally: finally:
if wam is not None: if wam is not None:
wam.close_handles() wam.close_handles()
@ -1404,16 +1413,20 @@ class DB(object):
return f.read() return f.read()
def remove_books(self, path_map, permanent=False): def remove_books(self, path_map, permanent=False):
for book_id, path in path_map.iteritems():
if path:
path = os.path.join(self.library_path, path)
if os.path.exists(path):
self.rmtree(path, permanent=permanent)
parent = os.path.dirname(path)
if len(os.listdir(parent)) == 0:
self.rmtree(parent, permanent=permanent)
self.conn.executemany( self.conn.executemany(
'DELETE FROM books WHERE id=?', [(x,) for x in path_map]) 'DELETE FROM books WHERE id=?', [(x,) for x in path_map])
paths = {os.path.join(self.library_path, x) for x in path_map.itervalues() if x}
paths = {x for x in paths if os.path.exists(x) and self.is_deletable(x)}
if permanent:
for path in paths:
self.rmtree(path)
try:
os.rmdir(os.path.dirname(path))
except OSError as e:
if e.errno != errno.ENOTEMPTY:
raise
else:
delete_service().delete_books(paths, self.library_path)
def add_custom_data(self, name, val_map, delete_first): def add_custom_data(self, name, val_map, delete_first):
if delete_first: if delete_first:

View File

@ -0,0 +1,116 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, tempfile, shutil, errno, time
from threading import Thread
from Queue import Queue
from calibre.utils.recycle_bin import delete_tree, delete_file
class DeleteService(Thread):
''' Provide a blocking file delete implementation with support for the
recycle bin. On windows, deleting files to the recycle bin spins the event
loop, which can cause locking errors in the main thread. We get around this
by only moving the files/folders to be deleted out of the library in the
main thread, they are deleted to recycle bin in a separate worker thread.
This has the added advantage that doing a restore from the recycle bin wont
cause metadata.db and the file system to get out of sync. Also, deleting
becomes much faster, since in the common case, the move is done by a simple
os.rename(). The downside is that if the user quits calibre while a long
move to recycle bin is happening, the files may not all be deleted.'''
daemon = True
def __init__(self):
Thread.__init__(self)
self.requests = Queue()
def shutdown(self, timeout=20):
self.requests.put(None)
self.join(timeout)
def create_staging(self, library_path):
base_path = os.path.dirname(library_path)
base = os.path.basename(library_path)
try:
return tempfile.mkdtemp(prefix=base+' deleted ', dir=base_path)
except OSError:
return tempfile.mkdtemp(prefix=base+' deleted ')
def delete_books(self, paths, library_path):
tdir = self.create_staging(library_path)
self.queue_paths(tdir, paths, delete_empty_parent=True)
def queue_paths(self, tdir, paths, delete_empty_parent=True):
for path in paths:
if os.path.exists(path):
try:
shutil.move(path, tdir)
except EnvironmentError:
# Wait a little in case something has locked a file
time.sleep(1)
shutil.move(path, tdir)
if delete_empty_parent:
parent = os.path.dirname(path)
try:
os.rmdir(parent)
except OSError as e:
if e.errno != errno.ENOTEMPTY:
raise
self.requests.put(os.path.join(tdir, os.path.basename(path)))
def delete_files(self, paths, library_path):
tdir = self.create_staging(library_path)
self.queue_paths(tdir, paths, delete_empty_parent=False)
def run(self):
while True:
x = self.requests.get()
try:
if x is None:
break
try:
self.do_delete(x)
except:
import traceback
traceback.print_exc()
finally:
self.requests.task_done()
def wait(self):
'Blocks until all pending deletes have completed'
self.requests.join()
def do_delete(self, x):
if os.path.isdir(x):
delete_tree(x)
else:
delete_file(x)
try:
os.rmdir(os.path.dirname(x))
except OSError as e:
if e.errno != errno.ENOTEMPTY:
raise
__ds = None
def delete_service():
global __ds
if __ds is None:
__ds = DeleteService()
__ds.start()
return __ds
def shutdown(timeout=20):
global __ds
if __ds is not None:
__ds.shutdown(timeout)
__ds = None

View File

@ -209,6 +209,7 @@ class AddRemoveTest(BaseTest):
def test_remove_books(self): # {{{ def test_remove_books(self): # {{{
'Test removal of books' 'Test removal of books'
cl = self.cloned_library
cache = self.init_cache() cache = self.init_cache()
af, ae, at = self.assertFalse, self.assertEqual, self.assertTrue af, ae, at = self.assertFalse, self.assertEqual, self.assertTrue
authors = cache.fields['authors'].table authors = cache.fields['authors'].table
@ -233,7 +234,7 @@ class AddRemoveTest(BaseTest):
item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two'] item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two']
cache.remove_books((1,), permanent=True) cache.remove_books((1,), permanent=True)
for x in (fmtpath, bookpath, authorpath): for x in (fmtpath, bookpath, authorpath):
af(os.path.exists(x)) af(os.path.exists(x), 'The file %s exists, when it should not' % x)
for c in (cache, self.init_cache()): for c in (cache, self.init_cache()):
table = c.fields['authors'].table table = c.fields['authors'].table
self.assertNotIn(1, c.all_book_ids()) self.assertNotIn(1, c.all_book_ids())
@ -252,6 +253,19 @@ class AddRemoveTest(BaseTest):
self.assertFalse(table.book_col_map) self.assertFalse(table.book_col_map)
self.assertFalse(table.col_book_map) self.assertFalse(table.col_book_map)
# Test the delete service
from calibre.db.delete_service import delete_service
cache = self.init_cache(cl)
# Check that files are removed
fmtpath = cache.format_abspath(1, 'FMT1')
bookpath = os.path.dirname(fmtpath)
authorpath = os.path.dirname(bookpath)
item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two']
cache.remove_books((1,))
delete_service().wait()
for x in (fmtpath, bookpath, authorpath):
af(os.path.exists(x), 'The file %s exists, when it should not' % x)
# }}} # }}}
def test_original_fmt(self): # {{{ def test_original_fmt(self): # {{{
@ -271,4 +285,3 @@ class AddRemoveTest(BaseTest):
af(db.has_format(1, 'ORIGINAL_FMT1')) af(db.has_format(1, 'ORIGINAL_FMT1'))
ae(set(fmts), set(db.formats(1, verify_formats=False))) ae(set(fmts), set(db.formats(1, verify_formats=False)))
# }}} # }}}

View File

@ -854,6 +854,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
pass pass
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
from calibre.db.delete_service import shutdown
shutdown()
time.sleep(2) time.sleep(2)
self.istores.join() self.istores.join()
self.hide_windows() self.hide_windows()