From 9b21595f9216edde64058717227f4674fc7bac2c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Oct 2012 17:31:13 +0530 Subject: [PATCH] Make title/author renames completely atomic on windows --- src/calibre/library/database2.py | 141 +++++++++++++++++++++---------- src/calibre/utils/filenames.py | 95 +++++++++++++++------ 2 files changed, 164 insertions(+), 72 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 5b2abd0c6b..2839f8ea6d 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' The database used to store ebook metadata ''' import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \ - json, uuid, hashlib, copy, errno + json, uuid, hashlib, copy from collections import defaultdict import threading, random from itertools import repeat @@ -31,7 +31,7 @@ from calibre.ptempfile import (PersistentTemporaryFile, from calibre.customize.ui import run_plugins_on_import from calibre import isbytestring from calibre.utils.filenames import (ascii_filename, samefile, - windows_is_folder_in_use) + WindowsAtomicFolderMove) from calibre.utils.date import (utcnow, now as nowf, utcfromtimestamp, parse_only_date, UNDEFINED_DATE) from calibre.utils.config import prefs, tweaks, from_json, to_json @@ -641,45 +641,43 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if name and name != fname: changed = True break - tpath = os.path.join(self.library_path, *path.split('/')) - if not os.path.exists(tpath): - os.makedirs(tpath) if path == current_path and not changed: return - spath = os.path.join(self.library_path, *current_path.split('/')) + tpath = os.path.join(self.library_path, *path.split('/')) - if current_path and os.path.exists(spath): # Migrate existing files - if iswindows: - uf = windows_is_folder_in_use(spath) - if uf is not None: - err = IOError(errno.EACCES, - _('File is open in another process')) - err.filename = uf - raise err - cdata = self.cover(id, index_is_id=True) - if cdata is not None: - with lopen(os.path.join(tpath, 'cover.jpg'), 'wb') as f: - f.write(cdata) - for format in formats: - copy_function = functools.partial(self.copy_format_to, id, - format, index_is_id=True) - try: - self.add_format(id, format, None, index_is_id=True, - path=tpath, notify=False, copy_function=copy_function) - except NoSuchFormat: - continue - self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) - self.dirtied([id], commit=False) - self.conn.commit() - self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) - # Delete not needed directories - if current_path and os.path.exists(spath): - if not samefile(spath, tpath): - self.rmtree(spath, permanent=True) - parent = os.path.dirname(spath) - if len(os.listdir(parent)) == 0: - self.rmtree(parent, permanent=True) + wam = WindowsAtomicFolderMove(spath) if iswindows else None + try: + if not os.path.exists(tpath): + os.makedirs(tpath) + + if current_path and os.path.exists(spath): # Migrate existing files + self.copy_cover_to(id, os.path.join(tpath, 'cover.jpg'), + index_is_id=True, windows_atomic_move=wam) + for format in formats: + copy_function = functools.partial(self.copy_format_to, id, + format, index_is_id=True, windows_atomic_move=wam) + try: + self.add_format(id, format, None, index_is_id=True, + path=tpath, notify=False, copy_function=copy_function) + except NoSuchFormat: + continue + self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) + self.dirtied([id], commit=False) + self.conn.commit() + self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) + # Delete not needed directories + if current_path and os.path.exists(spath): + if not samefile(spath, tpath): + if wam is not None: + wam.delete_originals() + self.rmtree(spath, permanent=True) + parent = os.path.dirname(spath) + if len(os.listdir(parent)) == 0: + self.rmtree(parent, permanent=True) + finally: + if wam is not None: + wam.close_handles() curpath = self.library_path c1, c2 = current_path.split('/'), path.split('/') @@ -1348,26 +1346,77 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return None return fmt_path - def copy_format_to(self, index, fmt, dest, index_is_id=False): + def copy_format_to(self, index, fmt, dest, index_is_id=False, + windows_atomic_move=None): ''' Copy the format ``fmt`` to the file like object ``dest``. If the specified format does not exist, raises :class:`NoSuchFormat` error. dest can also be a path, in which case the format is copied to it, iff the path is different from the current path (taking case sensitivity into account). + + windows_atomic_move is an internally used parameter. You should not use + it in any code outside this module. ''' path = self.format_abspath(index, fmt, index_is_id=index_is_id) if path is None: id_ = index if index_is_id else self.id(index) raise NoSuchFormat('Record %d has no %s file'%(id_, fmt)) - if hasattr(dest, 'write'): - with lopen(path, 'rb') as f: - shutil.copyfileobj(f, dest) - if hasattr(dest, 'flush'): - dest.flush() - elif dest and not samefile(dest, path): - with lopen(path, 'rb') as f, lopen(dest, 'wb') as d: - shutil.copyfileobj(f, d) + if windows_atomic_move is not None: + if not isinstance(dest, basestring): + raise Exception("Error, you must pass the dest as a path when" + " using windows_atomic_move") + if dest and not samefile(dest, path): + windows_atomic_move.copy_path_to(path, dest) + else: + if hasattr(dest, 'write'): + with lopen(path, 'rb') as f: + shutil.copyfileobj(f, dest) + if hasattr(dest, 'flush'): + dest.flush() + elif dest and not samefile(dest, path): + with lopen(path, 'rb') as f, lopen(dest, 'wb') as d: + shutil.copyfileobj(f, d) + + def copy_cover_to(self, index, dest, index_is_id=False, + windows_atomic_move=None): + ''' + Copy the format cover to the file like object ``dest``. Returns False + if no cover exists or dest is the same file as the current cover. + dest can also be a path in which case the cover is + copied to it iff the path is different from the current path (taking + case sensitivity into account). + + windows_atomic_move is an internally used parameter. You should not use + it in any code outside this module. + ''' + id = index if index_is_id else self.id(index) + path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') + if windows_atomic_move is not None: + if not isinstance(dest, basestring): + raise Exception("Error, you must pass the dest as a path when" + " using windows_atomic_move") + if os.access(path, os.R_OK) and dest and not samefile(dest, path): + windows_atomic_move.copy_path_to(path, dest) + return True + else: + if os.access(path, os.R_OK): + try: + f = lopen(path, 'rb') + except (IOError, OSError): + time.sleep(0.2) + f = lopen(path, 'rb') + with f: + if hasattr(dest, 'write'): + shutil.copyfileobj(f, dest) + if hasattr(dest, 'flush'): + dest.flush() + return True + elif dest and not samefile(dest, path): + with lopen(dest, 'wb') as d: + shutil.copyfileobj(f, d) + return True + return False def format(self, index, format, index_is_id=False, as_file=False, mode='r+b', as_path=False, preserve_filename=False): diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index f6455831ba..1609088c49 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -3,7 +3,7 @@ Make strings safe for use as ASCII filenames, while trying to preserve as much meaning as possible. ''' -import os +import os, errno from math import ceil from calibre import sanitize_file_name, isbytestring, force_unicode @@ -249,32 +249,75 @@ def samefile(src, dst): os.path.normcase(os.path.abspath(dst))) return samestring -def windows_is_file_opened(path): - import win32file, winerror - from pywintypes import error - if isbytestring(path): path = path.decode(filesystem_encoding) - try: - h = win32file.CreateFile(path, win32file.GENERIC_READ, 0, None, - win32file.OPEN_EXISTING, 0, 0) - except error as e: - if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION: - return True - else: - win32file.CloseHandle(h) - return False +class WindowsAtomicFolderMove(object): -def windows_is_folder_in_use(path): ''' - Returns the path to a file that is used in another process in the specified - folder, or None if no such file exists. Note - that this function is not a guarantee. A file may well be opened in the - folder after this function returns. However, it is useful to handle the - common case of a sharing violation gracefully most of the time. + Move all the files inside a specified folder in an atomic fashion, + preventing any other process from locking a file while the operation is + incomplete. Raises an IOError if another process has locked a file before + the operation starts. Note that this only operates on the files in the + folder, not any sub-folders. ''' - if isbytestring(path): path = path.decode(filesystem_encoding) - for x in os.listdir(path): - f = os.path.join(path, x) - if windows_is_file_opened(f): - return f - return None + + def __init__(self, path): + self.handle_map = {} + + import win32file, winerror + from pywintypes import error + + if isbytestring(path): path = path.decode(filesystem_encoding) + + if not os.path.exists(path): + return + + for x in os.listdir(path): + f = os.path.normcase(os.path.abspath(os.path.join(path, x))) + if not os.path.isfile(f): continue + try: + h = win32file.CreateFile(f, win32file.GENERIC_READ, + win32file.FILE_SHARE_DELETE, None, + win32file.OPEN_EXISTING, win32file.FILE_FLAG_SEQUENTIAL_SCAN, 0) + except error as e: + self.close_handles() + if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION: + err = IOError(errno.EACCES, + _('File is open in another process')) + err.filename = f + raise err + raise + except: + self.close_handles() + raise + self.handle_map[f] = h + + def copy_path_to(self, path, dest): + import win32file + handle = None + for p, h in self.handle_map.iteritems(): + if samefile_windows(path, p): + handle = h + break + if handle is None: + raise ValueError(u'The file %r did not exist when this move' + ' operation was started'%path) + with lopen(dest, 'wb') as f: + while True: + hr, raw = win32file.ReadFile(handle, 1024*1024) + if hr != 0: + raise IOError(hr, u'Error while reading from %r'%path) + if not raw: + break + f.write(raw) + + def close_handles(self): + import win32file + for h in self.handle_map.itervalues(): + win32file.CloseHandle(h) + self.handle_map = {} + + def delete_originals(self): + import win32file + for path in self.handle_map.iterkeys(): + win32file.DeleteFile(path) + self.close_handles()