From fb091d7be0e26845f2a0e644b983d460a3c10246 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 30 Aug 2012 13:57:32 +0530 Subject: [PATCH] Speed up changing the title and author of files with books larger than 3MB by avoiding a double copy. --- src/calibre/library/database2.py | 53 +++++++++++++++++++------------- src/calibre/utils/filenames.py | 42 +++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 22 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 7969a0e032..f1103f57ee 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -30,7 +30,7 @@ from calibre.ptempfile import (PersistentTemporaryFile, base_dir, SpooledTemporaryFile) from calibre.customize.ui import run_plugins_on_import from calibre import isbytestring -from calibre.utils.filenames import ascii_filename +from calibre.utils.filenames import ascii_filename, samefile from calibre.utils.date import (utcnow, now as nowf, utcfromtimestamp, parse_only_date) from calibre.utils.config import prefs, tweaks, from_json, to_json @@ -618,7 +618,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def normpath(self, path): path = os.path.abspath(os.path.realpath(path)) if not self.is_case_sensitive: - path = path.lower() + path = os.path.normcase(path).lower() return path def set_path(self, index, index_is_id=False): @@ -654,21 +654,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): with lopen(os.path.join(tpath, 'cover.jpg'), 'wb') as f: f.write(cdata) for format in formats: - with SpooledTemporaryFile(SPOOL_SIZE) as stream: - try: - self.copy_format_to(id, format, stream, index_is_id=True) - stream.seek(0) - except NoSuchFormat: - continue - self.add_format(id, format, stream, index_is_id=True, - path=tpath, notify=False) + copy_function = functools.partial(self.copy_format_to, id, + format, index_is_id=True) + try: + self.add_format(id, format, None, index_is_id=True, + path=tpath, notify=False, copy_function=copy_function) + except NoSuchFormat: + continue self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) self.dirtied([id], commit=False) self.conn.commit() self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) # Delete not needed directories if current_path and os.path.exists(spath): - if self.normpath(spath) != self.normpath(tpath): + if not samefile(spath, tpath): self.rmtree(spath, permanent=True) parent = os.path.dirname(spath) if len(os.listdir(parent)) == 0: @@ -1343,15 +1342,22 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ''' Copy the format ``fmt`` to the file like object ``dest``. If the specified format does not exist, raises :class:`NoSuchFormat` error. + dest can also be a path, in which case the format is copied to it, iff + the path is different from the current path (taking case sensitivity + into account). ''' path = self.format_abspath(index, fmt, index_is_id=index_is_id) if path is None: id_ = index if index_is_id else self.id(index) raise NoSuchFormat('Record %d has no %s file'%(id_, fmt)) - with lopen(path, 'rb') as f: - shutil.copyfileobj(f, dest) - if hasattr(dest, 'flush'): - dest.flush() + if hasattr(dest, 'write'): + with lopen(path, 'rb') as f: + shutil.copyfileobj(f, dest) + if hasattr(dest, 'flush'): + dest.flush() + elif dest and not samefile(dest, path): + with lopen(path, 'rb') as f, lopen(dest, 'wb') as d: + shutil.copyfileobj(f, d) def format(self, index, format, index_is_id=False, as_file=False, mode='r+b', as_path=False, preserve_filename=False): @@ -1411,7 +1417,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): index_is_id=index_is_id, path=path, notify=notify) def add_format(self, index, format, stream, index_is_id=False, path=None, - notify=True, replace=True): + notify=True, replace=True, copy_function=None): id = index if index_is_id else self.id(index) if not format: format = '' self.format_metadata_cache[id].pop(format.upper(), None) @@ -1426,12 +1432,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): pdir = os.path.dirname(dest) if not os.path.exists(pdir): os.makedirs(pdir) - if not getattr(stream, 'name', False) or \ - os.path.abspath(dest) != os.path.abspath(stream.name): - with lopen(dest, 'wb') as f: - shutil.copyfileobj(stream, f) - stream.seek(0, 2) - size=stream.tell() + if copy_function is not None: + copy_function(dest) + size = os.path.getsize(dest) + else: + if (not getattr(stream, 'name', False) or not samefile(dest, + stream.name)): + with lopen(dest, 'wb') as f: + shutil.copyfileobj(stream, f) + size = f.tell() self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', (id, format.upper(), size, name)) self.update_last_modified([id], commit=False) diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index a4c9e35c3e..c843100157 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -199,3 +199,45 @@ def case_preserving_open_file(path, mode='wb', mkdir_mode=0777): fpath = os.path.join(cpath, fname) return ans, fpath +def samefile_windows(src, dst): + import win32file + from pywintypes import error + + def get_fileid(x): + if isbytestring(x): x = x.decode(filesystem_encoding) + try: + h = win32file.CreateFile(x, 0, 0, None, win32file.OPEN_EXISTING, + win32file.FILE_FLAG_BACKUP_SEMANTICS, 0) + data = win32file.GetFileInformationByHandle(h) + except (error, EnvironmentError): + return None + return (data[4], data[8], data[9]) + + a, b = get_fileid(src), get_fileid(dst) + if a is None and b is None: + return False + return a == b + +def samefile(src, dst): + ''' + Check if two paths point to the same actual file on the filesystem. Handles + symlinks, case insensitivity, mapped drives, etc. + + Returns True iff both paths exist and point to the same file on disk. + ''' + if iswindows: + return samefile_windows(src, dst) + + if hasattr(os.path, 'samefile'): + # Unix + try: + return os.path.samefile(src, dst) + except EnvironmentError: + return False + + # All other platforms: check for same pathname. + samestring = (os.path.normcase(os.path.abspath(src)) == + os.path.normcase(os.path.abspath(dst))) + return samestring + +