Speed up changing the title and author of books with files larger than 3MB by avoiding a double copy.

This commit is contained in:
Kovid Goyal 2012-08-30 13:57:32 +05:30
parent 54c198d4c0
commit fb091d7be0
2 changed files with 73 additions and 22 deletions

View File

@ -30,7 +30,7 @@ from calibre.ptempfile import (PersistentTemporaryFile,
base_dir, SpooledTemporaryFile)
from calibre.customize.ui import run_plugins_on_import
from calibre import isbytestring
from calibre.utils.filenames import ascii_filename
from calibre.utils.filenames import ascii_filename, samefile
from calibre.utils.date import (utcnow, now as nowf, utcfromtimestamp,
parse_only_date)
from calibre.utils.config import prefs, tweaks, from_json, to_json
@ -618,7 +618,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def normpath(self, path):
path = os.path.abspath(os.path.realpath(path))
if not self.is_case_sensitive:
path = path.lower()
path = os.path.normcase(path).lower()
return path
def set_path(self, index, index_is_id=False):
@ -654,21 +654,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
with lopen(os.path.join(tpath, 'cover.jpg'), 'wb') as f:
f.write(cdata)
for format in formats:
with SpooledTemporaryFile(SPOOL_SIZE) as stream:
try:
self.copy_format_to(id, format, stream, index_is_id=True)
stream.seek(0)
except NoSuchFormat:
continue
self.add_format(id, format, stream, index_is_id=True,
path=tpath, notify=False)
copy_function = functools.partial(self.copy_format_to, id,
format, index_is_id=True)
try:
self.add_format(id, format, None, index_is_id=True,
path=tpath, notify=False, copy_function=copy_function)
except NoSuchFormat:
continue
self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id))
self.dirtied([id], commit=False)
self.conn.commit()
self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True)
# Delete not needed directories
if current_path and os.path.exists(spath):
if self.normpath(spath) != self.normpath(tpath):
if not samefile(spath, tpath):
self.rmtree(spath, permanent=True)
parent = os.path.dirname(spath)
if len(os.listdir(parent)) == 0:
@ -1343,15 +1342,22 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
Copy the format ``fmt`` to the file like object ``dest``. If the
specified format does not exist, raises :class:`NoSuchFormat` error.
dest can also be a path, in which case the format is copied to it, iff
the path is different from the current path (taking case sensitivity
into account).
'''
path = self.format_abspath(index, fmt, index_is_id=index_is_id)
if path is None:
id_ = index if index_is_id else self.id(index)
raise NoSuchFormat('Record %d has no %s file'%(id_, fmt))
with lopen(path, 'rb') as f:
shutil.copyfileobj(f, dest)
if hasattr(dest, 'flush'):
dest.flush()
if hasattr(dest, 'write'):
with lopen(path, 'rb') as f:
shutil.copyfileobj(f, dest)
if hasattr(dest, 'flush'):
dest.flush()
elif dest and not samefile(dest, path):
with lopen(path, 'rb') as f, lopen(dest, 'wb') as d:
shutil.copyfileobj(f, d)
def format(self, index, format, index_is_id=False, as_file=False,
mode='r+b', as_path=False, preserve_filename=False):
@ -1411,7 +1417,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
index_is_id=index_is_id, path=path, notify=notify)
def add_format(self, index, format, stream, index_is_id=False, path=None,
notify=True, replace=True):
notify=True, replace=True, copy_function=None):
id = index if index_is_id else self.id(index)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
@ -1426,12 +1432,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
pdir = os.path.dirname(dest)
if not os.path.exists(pdir):
os.makedirs(pdir)
if not getattr(stream, 'name', False) or \
os.path.abspath(dest) != os.path.abspath(stream.name):
with lopen(dest, 'wb') as f:
shutil.copyfileobj(stream, f)
stream.seek(0, 2)
size=stream.tell()
if copy_function is not None:
copy_function(dest)
size = os.path.getsize(dest)
else:
if (not getattr(stream, 'name', False) or not samefile(dest,
stream.name)):
with lopen(dest, 'wb') as f:
shutil.copyfileobj(stream, f)
size = f.tell()
self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name))
self.update_last_modified([id], commit=False)

View File

@ -199,3 +199,45 @@ def case_preserving_open_file(path, mode='wb', mkdir_mode=0777):
fpath = os.path.join(cpath, fname)
return ans, fpath
def samefile_windows(src, dst):
    '''
    Check whether ``src`` and ``dst`` refer to the same file on Windows.

    Each path is opened with ``CreateFile`` and identified by items 4, 8 and
    9 of the tuple returned by ``GetFileInformationByHandle`` (presumably the
    volume serial number and the high/low parts of the file index -- confirm
    against the pywin32 documentation).

    Returns True only when both paths could be opened and their identifiers
    are equal. Returns False when either or both paths cannot be opened.
    '''
    import win32file
    from pywintypes import error

    def get_fileid(x):
        # Identifier tuple for the path x, or None if x cannot be opened or
        # queried.
        if isbytestring(x):
            x = x.decode(filesystem_encoding)
        try:
            h = win32file.CreateFile(x, 0, 0, None, win32file.OPEN_EXISTING,
                    win32file.FILE_FLAG_BACKUP_SEMANTICS, 0)
            try:
                data = win32file.GetFileInformationByHandle(h)
            finally:
                # Release the handle right away instead of waiting for the
                # handle object to be garbage collected.
                win32file.CloseHandle(h)
        except (error, EnvironmentError):
            return None
        return (data[4], data[8], data[9])

    a, b = get_fileid(src), get_fileid(dst)
    if a is None and b is None:
        # Neither path could be opened: they must not compare as equal.
        return False
    return a == b
def samefile(src, dst):
    '''
    Return True if ``src`` and ``dst`` refer to the same file on disk.

    On Windows the check is delegated to :func:`samefile_windows`. Where
    ``os.path.samefile`` is available it is used, and any
    ``EnvironmentError`` it raises is reported as False. On any other
    platform the two paths are compared as strings after being passed
    through ``os.path.abspath`` and ``os.path.normcase``.
    '''
    if iswindows:
        return samefile_windows(src, dst)

    stdlib_samefile = getattr(os.path, 'samefile', None)
    if stdlib_samefile is None:
        # No filesystem-level check available: compare normalized pathnames.
        left, right = (os.path.normcase(os.path.abspath(p))
                       for p in (src, dst))
        return left == right

    # Unix
    try:
        return stdlib_samefile(src, dst)
    except EnvironmentError:
        return False