Speed up changing the title and author of books whose files are larger than 3MB by avoiding a double copy.

This commit is contained in:
Kovid Goyal 2012-08-30 13:57:32 +05:30
parent 54c198d4c0
commit fb091d7be0
2 changed files with 73 additions and 22 deletions

View File

@ -30,7 +30,7 @@ from calibre.ptempfile import (PersistentTemporaryFile,
base_dir, SpooledTemporaryFile) base_dir, SpooledTemporaryFile)
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre import isbytestring from calibre import isbytestring
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename, samefile
from calibre.utils.date import (utcnow, now as nowf, utcfromtimestamp, from calibre.utils.date import (utcnow, now as nowf, utcfromtimestamp,
parse_only_date) parse_only_date)
from calibre.utils.config import prefs, tweaks, from_json, to_json from calibre.utils.config import prefs, tweaks, from_json, to_json
@ -618,7 +618,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def normpath(self, path): def normpath(self, path):
path = os.path.abspath(os.path.realpath(path)) path = os.path.abspath(os.path.realpath(path))
if not self.is_case_sensitive: if not self.is_case_sensitive:
path = path.lower() path = os.path.normcase(path).lower()
return path return path
def set_path(self, index, index_is_id=False): def set_path(self, index, index_is_id=False):
@ -654,21 +654,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
with lopen(os.path.join(tpath, 'cover.jpg'), 'wb') as f: with lopen(os.path.join(tpath, 'cover.jpg'), 'wb') as f:
f.write(cdata) f.write(cdata)
for format in formats: for format in formats:
with SpooledTemporaryFile(SPOOL_SIZE) as stream: copy_function = functools.partial(self.copy_format_to, id,
format, index_is_id=True)
try: try:
self.copy_format_to(id, format, stream, index_is_id=True) self.add_format(id, format, None, index_is_id=True,
stream.seek(0) path=tpath, notify=False, copy_function=copy_function)
except NoSuchFormat: except NoSuchFormat:
continue continue
self.add_format(id, format, stream, index_is_id=True,
path=tpath, notify=False)
self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id))
self.dirtied([id], commit=False) self.dirtied([id], commit=False)
self.conn.commit() self.conn.commit()
self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True)
# Delete not needed directories # Delete not needed directories
if current_path and os.path.exists(spath): if current_path and os.path.exists(spath):
if self.normpath(spath) != self.normpath(tpath): if not samefile(spath, tpath):
self.rmtree(spath, permanent=True) self.rmtree(spath, permanent=True)
parent = os.path.dirname(spath) parent = os.path.dirname(spath)
if len(os.listdir(parent)) == 0: if len(os.listdir(parent)) == 0:
@ -1343,15 +1342,22 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
''' '''
Copy the format ``fmt`` to the file like object ``dest``. If the Copy the format ``fmt`` to the file like object ``dest``. If the
specified format does not exist, raises :class:`NoSuchFormat` error. specified format does not exist, raises :class:`NoSuchFormat` error.
dest can also be a path, in which case the format is copied to it, iff
the path is different from the current path (taking case sensitivity
into account).
''' '''
path = self.format_abspath(index, fmt, index_is_id=index_is_id) path = self.format_abspath(index, fmt, index_is_id=index_is_id)
if path is None: if path is None:
id_ = index if index_is_id else self.id(index) id_ = index if index_is_id else self.id(index)
raise NoSuchFormat('Record %d has no %s file'%(id_, fmt)) raise NoSuchFormat('Record %d has no %s file'%(id_, fmt))
if hasattr(dest, 'write'):
with lopen(path, 'rb') as f: with lopen(path, 'rb') as f:
shutil.copyfileobj(f, dest) shutil.copyfileobj(f, dest)
if hasattr(dest, 'flush'): if hasattr(dest, 'flush'):
dest.flush() dest.flush()
elif dest and not samefile(dest, path):
with lopen(path, 'rb') as f, lopen(dest, 'wb') as d:
shutil.copyfileobj(f, d)
def format(self, index, format, index_is_id=False, as_file=False, def format(self, index, format, index_is_id=False, as_file=False,
mode='r+b', as_path=False, preserve_filename=False): mode='r+b', as_path=False, preserve_filename=False):
@ -1411,7 +1417,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
index_is_id=index_is_id, path=path, notify=notify) index_is_id=index_is_id, path=path, notify=notify)
def add_format(self, index, format, stream, index_is_id=False, path=None, def add_format(self, index, format, stream, index_is_id=False, path=None,
notify=True, replace=True): notify=True, replace=True, copy_function=None):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
if not format: format = '' if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None) self.format_metadata_cache[id].pop(format.upper(), None)
@ -1426,12 +1432,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
pdir = os.path.dirname(dest) pdir = os.path.dirname(dest)
if not os.path.exists(pdir): if not os.path.exists(pdir):
os.makedirs(pdir) os.makedirs(pdir)
if not getattr(stream, 'name', False) or \ if copy_function is not None:
os.path.abspath(dest) != os.path.abspath(stream.name): copy_function(dest)
size = os.path.getsize(dest)
else:
if (not getattr(stream, 'name', False) or not samefile(dest,
stream.name)):
with lopen(dest, 'wb') as f: with lopen(dest, 'wb') as f:
shutil.copyfileobj(stream, f) shutil.copyfileobj(stream, f)
stream.seek(0, 2) size = f.tell()
size=stream.tell()
self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name)) (id, format.upper(), size, name))
self.update_last_modified([id], commit=False) self.update_last_modified([id], commit=False)

View File

@ -199,3 +199,45 @@ def case_preserving_open_file(path, mode='wb', mkdir_mode=0777):
fpath = os.path.join(cpath, fname) fpath = os.path.join(cpath, fname)
return ans, fpath return ans, fpath
def samefile_windows(src, dst):
    '''
    Check whether ``src`` and ``dst`` refer to the same file on windows.

    Each path is opened via the Win32 API and identified by items 4, 8 and
    9 of the tuple returned by GetFileInformationByHandle (per the pywin32
    documentation these are the volume serial number and the high and low
    parts of the file index). Byte string paths are decoded using
    ``filesystem_encoding`` first.

    Returns True only if both paths could be queried and yield the same
    identifying tuple. A path that cannot be opened never matches anything.
    '''
    import win32file
    from pywintypes import error

    def get_fileid(x):
        # Return the identifying tuple for x, or None if x cannot be queried
        if isbytestring(x):
            x = x.decode(filesystem_encoding)
        try:
            # NOTE(review): FILE_FLAG_BACKUP_SEMANTICS is presumably passed so
            # that directories can be opened as well -- confirm against the
            # CreateFile documentation
            h = win32file.CreateFile(x, 0, 0, None, win32file.OPEN_EXISTING,
                    win32file.FILE_FLAG_BACKUP_SEMANTICS, 0)
            try:
                data = win32file.GetFileInformationByHandle(h)
            finally:
                # Release the handle immediately rather than relying on
                # garbage collection of the handle object to do it
                win32file.CloseHandle(h)
        except (error, EnvironmentError):
            return None
        return (data[4], data[8], data[9])

    a, b = get_fileid(src), get_fileid(dst)
    # If a is None the result is False whatever b is: either b is also None
    # (neither path could be queried) or b is a tuple and cannot equal None
    return a is not None and a == b
def samefile(src, dst):
    '''
    Check if two paths point to the same actual file on the filesystem.

    On windows the check is delegated to :func:`samefile_windows`. Where
    ``os.path.samefile`` is available it is used, and any EnvironmentError
    it raises (for example because a path does not exist) is reported as
    False. On all other platforms only the normalized absolute pathnames are
    compared, so in that case the files are not required to exist.
    '''
    if iswindows:
        return samefile_windows(src, dst)

    native_samefile = getattr(os.path, 'samefile', None)
    if native_samefile is None:
        # No native support: fall back to comparing the pathnames
        normalized = [os.path.normcase(os.path.abspath(p)) for p in (src, dst)]
        return normalized[0] == normalized[1]

    # Unix
    try:
        return native_samefile(src, dst)
    except EnvironmentError:
        return False