Preserve all files in book dirs during export/import

This commit is contained in:
Kovid Goyal 2023-04-17 11:17:30 +05:30
parent 1a20d1e005
commit a29e2a2537
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 74 additions and 12 deletions

View File

@ -57,6 +57,7 @@ from polyglot.builtins import (
# }}}
COVER_FILE_NAME = 'cover.jpg'
METADATA_FILE_NAME = 'metadata.opf'
DEFAULT_TRASH_EXPIRY_TIME_SECONDS = 14 * 86400
TRASH_DIR_NAME = '.caltrash'
BOOK_ID_PATH_TEMPLATE = ' ({})'
@ -1584,6 +1585,7 @@ class DB:
try:
f = open(path, 'rb')
except OSError:
if iswindows:
time.sleep(0.2)
try:
f = open(path, 'rb')
@ -1624,6 +1626,7 @@ class DB:
try:
f = open(path, 'rb')
except OSError:
if iswindows:
time.sleep(0.2)
f = open(path, 'rb')
with f:
@ -1660,6 +1663,7 @@ class DB:
try:
os.remove(path)
except OSError:
if iswindows:
time.sleep(0.2)
os.remove(path)
else:
@ -1671,6 +1675,7 @@ class DB:
try:
save_cover_data_to(data, path)
except OSError:
if iswindows:
time.sleep(0.2)
save_cover_data_to(data, path)
@ -1884,8 +1889,42 @@ class DB:
os.makedirs(tpath)
update_paths_in_db()
def iter_extra_files(self, book_id, book_path, formats_field):
    '''Yield (relpath, open binary stream, mtime) for every readable file in
    the book's directory tree that is not one of the book's known files
    (the cover, the metadata OPF, or a registered format file).

    :param book_id: id of the book whose directory is scanned
    :param book_path: path of the book directory, relative to the library
    :param formats_field: the formats field object, used to map registered
        formats to their on-disk filenames
    '''
    known_files = {COVER_FILE_NAME, METADATA_FILE_NAME}
    # Collect the on-disk basenames of all registered format files so they
    # are excluded from the walk below. Format files live at the top level
    # of the book directory, so comparing basenames against the '/'-joined
    # relpath only ever matches top-level entries, which is what we want.
    for fmt in formats_field.for_book(book_id, default_value=()):
        fname = formats_field.format_fname(book_id, fmt)
        fpath = self.format_abspath(book_id, fmt, fname, book_path, do_file_rename=False)
        if fpath:
            known_files.add(os.path.basename(fpath))
    full_book_path = os.path.abspath(os.path.join(self.library_path, book_path))
    for dirpath, dirnames, filenames in os.walk(full_book_path):
        for fname in filenames:
            path = os.path.join(dirpath, fname)
            # Skip unreadable files rather than failing the whole iteration
            if os.access(path, os.R_OK):
                # Relative paths are normalized to '/' separators so they
                # are portable between operating systems
                relpath = os.path.relpath(path, full_book_path)
                relpath = relpath.replace(os.sep, '/')
                if relpath not in known_files:
                    try:
                        src = open(path, 'rb')
                    except OSError:
                        if iswindows:
                            # On Windows another process may transiently
                            # hold the file open; wait briefly and retry
                            time.sleep(1)
                        src = open(path, 'rb')
                    # NOTE: the file stays open only while the consumer
                    # processes this yielded item; it is closed when the
                    # generator resumes
                    with src:
                        yield relpath, src, os.path.getmtime(path)
def add_extra_file(self, relpath, stream, book_path):
    '''Copy the data from *stream* into the book directory at *book_path*,
    stored under the ('/'-separated) relative path *relpath*.

    The destination is opened directly first (the common case); only if
    that fails are the missing parent directories created before retrying.
    '''
    target = os.path.abspath(os.path.join(self.library_path, book_path, relpath))
    try:
        out = open(target, 'wb')
    except OSError:
        # The parent directory probably does not exist yet: create the
        # full chain and try the open again.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        out = open(target, 'wb')
    with out:
        shutil.copyfileobj(stream, out)
def write_backup(self, path, raw):
path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
path = os.path.abspath(os.path.join(self.library_path, path, METADATA_FILE_NAME))
try:
with open(path, 'wb') as f:
f.write(raw)
@ -1904,7 +1943,7 @@ class DB:
f.write(raw)
def read_backup(self, path):
path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
path = os.path.abspath(os.path.join(self.library_path, path, METADATA_FILE_NAME))
with open(path, 'rb') as f:
return f.read()
@ -1972,7 +2011,7 @@ class DB:
mi, _, annotations = read_opf(bdir, read_annotations=read_annotations)
formats = []
for x in os.scandir(bdir):
if x.is_file() and x.name not in (COVER_FILE_NAME, 'metadata.opf') and '.' in x.name:
if x.is_file() and x.name not in (COVER_FILE_NAME, METADATA_FILE_NAME) and '.' in x.name:
try:
size = x.stat(follow_symlinks=False).st_size
except OSError:
@ -2016,7 +2055,7 @@ class DB:
mtime = x.stat(follow_symlinks=False).st_mtime
except Exception:
continue
opf = OPF(os.path.join(x.path, 'metadata.opf'), basedir=x.path)
opf = OPF(os.path.join(x.path, METADATA_FILE_NAME), basedir=x.path)
books.append(TrashEntry(book_id, opf.title or unknown, (opf.authors or au)[0], os.path.join(x.path, COVER_FILE_NAME), mtime))
base = os.path.join(self.trash_dir, 'f')
um = {'title': unknown, 'authors': au}

View File

@ -2927,7 +2927,8 @@ class Cache:
os.remove(pt.name)
format_metadata = {}
metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total}
extra_files = {}
metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total, 'extra_files': extra_files}
if has_fts:
metadata['full-text-search.db'] = ftsdbkey
for i, book_id in enumerate(book_ids):
@ -2935,11 +2936,11 @@ class Cache:
return
if progress is not None:
progress(self._field_for('title', book_id), i + poff, total)
format_metadata[book_id] = {}
format_metadata[book_id] = fm = {}
for fmt in self._formats(book_id):
mdata = self.format_metadata(book_id, fmt)
key = f'{key_prefix}:{book_id}:{fmt}'
format_metadata[book_id][fmt] = key
fm[fmt] = key
with exporter.start_file(key, mtime=mdata.get('mtime')) as dest:
self._copy_format_to(book_id, fmt, dest, report_file_size=dest.ensure_space)
cover_key = '{}:{}:{}'.format(key_prefix, book_id, '.cover')
@ -2947,7 +2948,15 @@ class Cache:
if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space):
dest.discard()
else:
format_metadata[book_id]['.cover'] = cover_key
fm['.cover'] = cover_key
bp = self.field_for('path', book_id)
extra_files[book_id] = ef = {}
if bp:
for (relpath, fobj, mtime) in self.backend.iter_extra_files(book_id, bp, self.fields['formats']):
key = f'{key_prefix}:{book_id}:.|{relpath}'
with exporter.start_file(key, mtime=mtime) as dest:
shutil.copyfileobj(fobj, dest)
ef[relpath] = key
exporter.set_metadata(library_key, metadata)
if progress is not None:
progress(_('Completed'), total, total)
@ -3067,6 +3076,7 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
cache = Cache(DB(library_path, load_user_formatter_functions=False))
cache.init()
format_data = {int(book_id):data for book_id, data in iteritems(metadata['format_data'])}
extra_files = {int(book_id):data for book_id, data in metadata.get('extra_files', {}).items()}
for i, (book_id, fmt_key_map) in enumerate(iteritems(format_data)):
if abort is not None and abort.is_set():
return
@ -3084,6 +3094,10 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
size, fname = cache._do_add_format(book_id, fmt, stream, mtime=stream.mtime)
cache.fields['formats'].table.update_fmt(book_id, fmt, fname, size, cache.backend)
stream.close()
for relpath, efkey in extra_files.get(book_id, {}).items():
stream = importer.start_file(efkey, _('{0} for {1}').format(relpath, title))
path = cache._field_for('path', book_id).replace('/', os.sep)
cache.backend.add_extra_file(relpath, stream, path)
cache.dump_metadata({book_id})
if progress is not None:
progress(_('Completed'), total, total)

View File

@ -215,6 +215,12 @@ class FilesystemTest(BaseTest):
from calibre.db.cache import import_library
from calibre.utils.exim import Exporter, Importer
cache = self.init_cache()
bookdir = os.path.dirname(cache.format_abspath(1, '__COVER_INTERNAL__'))
with open(os.path.join(bookdir, 'exf'), 'w') as f:
f.write('exf')
os.mkdir(os.path.join(bookdir, 'sub'))
with open(os.path.join(bookdir, 'sub', 'recurse'), 'w') as f:
f.write('recurse')
for part_size in (1 << 30, 100, 1):
with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
exporter = Exporter(tdir, part_size=part_size)
@ -228,6 +234,9 @@ class FilesystemTest(BaseTest):
for fmt in cache.formats(book_id):
self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))
self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime'])
bookdir = os.path.dirname(ic.format_abspath(1, '__COVER_INTERNAL__'))
self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
cache.fts_indexing_sleep_time = 0.001
cache.enable_fts()