Code to import a previously exported library

Kovid Goyal 2015-12-14 13:01:27 +05:30
parent b7666befd2
commit ebadee8a5e
4 changed files with 152 additions and 28 deletions
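For orientation, a minimal sketch of the round trip this commit enables, based on the new test_export_import test below (the function wrapper and the 'l' library key are illustrative; `cache` is an existing Cache instance):

    from calibre.db.cache import import_library
    from calibre.utils.exim import Exporter, Importer

    def roundtrip(cache, export_dir, import_dir, library_key='l'):
        # Export: writes part-NNNN.calibre-data files into export_dir;
        # export_library() now calls exporter.commit() itself.
        exporter = Exporter(export_dir)
        cache.export_library(library_key, exporter)
        # Import: Importer validates the parts and reads the metadata,
        # import_library() rebuilds a live library in import_dir.
        importer = Importer(export_dir)
        return import_library(library_key, importer, import_dir)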

src/calibre/db/backend.py

@@ -1354,7 +1354,7 @@ class DB(object):
         with f:
             return True, f.read(), stat.st_mtime
 
-    def set_cover(self, book_id, path, data):
+    def set_cover(self, book_id, path, data, no_processing=False):
         path = os.path.abspath(os.path.join(self.library_path, path))
         if not os.path.exists(path):
             os.makedirs(path)
@@ -1371,6 +1371,10 @@ class DB(object):
                 except (IOError, OSError):
                     time.sleep(0.2)
                     os.remove(path)
-        else:
-            try:
-                save_cover_data_to(data, path)
+        else:
+            if no_processing:
+                with open(path, 'wb') as f:
+                    f.write(data)
+            else:
+                try:
+                    save_cover_data_to(data, path)

src/calibre/db/cache.py

@@ -1320,6 +1320,24 @@ class Cache(object):
                 self._reload_from_db()
                 raise
 
+    def _do_add_format(self, book_id, fmt, stream, name=None):
+        path = self._field_for('path', book_id)
+        if path is None:
+            # Theoretically, this should never happen, but apparently it
+            # does: http://www.mobileread.com/forums/showthread.php?t=233353
+            self._update_path({book_id}, mark_as_dirtied=False)
+            path = self._field_for('path', book_id)
+        path = path.replace('/', os.sep)
+
+        title = self._field_for('title', book_id, default_value=_('Unknown'))
+        try:
+            author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
+        except IndexError:
+            author = _('Unknown')
+
+        size, fname = self.backend.add_format(book_id, fmt, stream, title, author, path, name)
+        return size, fname
+
     @api
     def add_format(self, book_id, fmt, stream_or_path, replace=True, run_hooks=True, dbapi=None):
         '''
@@ -1343,28 +1361,14 @@ class Cache(object):
             self.format_metadata_cache[book_id].pop(fmt, None)
             try:
                 name = self.fields['formats'].format_fname(book_id, fmt)
-            except:
+            except Exception:
                 name = None
             if name and not replace:
                 return False
-
-            path = self._field_for('path', book_id)
-            if path is None:
-                # Theoretically, this should never happen, but apparently it
-                # does: http://www.mobileread.com/forums/showthread.php?t=233353
-                self._update_path({book_id}, mark_as_dirtied=False)
-                path = self._field_for('path', book_id)
-            path = path.replace('/', os.sep)
-
-            title = self._field_for('title', book_id, default_value=_('Unknown'))
-            try:
-                author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
-            except IndexError:
-                author = _('Unknown')
-
             stream = stream_or_path if hasattr(stream_or_path, 'read') else lopen(stream_or_path, 'rb')
-            size, fname = self.backend.add_format(book_id, fmt, stream, title, author, path, name)
+            size, fname = self._do_add_format(book_id, fmt, stream, name)
             del stream
 
             max_size = self.fields['formats'].table.update_fmt(book_id, fmt, fname, size, self.backend)
@@ -2112,7 +2116,7 @@ class Cache(object):
         with lopen(pt.name, 'rb') as f:
             exporter.add_file(f, dbkey)
         os.remove(pt.name)
-        metadata = {'format_data':format_metadata, 'metadata.db':dbkey}
+        metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total}
         for i, book_id in enumerate(book_ids):
             if progress is not None:
                 progress(self._field_for('title', book_id), i + 1, total)
@@ -2126,7 +2130,43 @@ class Cache(object):
             with exporter.start_file(cover_key) as dest:
                 if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space):
                     dest.discard()
+                else:
+                    format_metadata[book_id]['.cover'] = cover_key
         exporter.set_metadata(library_key, metadata)
+        exporter.commit()
         if progress is not None:
             progress(_('Completed'), total, total)
+
+def import_library(library_key, importer, library_path, progress=None):
+    from calibre.db.backend import DB
+    metadata = importer.metadata[library_key]
+    total = metadata['total']
+    if progress is not None:
+        progress('metadata.db', 0, total)
+    with open(os.path.join(library_path, 'metadata.db'), 'wb') as f:
+        src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path)
+        shutil.copyfileobj(src, f)
+        src.close()
+    cache = Cache(DB(library_path, load_user_formatter_functions=False))
+    cache.init()
+    format_data = {int(book_id):data for book_id, data in metadata['format_data'].iteritems()}
+    cache._update_path(set(format_data), mark_as_dirtied=False)
+    for i, (book_id, fmt_key_map) in enumerate(format_data.iteritems()):
+        title = cache._field_for('title', book_id)
+        if progress is not None:
+            progress(title, i + 1, total)
+        for fmt, fmtkey in fmt_key_map.iteritems():
+            if fmt == '.cover':
+                stream = importer.start_file(fmtkey, _('Cover for %s') % title)
+                path = cache._field_for('path', book_id).replace('/', os.sep)
+                cache.backend.set_cover(book_id, path, stream, no_processing=True)
+            else:
+                stream = importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title))
+                size, fname = cache._do_add_format(book_id, fmt, stream)
+                cache.fields['formats'].table.update_fmt(book_id, fmt, fname, size, cache.backend)
+            stream.close()
+        cache.dump_metadata({book_id})
+    if progress is not None:
+        progress(_('Completed'), total, total)
+    return cache
+
 # }}}
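import_library drives the same optional progress callback as export_library: once for metadata.db, once per book, and once on completion. A minimal sketch of a compatible reporter (the print-based function is hypothetical):

    def report(name, current, total):
        # receives progress('metadata.db', 0, total) first, then
        # progress(title, i + 1, total) per book, and finally
        # progress('Completed', total, total)
        print('%d/%d: %s' % (current, total, name))

    # cache = import_library('l', importer, library_path, progress=report)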

src/calibre/db/tests/filesystem.py

@@ -145,9 +145,17 @@ class FilesystemTest(BaseTest):
         self.assertEqual(sorted([os.path.basename(fpath)]), sorted(os.listdir(os.path.dirname(fpath))))
 
     def test_export_import(self):
-        from calibre.utils.exim import Exporter
+        from calibre.db.cache import import_library
+        from calibre.utils.exim import Exporter, Importer
         cache = self.init_cache()
-        for part_size in (1024, 100, 1):
-            with TemporaryDirectory('export_lib') as tdir:
+        for part_size in (1 << 30, 100, 1):
+            with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
                 exporter = Exporter(tdir, part_size=part_size)
                 cache.export_library('l', exporter)
+                importer = Importer(tdir)
+                ic = import_library('l', importer, idir)
+                self.assertEqual(cache.all_book_ids(), ic.all_book_ids())
+                for book_id in cache.all_book_ids():
+                    self.assertEqual(cache.cover(book_id), ic.cover(book_id), 'Covers not identical for book: %d' % book_id)
+                    for fmt in cache.formats(book_id):
+                        self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))

src/calibre/utils/exim.py

@@ -58,6 +58,7 @@ class Exporter(object):
     VERSION = 1
     TAIL_FMT = b'!II?'  # part_num, version, is_last
     MDATA_SZ_FMT = b'!Q'
+    EXT = '.calibre-data'
 
     def __init__(self, path_to_export_dir, part_size=(1 << 30)):
         self.part_size = part_size
@@ -78,7 +79,7 @@ class Exporter(object):
 
     def new_part(self):
         self.parts.append(open(os.path.join(
-            self.base, 'part-{:04d}.calibre-data'.format(len(self.parts) + 1)), 'wb'))
+            self.base, 'part-{:04d}{}'.format(len(self.parts) + 1, self.EXT)), 'wb'))
 
     def commit_part(self, is_last=False):
         self.f.write(struct.pack(self.TAIL_FMT, len(self.parts), self.VERSION, is_last))
@@ -112,3 +113,74 @@ class Exporter(object):
 
     def start_file(self, key):
         return FileDest(key, self)
+
+
+class FileSource(object):
+
+    def __init__(self, f, size, digest, description, importer):
+        self.f, self.size, self.digest, self.description = f, size, digest, description
+        self.end = f.tell() + size
+        self.hasher = hashlib.sha1()
+        self.importer = importer
+
+    def read(self, size=None):
+        if size is not None and size < 1:
+            return b''
+        left = self.end - self.f.tell()
+        amt = min(left, size or left)
+        if amt < 1:
+            return b''
+        ans = self.f.read(amt)
+        self.hasher.update(ans)
+        return ans
+
+    def close(self):
+        if self.hasher.hexdigest() != self.digest:
+            self.importer.corrupted_files.append(self.description)
+        self.hasher = self.f = None
+
+
+class Importer(object):
+
+    def __init__(self, path_to_export_dir):
+        self.corrupted_files = []
+        part_map = {}
+        tail_size = struct.calcsize(Exporter.TAIL_FMT)
+        for name in os.listdir(path_to_export_dir):
+            if name.lower().endswith(Exporter.EXT):
+                path = os.path.join(path_to_export_dir, name)
+                with open(path, 'rb') as f:
+                    f.seek(-tail_size, os.SEEK_END)
+                    raw = f.read()
+                if len(raw) != tail_size:
+                    raise ValueError('The exported data in %s is not valid, tail too small' % name)
+                part_num, version, is_last = struct.unpack(Exporter.TAIL_FMT, raw)
+                if version > Exporter.VERSION:
+                    raise ValueError('The exported data in %s is not valid, version (%d) is higher than maximum supported version.' % (
+                        name, version))
+                part_map[part_num] = path, is_last
+        nums = sorted(part_map)
+        if not nums:
+            raise ValueError('No exported data found in: %s' % path_to_export_dir)
+        if nums[0] != 1:
+            raise ValueError('The first part of this exported data set is missing')
+        if not part_map[nums[-1]][1]:
+            raise ValueError('The last part of this exported data set is missing')
+        if len(nums) != nums[-1]:
+            raise ValueError('There are some parts of the exported data set missing')
+        self.part_map = {num:path for num, (path, is_last) in part_map.iteritems()}
+        msf = struct.calcsize(Exporter.MDATA_SZ_FMT)
+        offset = tail_size + msf
+        with self.part(nums[-1]) as f:
+            f.seek(-offset, os.SEEK_END)
+            sz, = struct.unpack(Exporter.MDATA_SZ_FMT, f.read(msf))
+            f.seek(- sz - offset, os.SEEK_END)
+            self.metadata = json.loads(f.read(sz))
+            self.file_metadata = self.metadata['file_metadata']
+
+    def part(self, num):
+        return lopen(self.part_map[num], 'rb')
+
+    def start_file(self, key, description):
+        partnum, pos, size, digest = self.file_metadata[key]
+        f = self.part(partnum)
+        f.seek(pos)
+        return FileSource(f, size, digest, description, self)
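For reference, the on-disk layout the Importer parses: every part file ends with the TAIL_FMT struct (part_num, version, is_last), and the last part additionally carries the JSON metadata, preceded by its size packed as MDATA_SZ_FMT, immediately before that tail. A standalone sketch of reading the trailer, following the seek arithmetic in Importer.__init__ (the file path is illustrative):

    import json, os, struct

    TAIL_FMT, MDATA_SZ_FMT = b'!II?', b'!Q'
    with open('/tmp/export_lib/part-0001.calibre-data', 'rb') as f:
        tail_size = struct.calcsize(TAIL_FMT)
        f.seek(-tail_size, os.SEEK_END)
        part_num, version, is_last = struct.unpack(TAIL_FMT, f.read(tail_size))
        if is_last:
            # the metadata size is packed just before the tail...
            msf = struct.calcsize(MDATA_SZ_FMT)
            f.seek(-(tail_size + msf), os.SEEK_END)
            sz, = struct.unpack(MDATA_SZ_FMT, f.read(msf))
            # ...and the JSON metadata blob sits just before that
            f.seek(-(sz + msf + tail_size), os.SEEK_END)
            metadata = json.loads(f.read(sz))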