Code to import a previously exported library

Commit ebadee8a5e (parent b7666befd2) in https://github.com/kovidgoyal/calibre.git
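In brief, the round trip this commit enables (a minimal sketch based on test_export_import below; the directory paths and the 'mylib' key are hypothetical placeholders):

    from calibre.db.cache import import_library
    from calibre.utils.exim import Exporter, Importer

    # 'cache' is an already-open calibre Cache instance
    exporter = Exporter('/tmp/exported', part_size=1 << 30)  # split into parts of at most 1 GiB
    cache.export_library('mylib', exporter)                  # writes the part files plus metadata

    importer = Importer('/tmp/exported')                     # validates and indexes the parts
    new_cache = import_library('mylib', importer, '/tmp/new_library')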
src/calibre/db/backend.py

@@ -1354,7 +1354,7 @@ class DB(object):
         with f:
             return True, f.read(), stat.st_mtime
 
-    def set_cover(self, book_id, path, data):
+    def set_cover(self, book_id, path, data, no_processing=False):
         path = os.path.abspath(os.path.join(self.library_path, path))
         if not os.path.exists(path):
             os.makedirs(path)
@@ -1371,6 +1371,10 @@ class DB(object):
                 except (IOError, OSError):
                     time.sleep(0.2)
                     os.remove(path)
         else:
-            try:
-                save_cover_data_to(data, path)
+            if no_processing:
+                with open(path, 'wb') as f:
+                    f.write(data)
+            else:
+                try:
+                    save_cover_data_to(data, path)
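The new no_processing flag exists for the importer: it writes the exported cover bytes back verbatim, while the default path still normalizes the image through save_cover_data_to(). A hypothetical sketch of the two call styles (db is a DB instance, path the book's directory relative to the library root):

    db.set_cover(book_id, path, cover_data, no_processing=True)  # bytes (or a stream) written as-is
    db.set_cover(book_id, path, cover_data)                      # re-encoded via save_cover_data_to()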
src/calibre/db/cache.py

@@ -1320,6 +1320,24 @@ class Cache(object):
                 self._reload_from_db()
                 raise
 
+    def _do_add_format(self, book_id, fmt, stream, name=None):
+        path = self._field_for('path', book_id)
+        if path is None:
+            # Theoretically, this should never happen, but apparently it
+            # does: http://www.mobileread.com/forums/showthread.php?t=233353
+            self._update_path({book_id}, mark_as_dirtied=False)
+            path = self._field_for('path', book_id)
+
+        path = path.replace('/', os.sep)
+        title = self._field_for('title', book_id, default_value=_('Unknown'))
+        try:
+            author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
+        except IndexError:
+            author = _('Unknown')
+
+        size, fname = self.backend.add_format(book_id, fmt, stream, title, author, path, name)
+        return size, fname
+
     @api
     def add_format(self, book_id, fmt, stream_or_path, replace=True, run_hooks=True, dbapi=None):
         '''
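_do_add_format() is the old body of add_format() factored out, so that import_library() below can add format files without the replace checks and hooks of the public API. A minimal sketch of a direct call, mirroring the import code (the path and book_id are placeholders; lopen is calibre's open wrapper):

    with lopen('/tmp/book.epub', 'rb') as stream:  # hypothetical source file
        size, fname = cache._do_add_format(book_id, 'EPUB', stream)
        cache.fields['formats'].table.update_fmt(book_id, 'EPUB', fname, size, cache.backend)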
@@ -1343,28 +1361,14 @@ class Cache(object):
         self.format_metadata_cache[book_id].pop(fmt, None)
         try:
             name = self.fields['formats'].format_fname(book_id, fmt)
-        except:
+        except Exception:
             name = None
 
         if name and not replace:
             return False
 
-        path = self._field_for('path', book_id)
-        if path is None:
-            # Theoretically, this should never happen, but apparently it
-            # does: http://www.mobileread.com/forums/showthread.php?t=233353
-            self._update_path({book_id}, mark_as_dirtied=False)
-            path = self._field_for('path', book_id)
-
-        path = path.replace('/', os.sep)
-        title = self._field_for('title', book_id, default_value=_('Unknown'))
-        try:
-            author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
-        except IndexError:
-            author = _('Unknown')
-
         stream = stream_or_path if hasattr(stream_or_path, 'read') else lopen(stream_or_path, 'rb')
-        size, fname = self.backend.add_format(book_id, fmt, stream, title, author, path, name)
+        size, fname = self._do_add_format(book_id, fmt, stream, name)
         del stream
 
         max_size = self.fields['formats'].table.update_fmt(book_id, fmt, fname, size, self.backend)
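Note the drive-by fix in this hunk: the bare except: around format_fname() becomes except Exception:, so exceptions such as KeyboardInterrupt and SystemExit are no longer swallowed while probing for an existing format file.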
@@ -2112,7 +2116,7 @@ class Cache(object):
             with lopen(pt.name, 'rb') as f:
                 exporter.add_file(f, dbkey)
             os.remove(pt.name)
-        metadata = {'format_data':format_metadata, 'metadata.db':dbkey}
+        metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total}
         for i, book_id in enumerate(book_ids):
             if progress is not None:
                 progress(self._field_for('title', book_id), i + 1, total)
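For orientation, the per-library metadata blob now carries everything the importer needs up front (illustrative shape; 'total' is presumably the number of books plus one, so the metadata.db step can be counted in progress reports):

    metadata = {
        'format_data': format_metadata,  # {book_id: {fmt or '.cover': file key in the export}}
        'metadata.db': dbkey,            # file key of the exported database copy
        'total': total,                  # progress step count for import_library()
    }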
@@ -2126,7 +2130,43 @@ class Cache(object):
                 with exporter.start_file(cover_key) as dest:
                     if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space):
                         dest.discard()
                     else:
                         format_metadata[book_id]['.cover'] = cover_key
         exporter.set_metadata(library_key, metadata)
+        exporter.commit()
+        if progress is not None:
+            progress(_('Completed'), total, total)
+
+def import_library(library_key, importer, library_path, progress=None):
+    from calibre.db.backend import DB
+    metadata = importer.metadata[library_key]
+    total = metadata['total']
+    if progress is not None:
+        progress('metadata.db', 0, total)
+    with open(os.path.join(library_path, 'metadata.db'), 'wb') as f:
+        src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path)
+        shutil.copyfileobj(src, f)
+        src.close()
+    cache = Cache(DB(library_path, load_user_formatter_functions=False))
+    cache.init()
+    format_data = {int(book_id):data for book_id, data in metadata['format_data'].iteritems()}
+    cache._update_path(set(format_data), mark_as_dirtied=False)
+    for i, (book_id, fmt_key_map) in enumerate(format_data.iteritems()):
+        title = cache._field_for('title', book_id)
+        if progress is not None:
+            progress(title, i + 1, total)
+        for fmt, fmtkey in fmt_key_map.iteritems():
+            if fmt == '.cover':
+                stream = importer.start_file(fmtkey, _('Cover for %s') % title)
+                path = cache._field_for('path', book_id).replace('/', os.sep)
+                cache.backend.set_cover(book_id, path, stream, no_processing=True)
+            else:
+                stream = importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title))
+                size, fname = cache._do_add_format(book_id, fmt, stream)
+                cache.fields['formats'].table.update_fmt(book_id, fmt, fname, size, cache.backend)
+            stream.close()
+        cache.dump_metadata({book_id})
+    if progress is not None:
+        progress(_('Completed'), total, total)
+    return cache
 # }}}
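A minimal sketch of driving the new import_library() (paths and the 'l' key are placeholders; the progress signature matches the calls above):

    from calibre.db.cache import import_library
    from calibre.utils.exim import Importer

    def report(name, num, total):
        print('%d/%d %s' % (num, total, name))

    cache = import_library('l', Importer('/tmp/exported'), '/tmp/restored_library', progress=report)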
src/calibre/db/tests/filesystem.py

@@ -145,9 +145,17 @@ class FilesystemTest(BaseTest):
         self.assertEqual(sorted([os.path.basename(fpath)]), sorted(os.listdir(os.path.dirname(fpath))))
 
     def test_export_import(self):
-        from calibre.utils.exim import Exporter
+        from calibre.db.cache import import_library
+        from calibre.utils.exim import Exporter, Importer
         cache = self.init_cache()
-        for part_size in (1024, 100, 1):
-            with TemporaryDirectory('export_lib') as tdir:
+        for part_size in (1 << 30, 100, 1):
+            with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
                 exporter = Exporter(tdir, part_size=part_size)
                 cache.export_library('l', exporter)
+                importer = Importer(tdir)
+                ic = import_library('l', importer, idir)
+                self.assertEqual(cache.all_book_ids(), ic.all_book_ids())
+                for book_id in cache.all_book_ids():
+                    self.assertEqual(cache.cover(book_id), ic.cover(book_id), 'Covers not identical for book: %d' % book_id)
+                    for fmt in cache.formats(book_id):
+                        self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))
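The three part_size values are picked to stress the splitter at both extremes: 1 << 30 keeps the whole export in one part, 100 makes files straddle several small parts, and 1 degenerates to (effectively) a byte of payload per part, which also exercises the Importer's part reassembly.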
src/calibre/utils/exim.py

@@ -58,6 +58,7 @@ class Exporter(object):
     VERSION = 1
     TAIL_FMT = b'!II?'  # part_num, version, is_last
+    MDATA_SZ_FMT = b'!Q'
     EXT = '.calibre-data'
 
     def __init__(self, path_to_export_dir, part_size=(1 << 30)):
         self.part_size = part_size
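Each part file ends with a fixed-size tail, which is what lets the Importer identify parts by reading only the last few bytes; the metadata size field sits immediately before the tail of the final part. A quick self-contained illustration of the two formats:

    import struct

    TAIL_FMT = b'!II?'    # part_num, version, is_last (network byte order)
    MDATA_SZ_FMT = b'!Q'  # length of the JSON metadata blob

    tail = struct.pack(TAIL_FMT, 3, 1, True)      # part 3, format version 1, final part
    assert struct.unpack(TAIL_FMT, tail) == (3, 1, True)
    assert struct.calcsize(TAIL_FMT) == 9         # 4 + 4 + 1 bytes, no padding with '!'
    assert struct.calcsize(MDATA_SZ_FMT) == 8     # one unsigned 64-bit length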
@@ -78,7 +79,7 @@ class Exporter(object):
 
     def new_part(self):
         self.parts.append(open(os.path.join(
-            self.base, 'part-{:04d}.calibre-data'.format(len(self.parts) + 1)), 'wb'))
+            self.base, 'part-{:04d}{}'.format(len(self.parts) + 1, self.EXT)), 'wb'))
 
     def commit_part(self, is_last=False):
         self.f.write(struct.pack(self.TAIL_FMT, len(self.parts), self.VERSION, is_last))
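Parts on disk are thus named part-0001.calibre-data, part-0002.calibre-data, and so on; reusing self.EXT keeps the writer in sync with the extension scan in Importer.__init__() below.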
@@ -112,3 +113,74 @@ class Exporter(object):
 
     def start_file(self, key):
         return FileDest(key, self)
+
+class FileSource(object):
+
+    def __init__(self, f, size, digest, description, importer):
+        self.f, self.size, self.digest, self.description = f, size, digest, description
+        self.end = f.tell() + size
+        self.hasher = hashlib.sha1()
+        self.importer = importer
+
+    def read(self, size=None):
+        if size is not None and size < 1:
+            return b''
+        left = self.end - self.f.tell()
+        amt = min(left, size or left)
+        if amt < 1:
+            return b''
+        ans = self.f.read(amt)
+        self.hasher.update(ans)
+        return ans
+
+    def close(self):
+        if self.hasher.hexdigest() != self.digest:
+            self.importer.corrupted_files.append(self.description)
+        self.hasher = self.f = None
+
+class Importer(object):
+
+    def __init__(self, path_to_export_dir):
+        self.corrupted_files = []
+        part_map = {}
+        tail_size = struct.calcsize(Exporter.TAIL_FMT)
+        for name in os.listdir(path_to_export_dir):
+            if name.lower().endswith(Exporter.EXT):
+                path = os.path.join(path_to_export_dir, name)
+                with open(path, 'rb') as f:
+                    f.seek(-tail_size, os.SEEK_END)
+                    raw = f.read()
+                if len(raw) != tail_size:
+                    raise ValueError('The exported data in %s is not valid, tail too small' % name)
+                part_num, version, is_last = struct.unpack(Exporter.TAIL_FMT, raw)
+                if version > Exporter.VERSION:
+                    raise ValueError('The exported data in %s is not valid, version (%d) is higher than maximum supported version.' % (
+                        name, version))
+                part_map[part_num] = path, is_last
+        nums = sorted(part_map)
+        if not nums:
+            raise ValueError('No exported data found in: %s' % path_to_export_dir)
+        if nums[0] != 1:
+            raise ValueError('The first part of this exported data set is missing')
+        if not part_map[nums[-1]][1]:
+            raise ValueError('The last part of this exported data set is missing')
+        if len(nums) != nums[-1]:
+            raise ValueError('There are some parts of the exported data set missing')
+        self.part_map = {num:path for num, (path, is_last) in part_map.iteritems()}
+        msf = struct.calcsize(Exporter.MDATA_SZ_FMT)
+        offset = tail_size + msf
+        with self.part(nums[-1]) as f:
+            f.seek(-offset, os.SEEK_END)
+            sz, = struct.unpack(Exporter.MDATA_SZ_FMT, f.read(msf))
+            f.seek(- sz - offset, os.SEEK_END)
+            self.metadata = json.loads(f.read(sz))
+            self.file_metadata = self.metadata['file_metadata']
+
+    def part(self, num):
+        return lopen(self.part_map[num], 'rb')
+
+    def start_file(self, key, description):
+        partnum, pos, size, digest = self.file_metadata[key]
+        f = self.part(partnum)
+        f.seek(pos)
+        return FileSource(f, size, digest, description, self)
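FileSource is a bounded reader over one region of a part: read() refuses to go past self.end, and every byte is fed to a SHA-1 hasher so that close() can compare against the digest recorded at export time, appending mismatches to Importer.corrupted_files instead of raising. A sketch of consuming one, mirroring import_library() (the key and description are placeholders):

    import shutil

    src = importer.start_file(some_key, 'EPUB format for Some Book')  # hypothetical key
    with open('/tmp/out.epub', 'wb') as dest:
        shutil.copyfileobj(src, dest)
    src.close()  # verifies the digest; a mismatch records the description
    if importer.corrupted_files:
        print('Corrupted in the export:', importer.corrupted_files)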