Code to import a previously exported library

Kovid Goyal 2015-12-14 13:01:27 +05:30
parent b7666befd2
commit ebadee8a5e
4 changed files with 152 additions and 28 deletions
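For orientation, a minimal sketch of the round trip this commit enables, based on the new test_export_import test below (the function wrapper and the 'l' library key are illustrative; `cache` is an existing Cache instance):

    from calibre.db.cache import import_library
    from calibre.utils.exim import Exporter, Importer

    def roundtrip(cache, export_dir, import_dir, library_key='l'):
        # Export: writes part-NNNN.calibre-data files into export_dir;
        # export_library() now calls exporter.commit() itself.
        exporter = Exporter(export_dir)
        cache.export_library(library_key, exporter)
        # Import: Importer validates the parts and reads the metadata,
        # import_library() rebuilds a live library in import_dir.
        importer = Importer(export_dir)
        return import_library(library_key, importer, import_dir)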

src/calibre/db/backend.py

@@ -1354,7 +1354,7 @@ class DB(object):
         with f:
             return True, f.read(), stat.st_mtime
 
-    def set_cover(self, book_id, path, data):
+    def set_cover(self, book_id, path, data, no_processing=False):
         path = os.path.abspath(os.path.join(self.library_path, path))
         if not os.path.exists(path):
             os.makedirs(path)
@@ -1371,6 +1371,10 @@ class DB(object):
                 except (IOError, OSError):
                     time.sleep(0.2)
                     os.remove(path)
-        else:
-            try:
-                save_cover_data_to(data, path)
+        else:
+            if no_processing:
+                with open(path, 'wb') as f:
+                    f.write(data)
+            else:
+                try:
+                    save_cover_data_to(data, path)

src/calibre/db/cache.py

@@ -1320,6 +1320,24 @@ class Cache(object):
                 self._reload_from_db()
                 raise
 
+    def _do_add_format(self, book_id, fmt, stream, name=None):
+        path = self._field_for('path', book_id)
+        if path is None:
+            # Theoretically, this should never happen, but apparently it
+            # does: http://www.mobileread.com/forums/showthread.php?t=233353
+            self._update_path({book_id}, mark_as_dirtied=False)
+            path = self._field_for('path', book_id)
+        path = path.replace('/', os.sep)
+
+        title = self._field_for('title', book_id, default_value=_('Unknown'))
+        try:
+            author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
+        except IndexError:
+            author = _('Unknown')
+
+        size, fname = self.backend.add_format(book_id, fmt, stream, title, author, path, name)
+        return size, fname
+
     @api
     def add_format(self, book_id, fmt, stream_or_path, replace=True, run_hooks=True, dbapi=None):
         '''
@@ -1343,28 +1361,14 @@ class Cache(object):
             self.format_metadata_cache[book_id].pop(fmt, None)
             try:
                 name = self.fields['formats'].format_fname(book_id, fmt)
-            except:
+            except Exception:
                 name = None
             if name and not replace:
                 return False
-
-            path = self._field_for('path', book_id)
-            if path is None:
-                # Theoretically, this should never happen, but apparently it
-                # does: http://www.mobileread.com/forums/showthread.php?t=233353
-                self._update_path({book_id}, mark_as_dirtied=False)
-                path = self._field_for('path', book_id)
-            path = path.replace('/', os.sep)
-
-            title = self._field_for('title', book_id, default_value=_('Unknown'))
-            try:
-                author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
-            except IndexError:
-                author = _('Unknown')
-
             stream = stream_or_path if hasattr(stream_or_path, 'read') else lopen(stream_or_path, 'rb')
-            size, fname = self.backend.add_format(book_id, fmt, stream, title, author, path, name)
+            size, fname = self._do_add_format(book_id, fmt, stream, name)
             del stream
 
             max_size = self.fields['formats'].table.update_fmt(book_id, fmt, fname, size, self.backend)
@@ -2112,7 +2116,7 @@ class Cache(object):
         with lopen(pt.name, 'rb') as f:
             exporter.add_file(f, dbkey)
         os.remove(pt.name)
-        metadata = {'format_data':format_metadata, 'metadata.db':dbkey}
+        metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total}
         for i, book_id in enumerate(book_ids):
             if progress is not None:
                 progress(self._field_for('title', book_id), i + 1, total)
@@ -2126,7 +2130,43 @@ class Cache(object):
             with exporter.start_file(cover_key) as dest:
                 if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space):
                     dest.discard()
+                else:
+                    format_metadata[book_id]['.cover'] = cover_key
         exporter.set_metadata(library_key, metadata)
+        exporter.commit()
         if progress is not None:
             progress(_('Completed'), total, total)
+
+def import_library(library_key, importer, library_path, progress=None):
+    from calibre.db.backend import DB
+    metadata = importer.metadata[library_key]
+    total = metadata['total']
+    if progress is not None:
+        progress('metadata.db', 0, total)
+    with open(os.path.join(library_path, 'metadata.db'), 'wb') as f:
+        src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path)
+        shutil.copyfileobj(src, f)
+        src.close()
+    cache = Cache(DB(library_path, load_user_formatter_functions=False))
+    cache.init()
+    format_data = {int(book_id):data for book_id, data in metadata['format_data'].iteritems()}
+    cache._update_path(set(format_data), mark_as_dirtied=False)
+    for i, (book_id, fmt_key_map) in enumerate(format_data.iteritems()):
+        title = cache._field_for('title', book_id)
+        if progress is not None:
+            progress(title, i + 1, total)
+        for fmt, fmtkey in fmt_key_map.iteritems():
+            if fmt == '.cover':
+                stream = importer.start_file(fmtkey, _('Cover for %s') % title)
+                path = cache._field_for('path', book_id).replace('/', os.sep)
+                cache.backend.set_cover(book_id, path, stream, no_processing=True)
+            else:
+                stream = importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title))
+                size, fname = cache._do_add_format(book_id, fmt, stream)
+                cache.fields['formats'].table.update_fmt(book_id, fmt, fname, size, cache.backend)
+            stream.close()
+        cache.dump_metadata({book_id})
+    if progress is not None:
+        progress(_('Completed'), total, total)
+    return cache
+
 # }}}
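import_library drives the same optional progress callback as export_library: once for metadata.db, once per book, and once on completion. A minimal sketch of a compatible reporter (the print-based function is hypothetical):

    def report(name, current, total):
        # receives progress('metadata.db', 0, total) first, then
        # progress(title, i + 1, total) per book, and finally
        # progress('Completed', total, total)
        print('%d/%d: %s' % (current, total, name))

    # cache = import_library('l', importer, library_path, progress=report)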

src/calibre/db/tests/filesystem.py

@@ -145,9 +145,17 @@ class FilesystemTest(BaseTest):
         self.assertEqual(sorted([os.path.basename(fpath)]), sorted(os.listdir(os.path.dirname(fpath))))
 
     def test_export_import(self):
-        from calibre.utils.exim import Exporter
+        from calibre.db.cache import import_library
+        from calibre.utils.exim import Exporter, Importer
         cache = self.init_cache()
-        for part_size in (1024, 100, 1):
-            with TemporaryDirectory('export_lib') as tdir:
+        for part_size in (1 << 30, 100, 1):
+            with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
                 exporter = Exporter(tdir, part_size=part_size)
                 cache.export_library('l', exporter)
+                importer = Importer(tdir)
+                ic = import_library('l', importer, idir)
+                self.assertEqual(cache.all_book_ids(), ic.all_book_ids())
+                for book_id in cache.all_book_ids():
+                    self.assertEqual(cache.cover(book_id), ic.cover(book_id), 'Covers not identical for book: %d' % book_id)
+                    for fmt in cache.formats(book_id):
+                        self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))

src/calibre/utils/exim.py

@@ -58,6 +58,7 @@ class Exporter(object):
     VERSION = 1
     TAIL_FMT = b'!II?'  # part_num, version, is_last
     MDATA_SZ_FMT = b'!Q'
+    EXT = '.calibre-data'
 
     def __init__(self, path_to_export_dir, part_size=(1 << 30)):
         self.part_size = part_size
@@ -78,7 +79,7 @@ class Exporter(object):
 
     def new_part(self):
         self.parts.append(open(os.path.join(
-            self.base, 'part-{:04d}.calibre-data'.format(len(self.parts) + 1)), 'wb'))
+            self.base, 'part-{:04d}{}'.format(len(self.parts) + 1, self.EXT)), 'wb'))
 
     def commit_part(self, is_last=False):
         self.f.write(struct.pack(self.TAIL_FMT, len(self.parts), self.VERSION, is_last))
@@ -112,3 +113,74 @@ class Exporter(object):
 
     def start_file(self, key):
         return FileDest(key, self)
+
+
+class FileSource(object):
+
+    def __init__(self, f, size, digest, description, importer):
+        self.f, self.size, self.digest, self.description = f, size, digest, description
+        self.end = f.tell() + size
+        self.hasher = hashlib.sha1()
+        self.importer = importer
+
+    def read(self, size=None):
+        if size is not None and size < 1:
+            return b''
+        left = self.end - self.f.tell()
+        amt = min(left, size or left)
+        if amt < 1:
+            return b''
+        ans = self.f.read(amt)
+        self.hasher.update(ans)
+        return ans
+
+    def close(self):
+        if self.hasher.hexdigest() != self.digest:
+            self.importer.corrupted_files.append(self.description)
+        self.hasher = self.f = None
+
+
+class Importer(object):
+
+    def __init__(self, path_to_export_dir):
+        self.corrupted_files = []
+        part_map = {}
+        tail_size = struct.calcsize(Exporter.TAIL_FMT)
+        for name in os.listdir(path_to_export_dir):
+            if name.lower().endswith(Exporter.EXT):
+                path = os.path.join(path_to_export_dir, name)
+                with open(path, 'rb') as f:
+                    f.seek(-tail_size, os.SEEK_END)
+                    raw = f.read()
+                if len(raw) != tail_size:
+                    raise ValueError('The exported data in %s is not valid, tail too small' % name)
+                part_num, version, is_last = struct.unpack(Exporter.TAIL_FMT, raw)
+                if version > Exporter.VERSION:
+                    raise ValueError('The exported data in %s is not valid, version (%d) is higher than maximum supported version.' % (
+                        name, version))
+                part_map[part_num] = path, is_last
+        nums = sorted(part_map)
+        if not nums:
+            raise ValueError('No exported data found in: %s' % path_to_export_dir)
+        if nums[0] != 1:
+            raise ValueError('The first part of this exported data set is missing')
+        if not part_map[nums[-1]][1]:
+            raise ValueError('The last part of this exported data set is missing')
+        if len(nums) != nums[-1]:
+            raise ValueError('There are some parts of the exported data set missing')
+        self.part_map = {num:path for num, (path, is_last) in part_map.iteritems()}
+        msf = struct.calcsize(Exporter.MDATA_SZ_FMT)
+        offset = tail_size + msf
+        with self.part(nums[-1]) as f:
+            f.seek(-offset, os.SEEK_END)
+            sz, = struct.unpack(Exporter.MDATA_SZ_FMT, f.read(msf))
+            f.seek(- sz - offset, os.SEEK_END)
+            self.metadata = json.loads(f.read(sz))
+            self.file_metadata = self.metadata['file_metadata']
+
+    def part(self, num):
+        return lopen(self.part_map[num], 'rb')
+
+    def start_file(self, key, description):
+        partnum, pos, size, digest = self.file_metadata[key]
+        f = self.part(partnum)
+        f.seek(pos)
+        return FileSource(f, size, digest, description, self)
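For reference, the on-disk layout the Importer parses: every part file ends with the TAIL_FMT struct (part_num, version, is_last), and the last part additionally carries the JSON metadata, preceded by its size packed as MDATA_SZ_FMT, immediately before that tail. A standalone sketch of reading the trailer, following the seek arithmetic in Importer.__init__ (the file path is illustrative):

    import json, os, struct

    TAIL_FMT, MDATA_SZ_FMT = b'!II?', b'!Q'
    with open('/tmp/export_lib/part-0001.calibre-data', 'rb') as f:
        tail_size = struct.calcsize(TAIL_FMT)
        f.seek(-tail_size, os.SEEK_END)
        part_num, version, is_last = struct.unpack(TAIL_FMT, f.read(tail_size))
        if is_last:
            # the metadata size is packed just before the tail...
            msf = struct.calcsize(MDATA_SZ_FMT)
            f.seek(-(tail_size + msf), os.SEEK_END)
            sz, = struct.unpack(MDATA_SZ_FMT, f.read(msf))
            # ...and the JSON metadata blob sits just before that
            f.seek(-(sz + msf + tail_size), os.SEEK_END)
            metadata = json.loads(f.read(sz))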