From 0db0c10811cf2cf45a6b48dd96966f1d10638ed0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 13 Dec 2015 13:19:57 +0530 Subject: [PATCH] Code to export all data in a calibre library --- src/calibre/db/backend.py | 25 +++++++++++++++-- src/calibre/db/cache.py | 43 ++++++++++++++++++++++++++---- src/calibre/db/tests/filesystem.py | 9 +++++++ 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 28be8c18a7..bc0dfcae7a 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -1295,7 +1295,7 @@ class DB(object): except EnvironmentError: pass # Cover doesn't exist - def copy_cover_to(self, path, dest, windows_atomic_move=None, use_hardlink=False): + def copy_cover_to(self, path, dest, windows_atomic_move=None, use_hardlink=False, report_file_size=None): path = os.path.abspath(os.path.join(self.library_path, path, 'cover.jpg')) if windows_atomic_move is not None: if not isinstance(dest, basestring): @@ -1318,6 +1318,10 @@ class DB(object): with f: if hasattr(dest, 'write'): + if report_file_size is not None: + f.seek(0, os.SEEK_END) + report_file_size(f.tell()) + f.seek(0) shutil.copyfileobj(f, dest) if hasattr(dest, 'flush'): dest.flush() @@ -1375,7 +1379,7 @@ class DB(object): save_cover_data_to(data, path) def copy_format_to(self, book_id, fmt, fname, path, dest, - windows_atomic_move=None, use_hardlink=False): + windows_atomic_move=None, use_hardlink=False, report_file_size=None): path = self.format_abspath(book_id, fmt, fname, path) if path is None: return False @@ -1396,6 +1400,10 @@ class DB(object): else: if hasattr(dest, 'write'): with lopen(path, 'rb') as f: + if report_file_size is not None: + f.seek(0, os.SEEK_END) + report_file_size(f.tell()) + f.seek(0) shutil.copyfileobj(f, dest) if hasattr(dest, 'flush'): dest.flush() @@ -1723,4 +1731,17 @@ class DB(object): self.execute('UPDATE books SET path=? 
def backup_database(self, path):
    '''
    Write a copy of the database at ``self.dbpath`` to ``path``.

    The copy is made with the backup facility of apsw, 100 pages per step.
    Afterwards the ``metadata_dirtied`` table of the copy is emptied and
    the copy is vacuumed. The source database is not modified.

    :param path: Path of the database file that receives the copy.

    Both connections opened here are closed before returning, whether or
    not the backup or the cleanup of the copy raised an error.
    '''
    # We have to open a new connection to self.dbpath, until this issue is fixed:
    # https://github.com/rogerbinns/apsw/issues/199
    dest_db = apsw.Connection(path)
    try:
        source = apsw.Connection(self.dbpath)
        try:
            with dest_db.backup('main', source, 'main') as b:
                while not b.done:
                    b.step(100)
        finally:
            # Release the extra source connection even if a step failed
            source.close()
        dest_db.cursor().execute('DELETE FROM metadata_dirtied; VACUUM;')
    finally:
        # Never leave the destination file held open by a stale connection
        dest_db.close()
@read_api
def export_library(self, library_key, exporter, progress=None):
    '''
    Export the database, all book formats and all covers of this library
    through ``exporter``.

    :param library_key: Key identifying this library. Its hex encoding is
        used as the prefix of every exported file key and it is passed
        unchanged to ``exporter.set_metadata()``.
    :param exporter: Object used to store the data. This method calls
        ``exporter.add_file(fileobj, key)``, ``exporter.start_file(key)``
        (used as a context manager yielding an object with
        ``ensure_space`` and ``discard()``) and
        ``exporter.set_metadata(library_key, metadata)``.
    :param progress: Optional callable, called as
        ``progress(name, index, total)`` before the database is exported,
        before each book is exported and once at the end.

    The metadata handed to the exporter is a dict with the keys
    ``'format_data'`` (mapping book id to a dict of format to file key)
    and ``'metadata.db'`` (the file key of the database copy).
    '''
    from binascii import hexlify
    key_prefix = hexlify(library_key)
    book_ids = self._all_book_ids()
    total = len(book_ids) + 1  # one extra step for metadata.db
    format_metadata = {}
    if progress is not None:
        progress('metadata.db', 0, total)
    dbkey = key_prefix + ':::' + 'metadata.db'
    pt = PersistentTemporaryFile('-export.db')
    pt.close()
    try:
        self.backend.backup_database(pt.name)
        with lopen(pt.name, 'rb') as f:
            exporter.add_file(f, dbkey)
    finally:
        # Do not leave the database copy behind if the backup or the
        # exporter fails
        os.remove(pt.name)
    metadata = {'format_data':format_metadata, 'metadata.db':dbkey}
    for i, book_id in enumerate(book_ids):
        if progress is not None:
            progress(self._field_for('title', book_id), i + 1, total)
        format_metadata[book_id] = {}
        for fmt in self._formats(book_id):
            key = '%s:%s:%s' % (key_prefix, book_id, fmt)
            format_metadata[book_id][fmt] = key
            with exporter.start_file(key) as dest:
                self._copy_format_to(book_id, fmt, dest, report_file_size=dest.ensure_space)
        cover_key = '%s:%s:%s' % (key_prefix, book_id, '.cover')
        with exporter.start_file(cover_key) as dest:
            # Books without a cover must not leave an empty entry behind
            if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space):
                dest.discard()
    exporter.set_metadata(library_key, metadata)
    if progress is not None:
        progress(_('Completed'), total, total)
def test_export_import(self):
    # Export the test library once per part size, each time into a fresh
    # temporary directory. Only checks that exporting does not raise.
    from calibre.utils.exim import Exporter
    library = self.init_cache()
    part_sizes = (1024, 100, 1)
    for size in part_sizes:
        with TemporaryDirectory('export_lib') as export_dir:
            library.export_library('l', Exporter(export_dir, part_size=size))