diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index 77e5004d6d..e3827b3bf7 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -1001,6 +1001,27 @@ class DB:
         yield from self.notes.search(
             self.conn, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields, return_text, process_each_result)
 
+    def export_notes_data(self, outfile):
+        '''
+        Write a zip archive to outfile containing a backup of the notes
+        database (as notes.db) plus whatever export_non_db_data() adds.
+        '''
+        import os
+        import tempfile
+        import zipfile
+        # The backup re-opens the file by name, which fails on Windows while
+        # a NamedTemporaryFile handle is still open, so use a closed file.
+        fd, dbpath = tempfile.mkstemp(suffix='-notes.db')
+        os.close(fd)
+        try:
+            self.backup_notes_database(dbpath)
+            with zipfile.ZipFile(outfile, mode='w') as zf:
+                # Stream from disk instead of reading the whole db into memory
+                zf.write(dbpath, 'notes.db')
+                self.notes.export_non_db_data(zf)
+        finally:
+            os.remove(dbpath)
+
     def initialize_fts(self, dbref):
         self.fts = None
         if not self.prefs['fts_enabled']:
@@ -2572,18 +2593,21 @@ class DB:
         self.conn  # Connect to the moved metadata.db
         progress(_('Completed'), total, total)
 
-    def backup_database(self, path):
+    def _backup_database(self, path, name, extra_sql=''):
         with closing(apsw.Connection(path)) as dest_db:
-            with dest_db.backup('main', self.conn, 'main') as b:
+            with dest_db.backup('main', self.conn, name) as b:
                 while not b.done:
                     with suppress(apsw.BusyError):
                         b.step(128)
-            dest_db.cursor().execute('DELETE FROM metadata_dirtied; VACUUM;')
+            if extra_sql:
+                dest_db.cursor().execute(extra_sql)
+
+    def backup_database(self, path):
+        self._backup_database(path, 'main', 'DELETE FROM metadata_dirtied; VACUUM;')
 
     def backup_fts_database(self, path):
-        with closing(apsw.Connection(path)) as dest_db:
-            with dest_db.backup('main', self.conn, 'fts_db') as b:
-                while not b.done:
-                    with suppress(apsw.BusyError):
-                        b.step(128)
+        self._backup_database(path, 'fts_db')
+
+    def backup_notes_database(self, path):
+        self._backup_database(path, 'notes_db')
     # }}}
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 38053b759c..5ae304fcff 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -15,12 +15,13 @@ import traceback
 import weakref
 from collections import defaultdict
from collections.abc import MutableSet, Set +from contextlib import closing from functools import partial, wraps from io import DEFAULT_BUFFER_SIZE, BytesIO from queue import Queue from threading import Lock from time import monotonic, sleep, time -from typing import NamedTuple, Tuple, Optional +from typing import NamedTuple, Optional, Tuple from calibre import as_unicode, detect_ncpus, isbytestring from calibre.constants import iswindows, preferred_encoding @@ -31,6 +32,7 @@ from calibre.customize.ui import ( from calibre.db import SPOOL_SIZE, _get_next_series_num_for_list from calibre.db.annotations import merge_annotations from calibre.db.categories import get_categories +from calibre.db.constants import NOTES_DIR_NAME from calibre.db.errors import NoSuchBook, NoSuchFormat from calibre.db.fields import IDENTITY, InvalidLinkTable, create_field from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata @@ -3009,12 +3011,18 @@ class Cache: from polyglot.binary import as_hex_unicode key_prefix = as_hex_unicode(library_key) book_ids = self._all_book_ids() - total = len(book_ids) + 1 + total = len(book_ids) + 2 has_fts = self.is_fts_enabled() if has_fts: total += 1 - if progress is not None: - progress('metadata.db', 0, total) + poff = 0 + def report_progress(fname): + nonlocal poff + if progress is not None: + progress(fname, poff, total) + poff += 1 + + report_progress('metadata.db') pt = PersistentTemporaryFile('-export.db') pt.close() self.backend.backup_database(pt.name) @@ -3022,29 +3030,33 @@ class Cache: with open(pt.name, 'rb') as f: exporter.add_file(f, dbkey) os.remove(pt.name) - poff = 1 if has_fts: - poff += 1 - if progress is not None: - progress('full-text-search.db', 1, total) + report_progress('full-text-search.db') pt = PersistentTemporaryFile('-export.db') pt.close() self.backend.backup_fts_database(pt.name) - ftsdbkey = key_prefix + ':::' + 'full-text-search.db' + ftsdbkey = key_prefix + ':::full-text-search.db' with open(pt.name, 'rb') 
as f: exporter.add_file(f, ftsdbkey) os.remove(pt.name) + notesdbkey = key_prefix + ':::notes.db' + with PersistentTemporaryFile('-export.db') as pt: + self.backend.export_notes_data(pt) + pt.flush() + pt.seek(0) + report_progress('notes.db') + exporter.add_file(pt, notesdbkey) format_metadata = {} extra_files = {} - metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total, 'extra_files': extra_files} + metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'notes.db': notesdbkey, 'total':total, 'extra_files': extra_files} if has_fts: metadata['full-text-search.db'] = ftsdbkey for i, book_id in enumerate(book_ids): if abort is not None and abort.is_set(): return if progress is not None: - progress(self._field_for('title', book_id), i + poff, total) + report_progress(self._field_for('title', book_id)) format_metadata[book_id] = fm = {} for fmt in self._formats(book_id): mdata = self.format_metadata(book_id, fmt) @@ -3335,9 +3347,13 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non from calibre.db.backend import DB metadata = importer.metadata[library_key] total = metadata['total'] - poff = 1 - if progress is not None: - progress('metadata.db', 0, total) + poff = 0 + def report_progress(fname): + nonlocal poff + if progress is not None: + progress(fname, poff, total) + poff += 1 + report_progress('metadata.db') if abort is not None and abort.is_set(): return with open(os.path.join(library_path, 'metadata.db'), 'wb') as f: @@ -3354,8 +3370,21 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non src = importer.start_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path) shutil.copyfileobj(src, f) src.close() + if abort is not None and abort.is_set(): + return + if 'notes.db' in metadata: + import zipfile + notes_dir = os.path.join(library_path, NOTES_DIR_NAME) + os.makedirs(notes_dir, exist_ok=True) + with 
closing(importer.start_file(metadata['notes.db'], 'notes.db for ' + library_path)) as stream: + stream.check_hash = False + with zipfile.ZipFile(stream) as zf: + zf.extractall(notes_dir) + if abort is not None and abort.is_set(): + return cache = Cache(DB(library_path, load_user_formatter_functions=False)) cache.init() + format_data = {int(book_id):data for book_id, data in iteritems(metadata['format_data'])} extra_files = {int(book_id):data for book_id, data in metadata.get('extra_files', {}).items()} for i, (book_id, fmt_key_map) in enumerate(iteritems(format_data)): diff --git a/src/calibre/db/notes/connect.py b/src/calibre/db/notes/connect.py index 0d63170e01..c226f12936 100644 --- a/src/calibre/db/notes/connect.py +++ b/src/calibre/db/notes/connect.py @@ -60,15 +60,15 @@ class Notes: conn = backend.get_connection() self.temp_table_counter = count() libdir = os.path.dirname(os.path.abspath(conn.db_filename('main'))) - notes_dir = os.path.join(libdir, NOTES_DIR_NAME) - self.resources_dir = os.path.join(notes_dir, 'resources') - self.backup_dir = os.path.join(notes_dir, 'backup') - self.retired_dir = os.path.join(notes_dir, 'retired') - if not os.path.exists(notes_dir): - os.makedirs(notes_dir, exist_ok=True) + self.notes_dir = os.path.join(libdir, NOTES_DIR_NAME) + self.resources_dir = os.path.join(self.notes_dir, 'resources') + self.backup_dir = os.path.join(self.notes_dir, 'backup') + self.retired_dir = os.path.join(self.notes_dir, 'retired') + if not os.path.exists(self.notes_dir): + os.makedirs(self.notes_dir, exist_ok=True) if iswindows: - winutil.set_file_attributes(notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED) - dbpath = os.path.join(notes_dir, 'notes.db') + winutil.set_file_attributes(self.notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED) + dbpath = os.path.join(self.notes_dir, 'notes.db') conn.execute("ATTACH DATABASE ? 
AS notes_db", (dbpath,)) os.makedirs(self.resources_dir, exist_ok=True) os.makedirs(self.backup_dir, exist_ok=True) @@ -351,3 +351,16 @@ class Notes: break except apsw.SQLError as e: raise FTSQueryError(fts_engine_query, query, e) from e + + def export_non_db_data(self, zf): + import zipfile + def add_dir(which): + for dirpath, _, filenames in os.walk(which): + for f in filenames: + path = os.path.join(dirpath, f) + with open(path, 'rb') as src: + zi = zipfile.ZipInfo.from_file(path, arcname=os.path.relpath(path, self.notes_dir)) + with zf.open(zi, 'w') as dest: + shutil.copyfileobj(src, dest) + add_dir(self.backup_dir) + add_dir(self.resources_dir) diff --git a/src/calibre/db/tests/filesystem.py b/src/calibre/db/tests/filesystem.py index 9e2bce4a6a..085e783427 100644 --- a/src/calibre/db/tests/filesystem.py +++ b/src/calibre/db/tests/filesystem.py @@ -264,6 +264,9 @@ class FilesystemTest(BaseTest): bookdir = os.path.dirname(ic.format_abspath(1, '__COVER_INTERNAL__')) self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read()) self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read()) + r1 = cache.add_notes_resource(b'res1', 'res.jpg') + r2 = cache.add_notes_resource(b'res2', 'res.jpg') + cache.set_notes_for('authors', 2, 'some notes', resource_ids=(r1, r2)) cache.add_format(1, 'TXT', BytesIO(b'testing exim')) cache.fts_indexing_sleep_time = 0.001 cache.enable_fts() @@ -281,6 +284,8 @@ class FilesystemTest(BaseTest): importer = Importer(tdir) ic = import_library('l', importer, idir) self.assertEqual(ic.fts_search('exim')[0]['id'], 1) + self.assertEqual(cache.notes_for('authors', 2), ic.notes_for('authors', 2)) + self.assertEqual(cache.get_notes_resource(r1), ic.get_notes_resource(r1)) def test_find_books_in_directory(self): from calibre.db.adding import find_books_in_directory, compile_rule diff --git a/src/calibre/utils/exim.py b/src/calibre/utils/exim.py index b67fc08f20..3415a58229 100644 --- a/src/calibre/utils/exim.py +++ 
b/src/calibre/utils/exim.py @@ -222,10 +222,24 @@ class FileSource: def __init__(self, f, size, digest, description, mtime, importer): self.f, self.size, self.digest, self.description = f, size, digest, description + self.seekable = self.f.seekable self.mtime = mtime - self.end = f.tell() + size + self.start = f.tell() + self.end = self.start + size self.hasher = hashlib.sha1() self.importer = importer + self.check_hash = True + + def seek(self, amt, whence=os.SEEK_SET): + if whence == os.SEEK_SET: + return self.f.seek(self.start + amt, os.SEEK_SET) + if whence == os.SEEK_END: + return self.f.seek(self.end + amt, os.SEEK_SET) + if whence == os.SEEK_CUR: + return self.f.seek(amt, whence) + + def tell(self): + return self.f.tell() - self.start def read(self, size=None): if size is not None and size < 1: @@ -235,11 +249,12 @@ class FileSource: if amt < 1: return b'' ans = self.f.read(amt) - self.hasher.update(ans) + if self.check_hash: + self.hasher.update(ans) return ans def close(self): - if self.hasher.hexdigest() != self.digest: + if self.check_hash and self.hasher.hexdigest() != self.digest: self.importer.corrupted_files.append(self.description) self.hasher = self.f = None