From d94eff1f48359eaebc8928b560e2091a0fbfc219 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 18 Nov 2023 07:33:09 +0530 Subject: [PATCH] Fix regression that broke restore of db from backups --- src/calibre/db/notes/connect.py | 76 ++++++++++++++++++--------------- src/calibre/db/restore.py | 38 +++++++++++++++-- src/calibre/db/tests/notes.py | 10 +++++ src/calibre/db/tests/writing.py | 30 +++++++------ 4 files changed, 102 insertions(+), 52 deletions(-) diff --git a/src/calibre/db/notes/connect.py b/src/calibre/db/notes/connect.py index 1e7bfc29d1..11628fc65a 100644 --- a/src/calibre/db/notes/connect.py +++ b/src/calibre/db/notes/connect.py @@ -146,7 +146,7 @@ class Notes: for (h,) in conn.execute('SELECT resource from notes_db.notes_resources_link WHERE note=?', (note_id,)): yield h - def set_backup_for(self, field_name, item_id, marked_up_text, searchable_text, resource_hashes): + def set_backup_for(self, field_name, item_id, item_value, marked_up_text, searchable_text, resource_hashes): path = make_long_path_useable(os.path.join(self.backup_dir, field_name, str(item_id))) try: f = open(path, 'wb') @@ -159,6 +159,8 @@ class Notes: f.write(searchable_text.encode('utf-8')) f.write(SEP) f.write('\n'.join(resource_hashes).encode('utf-8')) + f.write(SEP) + f.write(item_value.encode('utf-8')) def path_to_retired_dir_for_item(self, field_name, item_id, item_value): key = icu_lower(item_value or '') @@ -255,7 +257,7 @@ class Notes: conn.executemany(''' INSERT INTO notes_db.notes_resources_link (note,resource) VALUES (?,?); ''', tuple((note_id, x) for x in resources_to_add)) - self.set_backup_for(field_name, item_id, marked_up_text, searchable_text, used_resource_hashes) + self.set_backup_for(field_name, item_id, item_value, marked_up_text, searchable_text, used_resource_hashes) return note_id def get_note(self, conn, field_name, item_id): @@ -466,11 +468,12 @@ class Notes: errors = [] for subdir in os.listdir(make_long_path_useable(self.resources_dir)): for rf in os.listdir(make_long_path_useable(os.path.join(self.resources_dir, subdir))): - name_path = os.path.join(self.resources_dir, subdir, rf + METADATA_EXT) - name = 'unnamed' - with suppress(OSError), open(make_long_path_useable(name_path)) as n: - name = json.loads(n.read())['name'] - resources[rf] = name + if not rf.endswith(METADATA_EXT): + name_path = os.path.join(self.resources_dir, subdir, rf + METADATA_EXT) + name = 'unnamed' + with suppress(OSError), open(make_long_path_useable(name_path)) as n: + name = json.loads(n.read())['name'] + resources[rf.replace('-', ':', 1)] = name items = {} for f in os.listdir(make_long_path_useable(self.backup_dir)): if f in self.allowed_fields: @@ -484,35 +487,40 @@ class Notes: report_progress(None, total) for field, entries in items.items(): table = tables[field] - for item_id in entries: - item_val = table.id_map.get(item_id) + rmap = {v: k for k, v in table.id_map.items()} + for old_item_id in entries: i += 1 - if item_val is not None: - report_progress(item_val, i) - try: - with open(make_long_path_useable(os.path.join(self.backup_dir, field, str(item_id)))) as f: - raw = f.read() - st = os.stat(f.fileno()) - except OSError as e: - errors.append(_('Failed to read from document for {path} with error: {error}').format(path=item_val, error=e)) - continue - try: - doc, searchable_text, res = raw.split(SEP, 2) - except Exception: - errors.append(_('Failed to parse document for: {}').format(item_val)) - continue - resources = frozenset(res.splitlines()) - missing_resources = resources - 
known_resources - if missing_resources: - errors.append(_('Some resources for {} were missing').format(item_val)) - resources &= known_resources - try: - self.set_note(conn, field, item_id, item_val, doc, resources, searchable_text, ctime=st.st_ctime, mtime=st.st_mtime) - except Exception as e: - errors.append(_('Failed to set note for {path} with error: {error}').format(path=item_val, error=e)) - else: - errors.append(_('Could not restore item: {} as not present in database').format(f'{field}/{item_id}')) + try: + with open(make_long_path_useable(os.path.join(self.backup_dir, field, str(old_item_id))), 'rb') as f: + raw = f.read() + st = os.stat(f.fileno()) + except OSError as e: + errors.append(_('Failed to read from document for {path} with error: {error}').format(path=f'{field}:{old_item_id}', error=e)) report_progress('', i) + continue + try: + doc, searchable_text, res, old_item_val = (str(x, 'utf-8') for x in raw.split(SEP, 3)) + except Exception as err: + errors.append(_('Failed to parse document for: {0} with error: {1}').format(old_item_id, err)) + report_progress('', i) + continue + item_id = rmap.get(old_item_val) + if item_id is None: + errors.append(_( + 'The item {old_item_val} does not exist in the {field} column in the restored database, could not restore its notes' + ).format(old_item_val=old_item_val, field=field)) + report_progress('', i) + continue + report_progress(old_item_val, i) + resources = frozenset(res.splitlines()) + missing_resources = resources - known_resources + if missing_resources: + errors.append(_('Some resources for {} were missing').format(old_item_val)) + resources &= known_resources + try: + self.set_note(conn, field, item_id, old_item_val, doc, resources, searchable_text, ctime=st.st_ctime, mtime=st.st_mtime) + except Exception as e: + errors.append(_('Failed to set note for {path} with error: {error}').format(path=old_item_val, error=e)) return errors def export_note(self, conn, field_name, item_id): diff --git a/src/calibre/db/restore.py b/src/calibre/db/restore.py index 3a54a3ec47..1025697e4a 100644 --- a/src/calibre/db/restore.py +++ b/src/calibre/db/restore.py @@ -16,7 +16,7 @@ from operator import itemgetter from threading import Thread from calibre import force_unicode, isbytestring -from calibre.constants import filesystem_encoding +from calibre.constants import filesystem_encoding, iswindows from calibre.db.backend import DB, DBPrefs from calibre.db.constants import METADATA_FILE_NAME, TRASH_DIR_NAME, NOTES_DIR_NAME, NOTES_DB_NAME from calibre.db.cache import Cache @@ -293,9 +293,12 @@ class Restore(Thread): self.progress_callback(None, len(self.books)) self.books.sort(key=itemgetter('id')) - shutil.copytree(os.path.join(self.src_library_path, NOTES_DIR_NAME), os.path.join(self.library_path, NOTES_DIR_NAME)) + notes_dest = os.path.join(self.library_path, NOTES_DIR_NAME) + if os.path.exists(notes_dest): # created by load_preferences() + shutil.rmtree(notes_dest) + shutil.copytree(os.path.join(self.src_library_path, NOTES_DIR_NAME), notes_dest) with suppress(FileNotFoundError): - os.remove(os.path.join(self.library_path, NOTES_DIR_NAME, NOTES_DB_NAME)) + os.remove(os.path.join(notes_dest, NOTES_DB_NAME)) db = Restorer(self.library_path) for i, book in enumerate(self.books): @@ -316,6 +319,7 @@ class Restore(Thread): def replace_db(self): dbpath = os.path.join(self.src_library_path, 'metadata.db') ndbpath = os.path.join(self.library_path, 'metadata.db') + sleep_time = 30 if iswindows else 0 save_path = self.olddb = 
os.path.splitext(dbpath)[0]+'_pre_restore.db' if os.path.exists(save_path): @@ -324,7 +328,33 @@ class Restore(Thread): try: os.replace(dbpath, save_path) except OSError: - time.sleep(30) # Wait a little for dropbox or the antivirus or whatever to release the file + if iswindows: + time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file shutil.copyfile(dbpath, save_path) os.remove(dbpath) shutil.copyfile(ndbpath, dbpath) + + old_notes_path = os.path.join(self.src_library_path, NOTES_DIR_NAME) + new_notes_path = os.path.join(self.library_path, NOTES_DIR_NAME) + temp = old_notes_path + '-staging' + try: + shutil.move(new_notes_path, temp) + except OSError: + if not iswindows: + raise + time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file + shutil.move(new_notes_path, temp) + try: + shutil.rmtree(old_notes_path) + except OSError: + if not iswindows: + raise + time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file + shutil.rmtree(old_notes_path) + try: + shutil.move(temp, old_notes_path) + except OSError: + if not iswindows: + raise + time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file + shutil.move(temp, old_notes_path) diff --git a/src/calibre/db/tests/notes.py b/src/calibre/db/tests/notes.py index b220ec49ae..65bd5c0ae9 100644 --- a/src/calibre/db/tests/notes.py +++ b/src/calibre/db/tests/notes.py @@ -12,6 +12,16 @@ from calibre.db.tests.base import BaseTest from calibre.utils.resources import get_image_path +def test_notes_restore(self: 'NotesTest'): + cache, notes = self.create_notes_db() + authors = sorted(cache.all_field_ids('authors')) + doc = 'simple notes for an author' + h1 = cache.add_notes_resource(b'resource1', 'r1.jpg') + h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') + cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2)) + doc2 = 'simple notes for an author2' + cache.set_notes_for('authors', authors[1], doc2, resource_hashes=(h2,)) + def test_notes_api(self: 'NotesTest'): cache, notes = self.create_notes_db() authors = sorted(cache.all_field_ids('authors')) diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py index 383af79ac2..1d94d67086 100644 --- a/src/calibre/db/tests/writing.py +++ b/src/calibre/db/tests/writing.py @@ -10,11 +10,12 @@ from collections import namedtuple from functools import partial from io import BytesIO +from calibre.db.backend import FTSQueryError +from calibre.db.constants import RESOURCE_URL_SCHEME +from calibre.db.tests.base import IMG, BaseTest from calibre.ebooks.metadata import author_to_author_sort, title_sort from calibre.ebooks.metadata.book.base import Metadata from calibre.utils.date import UNDEFINED_DATE -from calibre.db.tests.base import BaseTest, IMG -from calibre.db.backend import FTSQueryError from polyglot.builtins import iteritems, itervalues @@ -322,8 +323,9 @@ class WritingTest(BaseTest): af(self.init_cache(cl).dirtied_cache) prev = cache.field_for('last_modified', 3) - import calibre.db.cache as c from datetime import timedelta + + import calibre.db.cache as c utime = prev+timedelta(days=1) onowf = c.nowf c.nowf = lambda: utime @@ -401,12 +403,14 @@ class WritingTest(BaseTest): with open(os.path.join(bookdir, 'sub', 'recurse'), 'w') as f: f.write('recurse') ebefore = read_all_extra_files() - authors = cache.field_for('authors', 1) - author_id = cache.get_item_id('authors', authors[0]) - doc = 'simple 
notes for an author' + authors = sorted(cache.all_field_ids('authors')) h1 = cache.add_notes_resource(b'resource1', 'r1.jpg') - h2 = cache.add_notes_resource(b'resource2', 'r1.jpg') - cache.set_notes_for('authors', author_id, doc, resource_hashes=(h1, h2)) + h2 = cache.add_notes_resource(b'resource2', 'r2.jpg') + doc = f'simple notes for an author ' + cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1,)) + doc += f'2 ' + cache.set_notes_for('authors', authors[1], doc, resource_hashes=(h1,h2)) + notes_before = {cache.get_item_name('authors', aid): cache.export_note('authors', aid) for aid in authors} cache.close() from calibre.db.restore import Restore restorer = Restore(cl) @@ -418,11 +422,9 @@ class WritingTest(BaseTest): ae(lbefore, tuple(cache.get_all_link_maps_for_book(i) for i in book_ids)) ae(fbefore, read_all_formats()) ae(ebefore, read_all_extra_files()) - ae(cache.notes_for('authors', author_id), doc) - ae(cache.notes_resources_used_by('authors', author_id), frozenset({h1, h2})) - ae(cache.get_notes_resource(h1)['data'], b'resource1') - ae(cache.get_notes_resource(h2)['data'], b'resource2') - + authors = sorted(cache.all_field_ids('authors')) + notes_after = {cache.get_item_name('authors', aid): cache.export_note('authors', aid) for aid in authors} + ae(notes_before, notes_after) # }}} def test_set_cover(self): # {{{ @@ -821,7 +823,7 @@ class WritingTest(BaseTest): def test_annotations(self): # {{{ 'Test handling of annotations' - from calibre.utils.date import utcnow, EPOCH + from calibre.utils.date import EPOCH, utcnow cl = self.cloned_library cache = self.init_cache(cl) # First empty dirtied
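
The gist of the fix, shown below as a standalone sketch (not part of the patch): each note backup now records the item's value (for example the author name) as a fourth SEP-separated field, and restore looks the item up by that value in the rebuilt database instead of trusting the old numeric item id, which changes when metadata.db is recreated. The helper names, the SEP constant and the file path in the sketch are illustrative assumptions; the real implementation is set_backup_for() and restore_notes() in src/calibre/db/notes/connect.py, with their own separator, paths and error reporting.

# Illustrative sketch only -- mimics the new backup layout and the
# value-based lookup used during restore; not the calibre implementation.

SEP = b'\x00\x1c\x00'  # assumption: the real separator in connect.py may differ


def write_note_backup(path, marked_up_text, searchable_text, resource_hashes, item_value):
    # The item value is appended as a fourth field, so a later restore no
    # longer depends on the old numeric item id surviving the rebuild.
    with open(path, 'wb') as f:
        f.write(marked_up_text.encode('utf-8'))
        f.write(SEP)
        f.write(searchable_text.encode('utf-8'))
        f.write(SEP)
        f.write('\n'.join(resource_hashes).encode('utf-8'))
        f.write(SEP)
        f.write(item_value.encode('utf-8'))


def read_note_backup(path, id_map):
    # id_map maps item id -> item value in the rebuilt database (compare
    # table.id_map in the patch); invert it to look the item up by value.
    with open(path, 'rb') as f:
        raw = f.read()
    doc, searchable_text, res, item_value = (x.decode('utf-8') for x in raw.split(SEP, 3))
    rmap = {v: k for k, v in id_map.items()}
    new_item_id = rmap.get(item_value)  # None if the item no longer exists
    return new_item_id, doc, searchable_text, frozenset(res.splitlines())


if __name__ == '__main__':
    # Hypothetical usage: the path, hash and id_map values are made up.
    write_note_backup('note.backup', '<p>notes</p>', 'notes', ['xxh64:abc'], 'Some Author')
    print(read_note_backup('note.backup', {7: 'Some Author'}))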