Fix regression that broke restore of db from backups

Kovid Goyal 2023-11-18 07:33:09 +05:30
parent 6aff552e75
commit d94eff1f48
4 changed files with 102 additions and 52 deletions
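
The core of the fix, visible in the first file below: each note's backup file now also records the item's value (for example the author name), and restore matches notes to items by that value instead of by numeric item id, since ids can differ in a rebuilt database. A minimal sketch of the resulting record layout; the SEP value and helper names here are assumed purely for illustration, not code from this commit:

    # Illustrative sketch only, not code from this commit.
    SEP = b'\0\x1c\0'  # assumed separator value; calibre defines its own SEP

    def pack_backup_record(marked_up_text, searchable_text, resource_hashes, item_value):
        # New layout: note HTML, searchable text, resource hashes, then the item value.
        return SEP.join((
            marked_up_text.encode('utf-8'),
            searchable_text.encode('utf-8'),
            '\n'.join(resource_hashes).encode('utf-8'),
            item_value.encode('utf-8'),
        ))

    def unpack_backup_record(raw):
        # Mirrors the restore side: split into exactly four fields and decode.
        doc, searchable_text, res, item_value = (x.decode('utf-8') for x in raw.split(SEP, 3))
        return doc, searchable_text, frozenset(res.splitlines()), item_value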

View File

@@ -146,7 +146,7 @@ class Notes:
         for (h,) in conn.execute('SELECT resource from notes_db.notes_resources_link WHERE note=?', (note_id,)):
             yield h

-    def set_backup_for(self, field_name, item_id, marked_up_text, searchable_text, resource_hashes):
+    def set_backup_for(self, field_name, item_id, item_value, marked_up_text, searchable_text, resource_hashes):
         path = make_long_path_useable(os.path.join(self.backup_dir, field_name, str(item_id)))
         try:
             f = open(path, 'wb')
@@ -159,6 +159,8 @@
             f.write(searchable_text.encode('utf-8'))
             f.write(SEP)
             f.write('\n'.join(resource_hashes).encode('utf-8'))
+            f.write(SEP)
+            f.write(item_value.encode('utf-8'))

     def path_to_retired_dir_for_item(self, field_name, item_id, item_value):
         key = icu_lower(item_value or '')
@@ -255,7 +257,7 @@
             conn.executemany('''
                 INSERT INTO notes_db.notes_resources_link (note,resource) VALUES (?,?);
             ''', tuple((note_id, x) for x in resources_to_add))
-        self.set_backup_for(field_name, item_id, marked_up_text, searchable_text, used_resource_hashes)
+        self.set_backup_for(field_name, item_id, item_value, marked_up_text, searchable_text, used_resource_hashes)
         return note_id

     def get_note(self, conn, field_name, item_id):
@@ -466,11 +468,12 @@ class Notes:
         errors = []
         for subdir in os.listdir(make_long_path_useable(self.resources_dir)):
             for rf in os.listdir(make_long_path_useable(os.path.join(self.resources_dir, subdir))):
-                name_path = os.path.join(self.resources_dir, subdir, rf + METADATA_EXT)
-                name = 'unnamed'
-                with suppress(OSError), open(make_long_path_useable(name_path)) as n:
-                    name = json.loads(n.read())['name']
-                resources[rf] = name
+                if not rf.endswith(METADATA_EXT):
+                    name_path = os.path.join(self.resources_dir, subdir, rf + METADATA_EXT)
+                    name = 'unnamed'
+                    with suppress(OSError), open(make_long_path_useable(name_path)) as n:
+                        name = json.loads(n.read())['name']
+                    resources[rf.replace('-', ':', 1)] = name
         items = {}
         for f in os.listdir(make_long_path_useable(self.backup_dir)):
             if f in self.allowed_fields:
@@ -484,35 +487,40 @@ class Notes:
         report_progress(None, total)
         for field, entries in items.items():
             table = tables[field]
-            for item_id in entries:
-                item_val = table.id_map.get(item_id)
+            rmap = {v: k for k, v in table.id_map.items()}
+            for old_item_id in entries:
                 i += 1
-                if item_val is not None:
-                    report_progress(item_val, i)
-                    try:
-                        with open(make_long_path_useable(os.path.join(self.backup_dir, field, str(item_id)))) as f:
-                            raw = f.read()
-                            st = os.stat(f.fileno())
-                    except OSError as e:
-                        errors.append(_('Failed to read from document for {path} with error: {error}').format(path=item_val, error=e))
-                        continue
-                    try:
-                        doc, searchable_text, res = raw.split(SEP, 2)
-                    except Exception:
-                        errors.append(_('Failed to parse document for: {}').format(item_val))
-                        continue
-                    resources = frozenset(res.splitlines())
-                    missing_resources = resources - known_resources
-                    if missing_resources:
-                        errors.append(_('Some resources for {} were missing').format(item_val))
-                    resources &= known_resources
-                    try:
-                        self.set_note(conn, field, item_id, item_val, doc, resources, searchable_text, ctime=st.st_ctime, mtime=st.st_mtime)
-                    except Exception as e:
-                        errors.append(_('Failed to set note for {path} with error: {error}').format(path=item_val, error=e))
-                else:
-                    errors.append(_('Could not restore item: {} as not present in database').format(f'{field}/{item_id}'))
+                try:
+                    with open(make_long_path_useable(os.path.join(self.backup_dir, field, str(old_item_id))), 'rb') as f:
+                        raw = f.read()
+                        st = os.stat(f.fileno())
+                except OSError as e:
+                    errors.append(_('Failed to read from document for {path} with error: {error}').format(path=f'{field}:{old_item_id}', error=e))
+                    report_progress('', i)
+                    continue
+                try:
+                    doc, searchable_text, res, old_item_val = (str(x, 'utf-8') for x in raw.split(SEP, 3))
+                except Exception as err:
+                    errors.append(_('Failed to parse document for: {0} with error: {1}').format(old_item_id, err))
+                    report_progress('', i)
+                    continue
+                item_id = rmap.get(old_item_val)
+                if item_id is None:
+                    errors.append(_(
+                        'The item {old_item_val} does not exist in the {field} column in the restored database, could not restore its notes'
+                    ).format(old_item_val=old_item_val, field=field))
+                    report_progress('', i)
+                    continue
+                report_progress(old_item_val, i)
+                resources = frozenset(res.splitlines())
+                missing_resources = resources - known_resources
+                if missing_resources:
+                    errors.append(_('Some resources for {} were missing').format(old_item_val))
+                resources &= known_resources
+                try:
+                    self.set_note(conn, field, item_id, old_item_val, doc, resources, searchable_text, ctime=st.st_ctime, mtime=st.st_mtime)
+                except Exception as e:
+                    errors.append(_('Failed to set note for {path} with error: {error}').format(path=old_item_val, error=e))
         return errors

     def export_note(self, conn, field_name, item_id):
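
The restore loop above now resolves each backed-up note through a reverse map from item value to the id that item has in the rebuilt database. A tiny self-contained sketch of that lookup; the ids, values and the helper name are invented for illustration:

    # Illustrative sketch only; the ids and values are invented.
    id_map = {1: 'Jane Austen', 2: 'Charles Dickens'}  # restored db: item id -> item value
    rmap = {v: k for k, v in id_map.items()}           # item value -> item id

    def restore_target(old_item_val):
        # A note backed up for 'Jane Austen' attaches to whatever id that
        # author has after the restore; None means the item no longer exists.
        return rmap.get(old_item_val)

    assert restore_target('Jane Austen') == 1
    assert restore_target('Unknown Author') is None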

View File

@@ -16,7 +16,7 @@ from operator import itemgetter
 from threading import Thread

 from calibre import force_unicode, isbytestring
-from calibre.constants import filesystem_encoding
+from calibre.constants import filesystem_encoding, iswindows
 from calibre.db.backend import DB, DBPrefs
 from calibre.db.constants import METADATA_FILE_NAME, TRASH_DIR_NAME, NOTES_DIR_NAME, NOTES_DB_NAME
 from calibre.db.cache import Cache
@@ -293,9 +293,12 @@ class Restore(Thread):
         self.progress_callback(None, len(self.books))
         self.books.sort(key=itemgetter('id'))
-        shutil.copytree(os.path.join(self.src_library_path, NOTES_DIR_NAME), os.path.join(self.library_path, NOTES_DIR_NAME))
+        notes_dest = os.path.join(self.library_path, NOTES_DIR_NAME)
+        if os.path.exists(notes_dest):  # created by load_preferences()
+            shutil.rmtree(notes_dest)
+        shutil.copytree(os.path.join(self.src_library_path, NOTES_DIR_NAME), notes_dest)
         with suppress(FileNotFoundError):
-            os.remove(os.path.join(self.library_path, NOTES_DIR_NAME, NOTES_DB_NAME))
+            os.remove(os.path.join(notes_dest, NOTES_DB_NAME))

         db = Restorer(self.library_path)
         for i, book in enumerate(self.books):
@@ -316,6 +319,7 @@
     def replace_db(self):
         dbpath = os.path.join(self.src_library_path, 'metadata.db')
         ndbpath = os.path.join(self.library_path, 'metadata.db')
+        sleep_time = 30 if iswindows else 0
         save_path = self.olddb = os.path.splitext(dbpath)[0]+'_pre_restore.db'
         if os.path.exists(save_path):
@@ -324,7 +328,33 @@
         try:
             os.replace(dbpath, save_path)
         except OSError:
-            time.sleep(30) # Wait a little for dropbox or the antivirus or whatever to release the file
+            if iswindows:
+                time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file
             shutil.copyfile(dbpath, save_path)
             os.remove(dbpath)
         shutil.copyfile(ndbpath, dbpath)
+        old_notes_path = os.path.join(self.src_library_path, NOTES_DIR_NAME)
+        new_notes_path = os.path.join(self.library_path, NOTES_DIR_NAME)
+        temp = old_notes_path + '-staging'
+        try:
+            shutil.move(new_notes_path, temp)
+        except OSError:
+            if not iswindows:
+                raise
+            time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file
+            shutil.move(new_notes_path, temp)
+        try:
+            shutil.rmtree(old_notes_path)
+        except OSError:
+            if not iswindows:
+                raise
+            time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file
+            shutil.rmtree(old_notes_path)
+        try:
+            shutil.move(temp, old_notes_path)
+        except OSError:
+            if not iswindows:
+                raise
+            time.sleep(sleep_time) # Wait a little for dropbox or the antivirus or whatever to release the file
+            shutil.move(temp, old_notes_path)
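
The retry blocks added above all follow one pattern: perform the filesystem operation, and on Windows retry once after a pause if something like Dropbox or an antivirus scanner still holds the file. A hedged sketch of that pattern as a standalone helper; the helper name is hypothetical and not part of this commit:

    import time

    iswindows = False  # stand-in for the platform flag; calibre imports it from calibre.constants

    def retry_once_on_windows(action, sleep_time=30):
        # Run action(); if it fails with OSError on Windows, wait for the file
        # to be released and try exactly one more time. Elsewhere, fail fast.
        try:
            action()
        except OSError:
            if not iswindows:
                raise
            time.sleep(sleep_time)
            action()

    # Example: retry_once_on_windows(lambda: shutil.move(new_notes_path, temp))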

View File

@@ -12,6 +12,16 @@ from calibre.db.tests.base import BaseTest
 from calibre.utils.resources import get_image_path


+def test_notes_restore(self: 'NotesTest'):
+    cache, notes = self.create_notes_db()
+    authors = sorted(cache.all_field_ids('authors'))
+    doc = 'simple notes for an author'
+    h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
+    h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
+    cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1, h2))
+    doc2 = 'simple notes for an author2'
+    cache.set_notes_for('authors', authors[1], doc2, resource_hashes=(h2,))
+
 def test_notes_api(self: 'NotesTest'):
     cache, notes = self.create_notes_db()
     authors = sorted(cache.all_field_ids('authors'))

View File

@@ -10,11 +10,12 @@ from collections import namedtuple
 from functools import partial
 from io import BytesIO

+from calibre.db.backend import FTSQueryError
+from calibre.db.constants import RESOURCE_URL_SCHEME
+from calibre.db.tests.base import IMG, BaseTest
 from calibre.ebooks.metadata import author_to_author_sort, title_sort
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import UNDEFINED_DATE
-from calibre.db.tests.base import BaseTest, IMG
-from calibre.db.backend import FTSQueryError
 from polyglot.builtins import iteritems, itervalues
@@ -322,8 +323,9 @@ class WritingTest(BaseTest):
         af(self.init_cache(cl).dirtied_cache)

         prev = cache.field_for('last_modified', 3)
-        import calibre.db.cache as c
         from datetime import timedelta
+
+        import calibre.db.cache as c
         utime = prev+timedelta(days=1)
         onowf = c.nowf
         c.nowf = lambda: utime
@@ -401,12 +403,14 @@
         with open(os.path.join(bookdir, 'sub', 'recurse'), 'w') as f:
             f.write('recurse')
         ebefore = read_all_extra_files()
-        authors = cache.field_for('authors', 1)
-        author_id = cache.get_item_id('authors', authors[0])
-        doc = 'simple notes for an author'
+        authors = sorted(cache.all_field_ids('authors'))
         h1 = cache.add_notes_resource(b'resource1', 'r1.jpg')
-        h2 = cache.add_notes_resource(b'resource2', 'r1.jpg')
-        cache.set_notes_for('authors', author_id, doc, resource_hashes=(h1, h2))
+        h2 = cache.add_notes_resource(b'resource2', 'r2.jpg')
+        doc = f'simple notes for an author <img src="{RESOURCE_URL_SCHEME}://{h1.replace(":", "/",1)}"> '
+        cache.set_notes_for('authors', authors[0], doc, resource_hashes=(h1,))
+        doc += f'2 <img src="{RESOURCE_URL_SCHEME}://{h2.replace(":", "/",1)}">'
+        cache.set_notes_for('authors', authors[1], doc, resource_hashes=(h1,h2))
+        notes_before = {cache.get_item_name('authors', aid): cache.export_note('authors', aid) for aid in authors}
         cache.close()
         from calibre.db.restore import Restore
         restorer = Restore(cl)
@@ -418,11 +422,9 @@
         ae(lbefore, tuple(cache.get_all_link_maps_for_book(i) for i in book_ids))
         ae(fbefore, read_all_formats())
         ae(ebefore, read_all_extra_files())
-        ae(cache.notes_for('authors', author_id), doc)
-        ae(cache.notes_resources_used_by('authors', author_id), frozenset({h1, h2}))
-        ae(cache.get_notes_resource(h1)['data'], b'resource1')
-        ae(cache.get_notes_resource(h2)['data'], b'resource2')
+        authors = sorted(cache.all_field_ids('authors'))
+        notes_after = {cache.get_item_name('authors', aid): cache.export_note('authors', aid) for aid in authors}
+        ae(notes_before, notes_after)
     # }}}

     def test_set_cover(self): # {{{
@@ -821,7 +823,7 @@
     def test_annotations(self): # {{{
         'Test handling of annotations'
-        from calibre.utils.date import utcnow, EPOCH
+        from calibre.utils.date import EPOCH, utcnow
         cl = self.cloned_library
         cache = self.init_cache(cl)
         # First empty dirtied