Fix data files not being merged when merging books

Fixes #2017373 [Possible bug? Extra data files disappear upon merging](https://bugs.launchpad.net/calibre/+bug/2017373)
Kovid Goyal 2023-04-23 13:04:04 +05:30
parent feac41c8f7
commit edbf95a902
5 changed files with 118 additions and 22 deletions


@@ -1923,25 +1923,38 @@ class DB:
                 with src:
                     yield relpath, src, mtime
 
-    def add_extra_file(self, relpath, stream, book_path, replace=True):
-        dest = os.path.abspath(os.path.join(self.library_path, book_path, relpath))
-        if not replace and os.path.exists(dest):
-            return False
+    def add_extra_file(self, relpath, stream, book_path, replace=True, auto_rename=False):
+        bookdir = os.path.join(self.library_path, book_path)
+        dest = os.path.abspath(os.path.join(bookdir, relpath))
+        if not replace and os.path.exists(make_long_path_useable(dest)):
+            if not auto_rename:
+                return None
+            dirname, basename = os.path.split(dest)
+            num = 0
+            while True:
+                mdir = 'merge conflict'
+                if num:
+                    mdir += f' {num}'
+                candidate = os.path.join(dirname, mdir, basename)
+                if not os.path.exists(make_long_path_useable(candidate)):
+                    dest = candidate
+                    break
+                num += 1
         if isinstance(stream, str):
             try:
-                shutil.copy2(stream, dest)
+                shutil.copy2(make_long_path_useable(stream), make_long_path_useable(dest))
             except FileNotFoundError:
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                shutil.copy2(stream, dest)
+                os.makedirs(make_long_path_useable(os.path.dirname(dest)), exist_ok=True)
+                shutil.copy2(make_long_path_useable(stream), make_long_path_useable(dest))
         else:
             try:
-                d = open(dest, 'wb')
+                d = open(make_long_path_useable(dest), 'wb')
             except FileNotFoundError:
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                d = open(dest, 'wb')
+                os.makedirs(make_long_path_useable(os.path.dirname(dest)), exist_ok=True)
+                d = open(make_long_path_useable(dest), 'wb')
             with d:
                 shutil.copyfileobj(stream, d)
-        return True
+        return os.path.relpath(dest, bookdir).replace(os.sep, '/')
 
     def write_backup(self, path, raw):
         path = os.path.abspath(os.path.join(self.library_path, path, METADATA_FILE_NAME))
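The collision-handling loop above is self-contained enough to lift out. Below is a minimal, stdlib-only sketch of the renaming scheme; `conflict_free_dest` is a hypothetical name, and `make_long_path_useable` (calibre's Windows long-path helper) is stubbed as a no-op:

```python
import os


def make_long_path_useable(path):
    # Stand-in for calibre's Windows long-path helper; a no-op here.
    return path


def conflict_free_dest(dest):
    # Mirrors the loop in add_extra_file() above: the first conflicting
    # file goes into a 'merge conflict' subdirectory, the next into
    # 'merge conflict 1', and so on, keeping the original file name.
    if not os.path.exists(make_long_path_useable(dest)):
        return dest
    dirname, basename = os.path.split(dest)
    num = 0
    while True:
        mdir = 'merge conflict'
        if num:
            mdir += f' {num}'
        candidate = os.path.join(dirname, mdir, basename)
        if not os.path.exists(make_long_path_useable(candidate)):
            return candidate
        num += 1
```

Note that the rename inserts a directory level rather than suffixing the file name, which is why the test below expects `sub/merge conflict/one` rather than `sub/one (1)`.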


@@ -3064,12 +3064,28 @@ class Cache:
         self.backend.reindex_annotations()
 
     @write_api
-    def add_extra_files(self, book_id, map_of_relpath_to_stream_or_path, replace=True):
+    def add_extra_files(self, book_id, map_of_relpath_to_stream_or_path, replace=True, auto_rename=False):
         ' Add extra data files '
         path = self._field_for('path', book_id).replace('/', os.sep)
         added = {}
         for relpath, stream_or_path in map_of_relpath_to_stream_or_path.items():
-            added[relpath] = self.backend.add_extra_file(relpath, stream_or_path, path, replace)
+            added[relpath] = bool(self.backend.add_extra_file(relpath, stream_or_path, path, replace, auto_rename))
         return added
 
+    @write_api
+    def merge_extra_files(self, dest_id, src_ids, replace=False):
+        ' Merge the extra files from src_ids into dest_id. Conflicting files are auto-renamed unless replace=True in which case they are replaced. '
+        added = set()
+        path = self._field_for('path', dest_id)
+        if path:
+            path = path.replace('/', os.sep)
+            for src_id in src_ids:
+                book_path = self._field_for('path', src_id)
+                if book_path:
+                    book_path = book_path.replace('/', os.sep)
+                    for (relpath, file_path, mtime) in self.backend.iter_extra_files(
+                            src_id, book_path, self.fields['formats'], yield_paths=True):
+                        added.add(self.backend.add_extra_file(relpath, file_path, path, replace=replace, auto_rename=True))
+        return added
+
     @read_api
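A usage sketch for the new API (book ids and file names are illustrative; the resulting layout follows from the renaming scheme above and the test added below):

```python
from io import BytesIO

# Assume db is a Cache instance, e.g. db = LibraryDatabase(library_path).new_api
db.add_extra_files(1, {'notes.txt': BytesIO(b'dest version')})
db.add_extra_files(2, {'notes.txt': BytesIO(b'src version')})
db.merge_extra_files(1, (2,))
# Book 1 now has both data files:
#   notes.txt                -> b'dest version'
#   merge conflict/notes.txt -> b'src version'
```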


@@ -8,15 +8,19 @@
 from calibre.utils.date import now
 from polyglot.builtins import iteritems
 
 
-def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map):
+def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map, extra_file_map):
     seen_fmts = set()
     replace = automerge_action == 'overwrite'
     for identical_book in identical_book_list:
         ib_fmts = newdb.formats(identical_book)
         if ib_fmts:
             seen_fmts |= {fmt.upper() for fmt in ib_fmts}
+        at_least_one_format_added = False
         for fmt, path in iteritems(format_map):
-            newdb.add_format(identical_book, fmt, path, replace=replace, run_hooks=False)
+            if newdb.add_format(identical_book, fmt, path, replace=replace, run_hooks=False):
+                at_least_one_format_added = True
+        if at_least_one_format_added and extra_file_map:
+            newdb.add_extra_files(identical_book, extra_file_map, replace=False, auto_rename=True)
 
     if automerge_action == 'new record':
         incoming_fmts = {fmt.upper() for fmt in format_map}
@@ -28,9 +32,12 @@ def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, fo
             # We should arguably put only the duplicate
             # formats, but no real harm is done by having
             # all formats
-            return newdb.add_books(
+            new_book_id = newdb.add_books(
                 [(mi, format_map)], add_duplicates=True, apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
-                preserve_uuid=False, run_hooks=False)[0][0]
+                preserve_uuid=False, run_hooks=False)[0][0]
+            if extra_file_map:
+                newdb.add_extra_files(new_book_id, extra_file_map)
+            return new_book_id
 
 
 def postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action):
@@ -72,6 +79,7 @@ def copy_one_book(
     mi.timestamp = now()
     format_map = {}
     fmts = list(db.formats(book_id, verify_formats=False))
+    extra_file_map = db.list_extra_files_matching(book_id)
    for fmt in fmts:
         path = db.format_abspath(book_id, fmt)
         if path:
@@ -91,7 +99,7 @@ def copy_one_book(
         identical_book_list = find_identical_books(mi, identical_books_data)
         if identical_book_list:  # books with same author and nearly same title exist in newdb
             if duplicate_action == 'add_formats_to_existing':
-                new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map)
+                new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map, extra_file_map)
                 return_data['action'] = 'automerge'
                 return_data['new_book_id'] = new_book_id
                 postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action)
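For context, `extra_file_map` here is whatever `db.list_extra_files_matching(book_id)` returns: a mapping of book-relative path to absolute path on disk (inferred from the test helpers below, which iterate its `.values()` as file paths, and from `add_extra_file()` above, which copies `str` values with `shutil.copy2()`). An illustrative shape, with hypothetical paths:

```python
extra_file_map = db.list_extra_files_matching(book_id)
# e.g. {'exf': '/library/Author/Title (1)/exf',
#       'sub/recurse': '/library/Author/Title (1)/sub/recurse'}
newdb.add_extra_files(new_book_id, extra_file_map)  # str values are copied, streams are written
```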


@@ -399,36 +399,91 @@ class AddRemoveTest(BaseTest):
         def compare_field(field, func=self.assertEqual):
             func(src_db.field_for(field, rdata['book_id']), dest_db.field_for(field, rdata['new_book_id']))
 
+        def assert_has_extra_files(book_id):
+            bookdir = os.path.dirname(dest_db.format_abspath(book_id, '__COVER_INTERNAL__'))
+            self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
+            self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
+
+        def assert_does_not_have_extra_files(book_id):
+            bookdir = os.path.dirname(dest_db.format_abspath(book_id, '__COVER_INTERNAL__'))
+            self.assertFalse(os.path.exists(os.path.join(bookdir, 'exf')))
+            self.assertFalse(os.path.exists(os.path.join(bookdir, 'sub', 'recurse')))
+
+        def clear_extra_files(book_id):
+            for file_path in dest_db.list_extra_files_matching(book_id).values():
+                os.remove(file_path)
+
+        assert_does_not_have_extra_files(1)
         rdata = copy_one_book(1, src_db, dest_db)
         self.assertEqual(rdata, make_rdata(new_book_id=max(dest_db.all_book_ids())))
         compare_field('timestamp')
         compare_field('uuid', self.assertNotEqual)
         self.assertEqual(src_db.all_annotations_for_book(1), dest_db.all_annotations_for_book(max(dest_db.all_book_ids())))
+        assert_has_extra_files(rdata['new_book_id'])
+        clear_extra_files(rdata['new_book_id'])
         rdata = copy_one_book(1, src_db, dest_db, preserve_date=False, preserve_uuid=True)
         data_file_new_book_id = rdata['new_book_id']
         self.assertEqual(rdata, make_rdata(new_book_id=max(dest_db.all_book_ids())))
         compare_field('timestamp', self.assertNotEqual)
         compare_field('uuid')
+        assert_has_extra_files(rdata['new_book_id'])
+        clear_extra_files(rdata['new_book_id'])
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='ignore')
         self.assertIsNone(rdata['new_book_id'])
         self.assertEqual(rdata['action'], 'duplicate')
         src_db.add_format(1, 'FMT1', BytesIO(b'replaced'), run_hooks=False)
+        assert_does_not_have_extra_files(1)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_has_extra_files(new_book_id)
+            clear_extra_files(new_book_id)
         src_db.add_format(1, 'FMT1', BytesIO(b'second-round'), run_hooks=False)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing', automerge_action='ignore')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_does_not_have_extra_files(new_book_id)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing', automerge_action='new record')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_does_not_have_extra_files(new_book_id)
         self.assertEqual(dest_db.format(rdata['new_book_id'], 'FMT1'), b'second-round')
-        bookdir = os.path.dirname(dest_db.format_abspath(data_file_new_book_id, '__COVER_INTERNAL__'))
-        self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
-        self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
+        assert_has_extra_files(rdata['new_book_id'])
     # }}}
+
+    def test_merging_extra_files(self):  # {{{
+        db = self.init_cache()
+
+        def add_extra(book_id, relpath):
+            db.add_extra_files(book_id, {relpath: BytesIO(f'{book_id}:{relpath}'.encode())})
+
+        def extra_files_for(book_id):
+            ans = {}
+            for relpath, file_path in db.list_extra_files_matching(book_id).items():
+                with open(file_path) as f:
+                    ans[relpath] = f.read()
+            return ans
+
+        add_extra(1, 'one'), add_extra(1, 'sub/one')
+        add_extra(2, 'one'), add_extra(2, 'sub/one'), add_extra(2, 'two/two')
+        add_extra(3, 'one'), add_extra(3, 'sub/one'), add_extra(3, 'three')
+
+        self.assertEqual(extra_files_for(1), {
+            'one': '1:one', 'sub/one': '1:sub/one',
+        })
+        db.merge_extra_files(1, (2, 3))
+        self.assertEqual(extra_files_for(1), {
+            'one': '1:one', 'sub/one': '1:sub/one',
+            'merge conflict/one': '2:one', 'sub/merge conflict/one': '2:sub/one', 'two/two': '2:two/two',
+            'three': '3:three', 'merge conflict 1/one': '3:one', 'sub/merge conflict 1/one': '3:sub/one',
+        })
+    # }}}


@@ -656,6 +656,7 @@ class EditMetadataAction(InterfaceAction):
                 return
         self.add_formats(dest_id, self.formats_for_books(rows))
         self.merge_metadata(dest_id, src_ids)
+        self.merge_data_files(dest_id, src_ids)
         self.delete_books_after_merge(src_ids)
         # leave the selection highlight on first selected book
         dest_row = rows[0].row()
@@ -667,6 +668,9 @@ class EditMetadataAction(InterfaceAction):
         self.gui.library_view.model().refresh_ids((dest_id,), cr)
         self.gui.library_view.horizontalScrollBar().setValue(hpos)
 
+    def merge_data_files(self, dest_id, src_ids):
+        self.gui.current_db.new_api.merge_extra_files(dest_id, src_ids)
+
     def add_formats(self, dest_id, src_books, replace=False):
         for src_book in src_books:
             if src_book:
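One ordering note on the merge flow above: data files are merged before `delete_books_after_merge()`, since deleting the source books removes their directories, and any data files still in them, from disk. A condensed view of the sequence (not the full method):

```python
self.add_formats(dest_id, self.formats_for_books(rows))  # formats first
self.merge_metadata(dest_id, src_ids)                    # then metadata
self.merge_data_files(dest_id, src_ids)                  # then data files (new in this commit)
self.delete_books_after_merge(src_ids)                   # only now delete the source books
```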