Fix data files not being merged when merging books

Fixes #2017373 [Possible bug? Extra data files disappear upon merging](https://bugs.launchpad.net/calibre/+bug/2017373)
Kovid Goyal 2023-04-23 13:04:04 +05:30
parent feac41c8f7
commit edbf95a902
5 changed files with 118 additions and 22 deletions


@@ -1923,25 +1923,38 @@ class DB:
                 with src:
                     yield relpath, src, mtime
 
-    def add_extra_file(self, relpath, stream, book_path, replace=True):
-        dest = os.path.abspath(os.path.join(self.library_path, book_path, relpath))
-        if not replace and os.path.exists(dest):
-            return False
+    def add_extra_file(self, relpath, stream, book_path, replace=True, auto_rename=False):
+        bookdir = os.path.join(self.library_path, book_path)
+        dest = os.path.abspath(os.path.join(bookdir, relpath))
+        if not replace and os.path.exists(make_long_path_useable(dest)):
+            if not auto_rename:
+                return None
+            dirname, basename = os.path.split(dest)
+            num = 0
+            while True:
+                mdir = 'merge conflict'
+                if num:
+                    mdir += f' {num}'
+                candidate = os.path.join(dirname, mdir, basename)
+                if not os.path.exists(make_long_path_useable(candidate)):
+                    dest = candidate
+                    break
+                num += 1
         if isinstance(stream, str):
             try:
-                shutil.copy2(stream, dest)
+                shutil.copy2(make_long_path_useable(stream), make_long_path_useable(dest))
             except FileNotFoundError:
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                shutil.copy2(stream, dest)
+                os.makedirs(make_long_path_useable(os.path.dirname(dest)), exist_ok=True)
+                shutil.copy2(make_long_path_useable(stream), make_long_path_useable(dest))
         else:
             try:
-                d = open(dest, 'wb')
+                d = open(make_long_path_useable(dest), 'wb')
             except FileNotFoundError:
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                d = open(dest, 'wb')
+                os.makedirs(make_long_path_useable(os.path.dirname(dest)), exist_ok=True)
+                d = open(make_long_path_useable(dest), 'wb')
             with d:
                 shutil.copyfileobj(stream, d)
-        return True
+        return os.path.relpath(dest, bookdir).replace(os.sep, '/')
 
     def write_backup(self, path, raw):
         path = os.path.abspath(os.path.join(self.library_path, path, METADATA_FILE_NAME))
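The collision-handling loop above is self-contained enough to lift out. Below is a minimal, stdlib-only sketch of the renaming scheme; `conflict_free_dest` is a hypothetical name, and `make_long_path_useable` (calibre's Windows long-path helper) is stubbed as a no-op:

```python
import os


def make_long_path_useable(path):
    # Stand-in for calibre's Windows long-path helper; a no-op here.
    return path


def conflict_free_dest(dest):
    # Mirrors the loop in add_extra_file() above: the first conflicting
    # file goes into a 'merge conflict' subdirectory, the next into
    # 'merge conflict 1', and so on, keeping the original file name.
    if not os.path.exists(make_long_path_useable(dest)):
        return dest
    dirname, basename = os.path.split(dest)
    num = 0
    while True:
        mdir = 'merge conflict'
        if num:
            mdir += f' {num}'
        candidate = os.path.join(dirname, mdir, basename)
        if not os.path.exists(make_long_path_useable(candidate)):
            return candidate
        num += 1
```

Note that the rename inserts a directory level rather than suffixing the file name, which is why the test below expects `sub/merge conflict/one` rather than `sub/one (1)`.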


@@ -3064,12 +3064,28 @@ class Cache:
         self.backend.reindex_annotations()
 
     @write_api
-    def add_extra_files(self, book_id, map_of_relpath_to_stream_or_path, replace=True):
+    def add_extra_files(self, book_id, map_of_relpath_to_stream_or_path, replace=True, auto_rename=False):
         ' Add extra data files '
         path = self._field_for('path', book_id).replace('/', os.sep)
         added = {}
         for relpath, stream_or_path in map_of_relpath_to_stream_or_path.items():
-            added[relpath] = self.backend.add_extra_file(relpath, stream_or_path, path, replace)
+            added[relpath] = bool(self.backend.add_extra_file(relpath, stream_or_path, path, replace, auto_rename))
         return added
 
+    @write_api
+    def merge_extra_files(self, dest_id, src_ids, replace=False):
+        ' Merge the extra files from src_ids into dest_id. Conflicting files are auto-renamed unless replace=True in which case they are replaced. '
+        added = set()
+        path = self._field_for('path', dest_id)
+        if path:
+            path = path.replace('/', os.sep)
+            for src_id in src_ids:
+                book_path = self._field_for('path', src_id)
+                if book_path:
+                    book_path = book_path.replace('/', os.sep)
+                    for (relpath, file_path, mtime) in self.backend.iter_extra_files(
+                            src_id, book_path, self.fields['formats'], yield_paths=True):
+                        added.add(self.backend.add_extra_file(relpath, file_path, path, replace=replace, auto_rename=True))
+        return added
+
     @read_api
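A usage sketch for the new API (book ids and file names are illustrative; the resulting layout follows from the renaming scheme above and the test added below):

```python
from io import BytesIO

# Assume db is a Cache instance, e.g. db = LibraryDatabase(library_path).new_api
db.add_extra_files(1, {'notes.txt': BytesIO(b'dest version')})
db.add_extra_files(2, {'notes.txt': BytesIO(b'src version')})
db.merge_extra_files(1, (2,))
# Book 1 now has both data files:
#   notes.txt                -> b'dest version'
#   merge conflict/notes.txt -> b'src version'
```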


@@ -8,15 +8,19 @@
 from calibre.utils.date import now
 from polyglot.builtins import iteritems
 
 
-def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map):
+def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map, extra_file_map):
     seen_fmts = set()
     replace = automerge_action == 'overwrite'
     for identical_book in identical_book_list:
         ib_fmts = newdb.formats(identical_book)
         if ib_fmts:
             seen_fmts |= {fmt.upper() for fmt in ib_fmts}
+        at_least_one_format_added = False
         for fmt, path in iteritems(format_map):
-            newdb.add_format(identical_book, fmt, path, replace=replace, run_hooks=False)
+            if newdb.add_format(identical_book, fmt, path, replace=replace, run_hooks=False):
+                at_least_one_format_added = True
+        if at_least_one_format_added and extra_file_map:
+            newdb.add_extra_files(identical_book, extra_file_map, replace=False, auto_rename=True)
 
     if automerge_action == 'new record':
         incoming_fmts = {fmt.upper() for fmt in format_map}
@@ -28,9 +32,12 @@ def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, fo
             # We should arguably put only the duplicate
             # formats, but no real harm is done by having
             # all formats
-            return newdb.add_books(
+            new_book_id = newdb.add_books(
                 [(mi, format_map)], add_duplicates=True, apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
-                preserve_uuid=False, run_hooks=False)[0][0]
+                preserve_uuid=False, run_hooks=False)[0][0]
+            if extra_file_map:
+                newdb.add_extra_files(new_book_id, extra_file_map)
+            return new_book_id
 
 
 def postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action):
@@ -72,6 +79,7 @@ def copy_one_book(
     mi.timestamp = now()
     format_map = {}
     fmts = list(db.formats(book_id, verify_formats=False))
+    extra_file_map = db.list_extra_files_matching(book_id)
    for fmt in fmts:
         path = db.format_abspath(book_id, fmt)
         if path:
@@ -91,7 +99,7 @@ def copy_one_book(
         identical_book_list = find_identical_books(mi, identical_books_data)
         if identical_book_list:  # books with same author and nearly same title exist in newdb
             if duplicate_action == 'add_formats_to_existing':
-                new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map)
+                new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map, extra_file_map)
                 return_data['action'] = 'automerge'
                 return_data['new_book_id'] = new_book_id
                 postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action)
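For context, `extra_file_map` here is whatever `db.list_extra_files_matching(book_id)` returns: a mapping of book-relative path to absolute path on disk (inferred from the test helpers below, which iterate its `.values()` as file paths, and from `add_extra_file()` above, which copies `str` values with `shutil.copy2()`). An illustrative shape, with hypothetical paths:

```python
extra_file_map = db.list_extra_files_matching(book_id)
# e.g. {'exf': '/library/Author/Title (1)/exf',
#       'sub/recurse': '/library/Author/Title (1)/sub/recurse'}
newdb.add_extra_files(new_book_id, extra_file_map)  # str values are copied, streams are written
```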


@@ -399,36 +399,91 @@ class AddRemoveTest(BaseTest):
         def compare_field(field, func=self.assertEqual):
             func(src_db.field_for(field, rdata['book_id']), dest_db.field_for(field, rdata['new_book_id']))
 
+        def assert_has_extra_files(book_id):
+            bookdir = os.path.dirname(dest_db.format_abspath(book_id, '__COVER_INTERNAL__'))
+            self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
+            self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
+
+        def assert_does_not_have_extra_files(book_id):
+            bookdir = os.path.dirname(dest_db.format_abspath(book_id, '__COVER_INTERNAL__'))
+            self.assertFalse(os.path.exists(os.path.join(bookdir, 'exf')))
+            self.assertFalse(os.path.exists(os.path.join(bookdir, 'sub', 'recurse')))
+
+        def clear_extra_files(book_id):
+            for file_path in dest_db.list_extra_files_matching(book_id).values():
+                os.remove(file_path)
+
+        assert_does_not_have_extra_files(1)
         rdata = copy_one_book(1, src_db, dest_db)
         self.assertEqual(rdata, make_rdata(new_book_id=max(dest_db.all_book_ids())))
         compare_field('timestamp')
         compare_field('uuid', self.assertNotEqual)
         self.assertEqual(src_db.all_annotations_for_book(1), dest_db.all_annotations_for_book(max(dest_db.all_book_ids())))
+        assert_has_extra_files(rdata['new_book_id'])
+        clear_extra_files(rdata['new_book_id'])
         rdata = copy_one_book(1, src_db, dest_db, preserve_date=False, preserve_uuid=True)
         data_file_new_book_id = rdata['new_book_id']
         self.assertEqual(rdata, make_rdata(new_book_id=max(dest_db.all_book_ids())))
         compare_field('timestamp', self.assertNotEqual)
         compare_field('uuid')
+        assert_has_extra_files(rdata['new_book_id'])
+        clear_extra_files(rdata['new_book_id'])
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='ignore')
         self.assertIsNone(rdata['new_book_id'])
         self.assertEqual(rdata['action'], 'duplicate')
         src_db.add_format(1, 'FMT1', BytesIO(b'replaced'), run_hooks=False)
+        assert_does_not_have_extra_files(1)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_has_extra_files(new_book_id)
+            clear_extra_files(new_book_id)
         src_db.add_format(1, 'FMT1', BytesIO(b'second-round'), run_hooks=False)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing', automerge_action='ignore')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_does_not_have_extra_files(new_book_id)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing', automerge_action='new record')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_does_not_have_extra_files(new_book_id)
         self.assertEqual(dest_db.format(rdata['new_book_id'], 'FMT1'), b'second-round')
-        bookdir = os.path.dirname(dest_db.format_abspath(data_file_new_book_id, '__COVER_INTERNAL__'))
-        self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
-        self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
+        assert_has_extra_files(rdata['new_book_id'])
     # }}}
+
+    def test_merging_extra_files(self):  # {{{
+        db = self.init_cache()
+
+        def add_extra(book_id, relpath):
+            db.add_extra_files(book_id, {relpath: BytesIO(f'{book_id}:{relpath}'.encode())})
+
+        def extra_files_for(book_id):
+            ans = {}
+            for relpath, file_path in db.list_extra_files_matching(book_id).items():
+                with open(file_path) as f:
+                    ans[relpath] = f.read()
+            return ans
+
+        add_extra(1, 'one'), add_extra(1, 'sub/one')
+        add_extra(2, 'one'), add_extra(2, 'sub/one'), add_extra(2, 'two/two')
+        add_extra(3, 'one'), add_extra(3, 'sub/one'), add_extra(3, 'three')
+
+        self.assertEqual(extra_files_for(1), {
+            'one': '1:one', 'sub/one': '1:sub/one',
+        })
+        db.merge_extra_files(1, (2, 3))
+        self.assertEqual(extra_files_for(1), {
+            'one': '1:one', 'sub/one': '1:sub/one',
+            'merge conflict/one': '2:one', 'sub/merge conflict/one': '2:sub/one', 'two/two': '2:two/two',
+            'three': '3:three', 'merge conflict 1/one': '3:one', 'sub/merge conflict 1/one': '3:sub/one',
+        })
+    # }}}


@@ -656,6 +656,7 @@ class EditMetadataAction(InterfaceAction):
                 return
         self.add_formats(dest_id, self.formats_for_books(rows))
         self.merge_metadata(dest_id, src_ids)
+        self.merge_data_files(dest_id, src_ids)
         self.delete_books_after_merge(src_ids)
         # leave the selection highlight on first selected book
         dest_row = rows[0].row()
@@ -667,6 +668,9 @@ class EditMetadataAction(InterfaceAction):
         self.gui.library_view.model().refresh_ids((dest_id,), cr)
         self.gui.library_view.horizontalScrollBar().setValue(hpos)
 
+    def merge_data_files(self, dest_id, src_ids):
+        self.gui.current_db.new_api.merge_extra_files(dest_id, src_ids)
+
     def add_formats(self, dest_id, src_books, replace=False):
         for src_book in src_books:
             if src_book:
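One ordering note on the merge flow above: data files are merged before `delete_books_after_merge()`, since deleting the source books removes their directories, and any data files still in them, from disk. A condensed view of the sequence (not the full method):

```python
self.add_formats(dest_id, self.formats_for_books(rows))  # formats first
self.merge_metadata(dest_id, src_ids)                    # then metadata
self.merge_data_files(dest_id, src_ids)                  # then data files (new in this commit)
self.delete_books_after_merge(src_ids)                   # only now delete the source books
```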