Fix data files not being merged when merging books
Fixes #2017373 [Possible bug? Extra data files disappear upon merging](https://bugs.launchpad.net/calibre/+bug/2017373)
Parent: feac41c8f7
Commit: edbf95a902
@@ -1923,25 +1923,38 @@ class DB:
                         with src:
                             yield relpath, src, mtime

-    def add_extra_file(self, relpath, stream, book_path, replace=True):
-        dest = os.path.abspath(os.path.join(self.library_path, book_path, relpath))
-        if not replace and os.path.exists(dest):
-            return False
+    def add_extra_file(self, relpath, stream, book_path, replace=True, auto_rename=False):
+        bookdir = os.path.join(self.library_path, book_path)
+        dest = os.path.abspath(os.path.join(bookdir, relpath))
+        if not replace and os.path.exists(make_long_path_useable(dest)):
+            if not auto_rename:
+                return None
+            dirname, basename = os.path.split(dest)
+            num = 0
+            while True:
+                mdir = 'merge conflict'
+                if num:
+                    mdir += f' {num}'
+                candidate = os.path.join(dirname, mdir, basename)
+                if not os.path.exists(make_long_path_useable(candidate)):
+                    dest = candidate
+                    break
+                num += 1
         if isinstance(stream, str):
             try:
-                shutil.copy2(stream, dest)
+                shutil.copy2(make_long_path_useable(stream), make_long_path_useable(dest))
             except FileNotFoundError:
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                shutil.copy2(stream, dest)
+                os.makedirs(make_long_path_useable(os.path.dirname(dest)), exist_ok=True)
+                shutil.copy2(make_long_path_useable(stream), make_long_path_useable(dest))
         else:
             try:
-                d = open(dest, 'wb')
+                d = open(make_long_path_useable(dest), 'wb')
             except FileNotFoundError:
-                os.makedirs(os.path.dirname(dest), exist_ok=True)
-                d = open(dest, 'wb')
+                os.makedirs(make_long_path_useable(os.path.dirname(dest)), exist_ok=True)
+                d = open(make_long_path_useable(dest), 'wb')
             with d:
                 shutil.copyfileobj(stream, d)
-        return True
+        return os.path.relpath(dest, bookdir).replace(os.sep, '/')

     def write_backup(self, path, raw):
         path = os.path.abspath(os.path.join(self.library_path, path, METADATA_FILE_NAME))
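
A minimal, standalone sketch of the renaming scheme introduced above. The 'merge conflict' directory names are the ones the diff actually uses; the helper name and the tempfile demo are illustrative, not calibre code:

import os
import tempfile


def conflict_free_path(dest):
    # Mirrors the loop in add_extra_file(): keep the original basename but
    # nest it under 'merge conflict', 'merge conflict 1', ... until the
    # candidate path does not exist yet.
    if not os.path.exists(dest):
        return dest
    dirname, basename = os.path.split(dest)
    num = 0
    while True:
        mdir = 'merge conflict'
        if num:
            mdir += f' {num}'
        candidate = os.path.join(dirname, mdir, basename)
        if not os.path.exists(candidate):
            return candidate
        num += 1


with tempfile.TemporaryDirectory() as d:
    dest = os.path.join(d, 'notes.txt')
    open(dest, 'w').close()  # simulate a data file that already exists
    print(os.path.relpath(conflict_free_path(dest), d).replace(os.sep, '/'))
    # -> merge conflict/notes.txt

Note that add_extra_file() now returns the final book-relative path (with '/' separators) instead of True, so callers can learn where a renamed file actually landed; None signals "already present, skipped".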
@@ -3064,12 +3064,28 @@ class Cache:
         self.backend.reindex_annotations()

     @write_api
-    def add_extra_files(self, book_id, map_of_relpath_to_stream_or_path, replace=True):
+    def add_extra_files(self, book_id, map_of_relpath_to_stream_or_path, replace=True, auto_rename=False):
         ' Add extra data files '
         path = self._field_for('path', book_id).replace('/', os.sep)
         added = {}
         for relpath, stream_or_path in map_of_relpath_to_stream_or_path.items():
-            added[relpath] = self.backend.add_extra_file(relpath, stream_or_path, path, replace)
+            added[relpath] = bool(self.backend.add_extra_file(relpath, stream_or_path, path, replace, auto_rename))
+        return added
+
+    @write_api
+    def merge_extra_files(self, dest_id, src_ids, replace=False):
+        ' Merge the extra files from src_ids into dest_id. Conflicting files are auto-renamed unless replace=True in which case they are replaced. '
+        added = set()
+        path = self._field_for('path', dest_id)
+        if path:
+            path = path.replace('/', os.sep)
+            for src_id in src_ids:
+                book_path = self._field_for('path', src_id)
+                if book_path:
+                    book_path = book_path.replace('/', os.sep)
+                    for (relpath, file_path, mtime) in self.backend.iter_extra_files(
+                            src_id, book_path, self.fields['formats'], yield_paths=True):
+                        added.add(self.backend.add_extra_file(relpath, file_path, path, replace=replace, auto_rename=True))
         return added

     @read_api
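
How the new Cache methods might be driven directly. A sketch following the `from calibre.library import db` pattern from calibre's API documentation; the library path and book ids here are assumptions:

from io import BytesIO

from calibre.library import db

cache = db('/path/to/library').new_api
# replace=False and an existing file: the value for that key is False
# (the backend returned None, i.e. skipped)
cache.add_extra_files(1, {'notes.txt': BytesIO(b'dup')}, replace=False)
# auto_rename=True: the conflicting copy is kept, under a 'merge conflict' directory
cache.add_extra_files(1, {'notes.txt': BytesIO(b'dup')}, replace=False, auto_rename=True)
# Pull every data file of books 2 and 3 into book 1, auto-renaming conflicts
cache.merge_extra_files(1, (2, 3))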
@@ -8,15 +8,19 @@ from calibre.utils.date import now
 from polyglot.builtins import iteritems


-def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map):
+def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map, extra_file_map):
     seen_fmts = set()
     replace = automerge_action == 'overwrite'
     for identical_book in identical_book_list:
         ib_fmts = newdb.formats(identical_book)
         if ib_fmts:
             seen_fmts |= {fmt.upper() for fmt in ib_fmts}
+        at_least_one_format_added = False
         for fmt, path in iteritems(format_map):
-            newdb.add_format(identical_book, fmt, path, replace=replace, run_hooks=False)
+            if newdb.add_format(identical_book, fmt, path, replace=replace, run_hooks=False):
+                at_least_one_format_added = True
+        if at_least_one_format_added and extra_file_map:
+            newdb.add_extra_files(identical_book, extra_file_map, replace=False, auto_rename=True)

     if automerge_action == 'new record':
         incoming_fmts = {fmt.upper() for fmt in format_map}
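
Why the new at_least_one_format_added guard matters: with automerge_action 'ignore', add_format() declines to overwrite existing formats, so nothing from the incoming book is kept and its data files must not be attached either. A toy simulation of that control flow (fake_add_format stands in for newdb.add_format; it is not calibre code):

def fake_add_format(existing_fmts, fmt, replace):
    # add_format() reports failure when the format exists and replace is False
    if fmt in existing_fmts and not replace:
        return False
    existing_fmts.add(fmt)
    return True

existing_fmts = {'FMT1'}
at_least_one_format_added = False
for fmt in ('FMT1',):  # every incoming format is already present
    if fake_add_format(existing_fmts, fmt, replace=False):
        at_least_one_format_added = True
print(at_least_one_format_added)  # False -> extra files are not copied over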
@@ -28,9 +32,12 @@ def automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, fo
             # We should arguably put only the duplicate
             # formats, but no real harm is done by having
             # all formats
-            return newdb.add_books(
+            new_book_id = newdb.add_books(
                 [(mi, format_map)], add_duplicates=True, apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
                 preserve_uuid=False, run_hooks=False)[0][0]
+            if extra_file_map:
+                newdb.add_extra_files(new_book_id, extra_file_map)
+            return new_book_id


 def postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action):
@@ -72,6 +79,7 @@ def copy_one_book(
     mi.timestamp = now()
     format_map = {}
     fmts = list(db.formats(book_id, verify_formats=False))
+    extra_file_map = db.list_extra_files_matching(book_id)
     for fmt in fmts:
         path = db.format_abspath(book_id, fmt)
         if path:
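
For reference, list_extra_files_matching() — used above to build extra_file_map, and again in the tests below — returns a mapping of book-relative path to absolute path on disk. A usage sketch, with an assumed library path and book id:

from calibre.library import db

cache = db('/path/to/library').new_api
for relpath, file_path in cache.list_extra_files_matching(1).items():
    print(relpath, '->', file_path)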
@@ -91,7 +99,7 @@ def copy_one_book(
         identical_book_list = find_identical_books(mi, identical_books_data)
         if identical_book_list:  # books with same author and nearly same title exist in newdb
             if duplicate_action == 'add_formats_to_existing':
-                new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map)
+                new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map, extra_file_map)
                 return_data['action'] = 'automerge'
                 return_data['new_book_id'] = new_book_id
                 postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action)
@@ -399,36 +399,91 @@ class AddRemoveTest(BaseTest):
         def compare_field(field, func=self.assertEqual):
             func(src_db.field_for(field, rdata['book_id']), dest_db.field_for(field, rdata['new_book_id']))

+        def assert_has_extra_files(book_id):
+            bookdir = os.path.dirname(dest_db.format_abspath(book_id, '__COVER_INTERNAL__'))
+            self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
+            self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
+
+        def assert_does_not_have_extra_files(book_id):
+            bookdir = os.path.dirname(dest_db.format_abspath(book_id, '__COVER_INTERNAL__'))
+            self.assertFalse(os.path.exists(os.path.join(bookdir, 'exf')))
+            self.assertFalse(os.path.exists(os.path.join(bookdir, 'sub', 'recurse')))
+
+        def clear_extra_files(book_id):
+            for file_path in dest_db.list_extra_files_matching(book_id).values():
+                os.remove(file_path)
+
+        assert_does_not_have_extra_files(1)
+
         rdata = copy_one_book(1, src_db, dest_db)
         self.assertEqual(rdata, make_rdata(new_book_id=max(dest_db.all_book_ids())))
         compare_field('timestamp')
         compare_field('uuid', self.assertNotEqual)
         self.assertEqual(src_db.all_annotations_for_book(1), dest_db.all_annotations_for_book(max(dest_db.all_book_ids())))
+        assert_has_extra_files(rdata['new_book_id'])
+        clear_extra_files(rdata['new_book_id'])

         rdata = copy_one_book(1, src_db, dest_db, preserve_date=False, preserve_uuid=True)
-        data_file_new_book_id = rdata['new_book_id']
         self.assertEqual(rdata, make_rdata(new_book_id=max(dest_db.all_book_ids())))
         compare_field('timestamp', self.assertNotEqual)
         compare_field('uuid')
+        assert_has_extra_files(rdata['new_book_id'])
+        clear_extra_files(rdata['new_book_id'])

         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='ignore')
         self.assertIsNone(rdata['new_book_id'])
         self.assertEqual(rdata['action'], 'duplicate')
         src_db.add_format(1, 'FMT1', BytesIO(b'replaced'), run_hooks=False)
+        assert_does_not_have_extra_files(1)

         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_has_extra_files(new_book_id)
+            clear_extra_files(new_book_id)

         src_db.add_format(1, 'FMT1', BytesIO(b'second-round'), run_hooks=False)
         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing', automerge_action='ignore')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_does_not_have_extra_files(new_book_id)

         rdata = copy_one_book(1, src_db, dest_db, duplicate_action='add_formats_to_existing', automerge_action='new record')
         self.assertEqual(rdata['action'], 'automerge')
         for new_book_id in (1, 4, 5):
             self.assertEqual(dest_db.format(new_book_id, 'FMT1'), b'replaced')
+            assert_does_not_have_extra_files(new_book_id)
         self.assertEqual(dest_db.format(rdata['new_book_id'], 'FMT1'), b'second-round')
-        bookdir = os.path.dirname(dest_db.format_abspath(data_file_new_book_id, '__COVER_INTERNAL__'))
-        self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
-        self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
+        assert_has_extra_files(rdata['new_book_id'])

     # }}}

+    def test_merging_extra_files(self):  # {{{
+        db = self.init_cache()
+
+        def add_extra(book_id, relpath):
+            db.add_extra_files(book_id, {relpath: BytesIO(f'{book_id}:{relpath}'.encode())})
+
+        def extra_files_for(book_id):
+            ans = {}
+            for relpath, file_path in db.list_extra_files_matching(book_id).items():
+                with open(file_path) as f:
+                    ans[relpath] = f.read()
+            return ans
+
+        add_extra(1, 'one'), add_extra(1, 'sub/one')
+        add_extra(2, 'one'), add_extra(2, 'sub/one'), add_extra(2, 'two/two')
+        add_extra(3, 'one'), add_extra(3, 'sub/one'), add_extra(3, 'three')
+
+        self.assertEqual(extra_files_for(1), {
+            'one': '1:one', 'sub/one': '1:sub/one',
+        })
+        db.merge_extra_files(1, (2, 3))
+        self.assertEqual(extra_files_for(1), {
+            'one': '1:one', 'sub/one': '1:sub/one',
+            'merge conflict/one': '2:one', 'sub/merge conflict/one': '2:sub/one', 'two/two': '2:two/two',
+            'three': '3:three', 'merge conflict 1/one': '3:one', 'sub/merge conflict 1/one': '3:sub/one',
+        })
+    # }}}
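
The expected mapping in test_merging_extra_files also pins down ordering: source books are merged in the order given, so conflicts from book 2 land under 'merge conflict' and those from book 3 under 'merge conflict 1'. A tiny restatement of that naming rule (illustrative helper, not calibre code):

def conflict_dir(n):
    return 'merge conflict' if n == 0 else f'merge conflict {n}'

print([conflict_dir(n) for n in range(3)])
# ['merge conflict', 'merge conflict 1', 'merge conflict 2']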
@@ -656,6 +656,7 @@ class EditMetadataAction(InterfaceAction):
                 return
         self.add_formats(dest_id, self.formats_for_books(rows))
         self.merge_metadata(dest_id, src_ids)
+        self.merge_data_files(dest_id, src_ids)
         self.delete_books_after_merge(src_ids)
         # leave the selection highlight on first selected book
         dest_row = rows[0].row()
@@ -667,6 +668,9 @@ class EditMetadataAction(InterfaceAction):
         self.gui.library_view.model().refresh_ids((dest_id,), cr)
         self.gui.library_view.horizontalScrollBar().setValue(hpos)

+    def merge_data_files(self, dest_id, src_ids):
+        self.gui.current_db.new_api.merge_extra_files(dest_id, src_ids)
+
     def add_formats(self, dest_id, src_books, replace=False):
         for src_book in src_books:
             if src_book:
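
In the GUI merge flow the call ordering matters: merge_data_files() runs after formats and metadata are merged but before delete_books_after_merge(), so the source books' data files are copied while they still exist on disk. The new method is a thin wrapper; an equivalent direct call would be:

# dest_id and src_ids are illustrative placeholders for the selected books;
# gui is assumed to be the running calibre GUI object
gui.current_db.new_api.merge_extra_files(dest_id, src_ids)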