Copy to library: Speed up checking for duplicates when copying multiple books to a large library. Fixes #1593027 [Copy to library takes 30 times longer than import](https://bugs.launchpad.net/calibre/+bug/1593027)

2025-07-08 10:44:09 -04:00 · 2016-06-16 14:50:26 +05:30 · 2016-06-16 14:50:26 +05:30 · 96e743c5b1
commit 96e743c5b1
parent 5a2c70e91e
1 changed file with 10 additions and 5 deletions
--- a/src/calibre/gui2/actions/copy_to_library.py
+++ b/src/calibre/gui2/actions/copy_to_library.py
@@ -16,6 +16,7 @@ from PyQt5.Qt import (
    QFormLayout, QCheckBox, QWidget, QScrollArea, QVBoxLayout, Qt, QListWidgetItem, QListWidget)
 from calibre.constants import isosx
+from calibre.db.utils import find_identical_books
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2 import (error_dialog, Dispatcher, warning_dialog, gprefs,
        info_dialog, choose_dir)
@@ -105,6 +106,7 @@ class Worker(Thread):  # {{{
        self.auto_merged_ids = {}
        self.add_duplicates = add_duplicates
        self.duplicate_ids = {}
+        self.check_for_duplicates = not add_duplicates and (prefs['add_formats_to_existing'] or prefs['check_for_dupes_on_ctl'])
    def run(self):
        try:
@@ -130,6 +132,8 @@ class Worker(Thread):  # {{{
        from calibre.db.legacy import LibraryDatabase
        newdb = LibraryDatabase(self.loc, is_second_db=True)
        with closing(newdb):
+            if self.check_for_duplicates:
+                self.find_identical_books_data = newdb.new_api.data_for_find_identical_books()
            self._doit(newdb)
        newdb.break_cycles()
        del newdb
@@ -154,11 +158,10 @@ class Worker(Thread):  # {{{
                if p:
                    paths.append(p)
            try:
-                if not self.add_duplicates:
+                if self.check_for_duplicates:
-                    if prefs['add_formats_to_existing'] or prefs['check_for_dupes_on_ctl']:
+                    # Scanning for dupes can be slow on a large library so
-                        # Scanning for dupes can be slow on a large library so
+                    # only do it if the option is set
-                        # only do it if the option is set
+                    identical_book_list = find_identical_books(mi, self.find_identical_books_data)
-                        identical_book_list = newdb.find_identical_books(mi)
                    if identical_book_list:  # books with same author and nearly same title exist in newdb
                        if prefs['add_formats_to_existing']:
                            self.automerge_book(x, mi, identical_book_list, paths, newdb)
@@ -193,6 +196,8 @@ class Worker(Thread):  # {{{
                co = self.db.conversion_options(x, 'PIPE')
                if co is not None:
                    newdb.set_conversion_options(new_book_id, 'PIPE', co)
+                if self.check_for_duplicates:
+                    newdb.new_api.update_data_for_find_identical_books(new_book_id, self.find_identical_books_data)
                self.processed.add(x)
            finally:
                for path in paths: