Copy to library: Speed up checking for duplicates when copying multiple books to a large library. Fixes #1593027 [Copy to library takes 30 times longer than import](https://bugs.launchpad.net/calibre/+bug/1593027)

This commit is contained in:
Kovid Goyal 2016-06-16 14:50:26 +05:30
parent 5a2c70e91e
commit 96e743c5b1

View File

@ -16,6 +16,7 @@ from PyQt5.Qt import (
QFormLayout, QCheckBox, QWidget, QScrollArea, QVBoxLayout, Qt, QListWidgetItem, QListWidget) QFormLayout, QCheckBox, QWidget, QScrollArea, QVBoxLayout, Qt, QListWidgetItem, QListWidget)
from calibre.constants import isosx from calibre.constants import isosx
from calibre.db.utils import find_identical_books
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
from calibre.gui2 import (error_dialog, Dispatcher, warning_dialog, gprefs, from calibre.gui2 import (error_dialog, Dispatcher, warning_dialog, gprefs,
info_dialog, choose_dir) info_dialog, choose_dir)
@ -105,6 +106,7 @@ class Worker(Thread): # {{{
self.auto_merged_ids = {} self.auto_merged_ids = {}
self.add_duplicates = add_duplicates self.add_duplicates = add_duplicates
self.duplicate_ids = {} self.duplicate_ids = {}
self.check_for_duplicates = not add_duplicates and (prefs['add_formats_to_existing'] or prefs['check_for_dupes_on_ctl'])
def run(self): def run(self):
try: try:
@ -130,6 +132,8 @@ class Worker(Thread): # {{{
from calibre.db.legacy import LibraryDatabase from calibre.db.legacy import LibraryDatabase
newdb = LibraryDatabase(self.loc, is_second_db=True) newdb = LibraryDatabase(self.loc, is_second_db=True)
with closing(newdb): with closing(newdb):
if self.check_for_duplicates:
self.find_identical_books_data = newdb.new_api.data_for_find_identical_books()
self._doit(newdb) self._doit(newdb)
newdb.break_cycles() newdb.break_cycles()
del newdb del newdb
@ -154,11 +158,10 @@ class Worker(Thread): # {{{
if p: if p:
paths.append(p) paths.append(p)
try: try:
if not self.add_duplicates: if self.check_for_duplicates:
if prefs['add_formats_to_existing'] or prefs['check_for_dupes_on_ctl']: # Scanning for dupes can be slow on a large library so
# Scanning for dupes can be slow on a large library so # only do it if the option is set
# only do it if the option is set identical_book_list = find_identical_books(mi, self.find_identical_books_data)
identical_book_list = newdb.find_identical_books(mi)
if identical_book_list: # books with same author and nearly same title exist in newdb if identical_book_list: # books with same author and nearly same title exist in newdb
if prefs['add_formats_to_existing']: if prefs['add_formats_to_existing']:
self.automerge_book(x, mi, identical_book_list, paths, newdb) self.automerge_book(x, mi, identical_book_list, paths, newdb)
@ -193,6 +196,8 @@ class Worker(Thread): # {{{
co = self.db.conversion_options(x, 'PIPE') co = self.db.conversion_options(x, 'PIPE')
if co is not None: if co is not None:
newdb.set_conversion_options(new_book_id, 'PIPE', co) newdb.set_conversion_options(new_book_id, 'PIPE', co)
if self.check_for_duplicates:
newdb.new_api.update_data_for_find_identical_books(new_book_id, self.find_identical_books_data)
self.processed.add(x) self.processed.add(x)
finally: finally:
for path in paths: for path in paths: