Option to check for duplicates while copying between libraries

Add an option to check for duplicates (books with the same title/author)
when copying between libraries. The option is under Preferences->Adding
books. Fixes #1245089 [When copying or moving book to different library, Calibre does not warn if possible duplicate exists in target library.](https://bugs.launchpad.net/calibre/+bug/1245089)
This commit is contained in:
Kovid Goyal 2013-10-27 17:58:57 +05:30
parent cbf3dd41c7
commit 4992f3fb37
4 changed files with 245 additions and 168 deletions

View File

@ -11,8 +11,9 @@ from threading import Thread
from contextlib import closing
from collections import defaultdict
from PyQt4.Qt import (QToolButton, QDialog, QGridLayout, QIcon, QLabel, QDialogButtonBox,
QFormLayout, QCheckBox, QWidget, QScrollArea, QVBoxLayout)
from PyQt4.Qt import (
QToolButton, QDialog, QGridLayout, QIcon, QLabel, QDialogButtonBox,
QFormLayout, QCheckBox, QWidget, QScrollArea, QVBoxLayout, Qt, QListWidgetItem, QListWidget)
from calibre.gui2.actions import InterfaceAction
from calibre.gui2 import (error_dialog, Dispatcher, warning_dialog, gprefs,
@ -90,10 +91,10 @@ def ask_about_cc_mismatch(gui, db, newdb, missing_cols, incompatible_cols): # {
class Worker(Thread): # {{{
def __init__(self, ids, db, loc, progress, done, delete_after):
def __init__(self, ids, db, loc, progress, done, delete_after, add_duplicates):
Thread.__init__(self)
self.ids = ids
self.processed = set([])
self.processed = set()
self.db = db
self.loc = loc
self.error = None
@ -101,6 +102,8 @@ class Worker(Thread): # {{{
self.done = done
self.delete_after = delete_after
self.auto_merged_ids = {}
self.add_duplicates = add_duplicates
self.duplicate_ids = {}
def run(self):
try:
@ -142,20 +145,44 @@ class Worker(Thread): # {{{
fmts = []
else:
fmts = fmts.split(',')
identical_book_list = set()
paths = []
for fmt in fmts:
p = self.db.format(x, fmt, index_is_id=True,
as_path=True)
if p:
paths.append(p)
automerged = False
if prefs['add_formats_to_existing']:
try:
if not self.add_duplicates:
if prefs['add_formats_to_existing'] or prefs['check_for_dupes_on_ctl']:
# Scanning for dupes can be slow on a large library so
# only do it if the option is set
identical_book_list = newdb.find_identical_books(mi)
if identical_book_list: # books with same author and nearly same title exist in newdb
self.auto_merged_ids[x] = _('%(title)s by %(author)s')%\
dict(title=mi.title, author=mi.format_field('authors')[1])
automerged = True
if prefs['add_formats_to_existing']:
self.automerge_book(x, mi, identical_book_list, paths, newdb)
else: # Report duplicates for later processing
self.duplicate_ids[x] = (mi.title, mi.authors)
continue
newdb.import_book(mi, paths, notify=False, import_hooks=False,
apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
preserve_uuid=self.delete_after)
co = self.db.conversion_options(x, 'PIPE')
if co is not None:
newdb.set_conversion_options(x, 'PIPE', co)
self.processed.add(x)
finally:
for path in paths:
try:
os.remove(path)
except:
pass
def automerge_book(self, book_id, mi, identical_book_list, paths, newdb):
self.auto_merged_ids[book_id] = _('%(title)s by %(author)s') % dict(title=mi.title, author=mi.format_field('authors')[1])
seen_fmts = set()
self.processed.add(book_id)
for identical_book in identical_book_list:
ib_fmts = newdb.formats(identical_book, index_is_id=True)
if ib_fmts:
@ -180,19 +207,6 @@ class Worker(Thread): # {{{
apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
preserve_uuid=False)
if not automerged:
newdb.import_book(mi, paths, notify=False, import_hooks=False,
apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
preserve_uuid=self.delete_after)
co = self.db.conversion_options(x, 'PIPE')
if co is not None:
newdb.set_conversion_options(x, 'PIPE', co)
self.processed.add(x)
for path in paths:
try:
os.remove(path)
except:
pass
# }}}
@ -242,6 +256,51 @@ class ChooseLibrary(QDialog): # {{{
return (unicode(self.le.text()), self.delete_after_copy)
# }}}
class DuplicatesQuestion(QDialog):  # {{{

    '''
    Dialog that lists books which already have a match in the destination
    library and lets the user tick the ones that should be copied anyway.
    The ids of the ticked books are available from :attr:`ids`.
    '''

    def __init__(self, parent, duplicates, loc):
        '''
        :param parent: Parent widget, passed straight to ``QDialog``.
        :param duplicates: Mapping of book id to a ``(title, authors)`` pair,
            where ``authors`` is a sequence of author names.
        :param loc: Path of the destination library; only its basename is
            shown to the user.
        '''
        QDialog.__init__(self, parent)
        l = QVBoxLayout()
        self.setLayout(l)
        self.la = la = QLabel(_('Books with the same title and author as the following already exist in the library %s.'
                                ' Select which books you want copied anyway.') %
                              os.path.basename(loc))
        la.setWordWrap(True)
        l.addWidget(la)
        self.setWindowTitle(_('Duplicate books'))

        self.books = QListWidget(self)
        self.items = []
        for book_id, (title, authors) in duplicates.iteritems():
            # Named placeholders let translators reorder title and author,
            # and match the format string already used for the automerge
            # report elsewhere in this file. Only the first three authors are
            # shown to keep the entry short.
            text = _('%(title)s by %(author)s') % dict(
                title=title, author=' & '.join(authors[:3]))
            i = QListWidgetItem(text, self.books)
            # The book id travels with the item so that ids() can recover it
            i.setData(Qt.UserRole, book_id)
            # Checkable and enabled, but deliberately not selectable
            i.setFlags(Qt.ItemIsUserCheckable | Qt.ItemIsEnabled)
            i.setCheckState(Qt.Checked)
            self.items.append(i)
        l.addWidget(self.books)

        self.bb = bb = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        bb.accepted.connect(self.accept)
        bb.rejected.connect(self.reject)
        self.a = b = bb.addButton(_('Select &all'), bb.ActionRole)
        b.clicked.connect(self.select_all)
        self.n = b = bb.addButton(_('Select &none'), bb.ActionRole)
        b.clicked.connect(self.select_none)
        l.addWidget(bb)
        self.resize(600, 400)

    def select_all(self):
        ''' Tick every book in the list. '''
        for i in self.items:
            i.setCheckState(Qt.Checked)

    def select_none(self):
        ''' Untick every book in the list. '''
        for i in self.items:
            i.setCheckState(Qt.Unchecked)

    @property
    def ids(self):
        ''' The set of book ids whose entries are currently ticked. '''
        return {i.data(Qt.UserRole).toInt()[0] for i in self.items if i.checkState() == Qt.Checked}

# }}}
# Static session-long set of pairs of libraries that have had their custom columns
# checked for compatibility
libraries_with_checked_columns = defaultdict(set)
@ -323,20 +382,9 @@ class CopyToLibraryAction(InterfaceAction):
return error_dialog(self.gui, _('No library'),
_('No library found at %s')%loc, show=True)
aname = _('Moving to') if delete_after else _('Copying to')
dtitle = '%s %s'%(aname, os.path.basename(loc))
self.pd = ProgressDialog(dtitle, min=0, max=len(ids)-1,
parent=self.gui, cancelable=False)
def progress(idx, title):
self.pd.set_msg(title)
self.pd.set_value(idx)
# Open the new db so we can check the custom columns. We use only the
# backend since we only need the custom column definitions, not the
# rest of the data in the db.
global libraries_with_checked_columns
from calibre.db.legacy import create_backend
@ -367,9 +415,26 @@ class CopyToLibraryAction(InterfaceAction):
del newdb
if not continue_processing:
return
duplicate_ids = self.do_copy(ids, db, loc, delete_after, False)
if duplicate_ids:
d = DuplicatesQuestion(self.gui, duplicate_ids, loc)
if d.exec_() == d.Accepted:
ids = d.ids
if ids:
self.do_copy(list(ids), db, loc, delete_after, add_duplicates=True)
def do_copy(self, ids, db, loc, delete_after, add_duplicates=False):
aname = _('Moving to') if delete_after else _('Copying to')
dtitle = '%s %s'%(aname, os.path.basename(loc))
self.pd = ProgressDialog(dtitle, min=0, max=len(ids)-1,
parent=self.gui, cancelable=False)
def progress(idx, title):
self.pd.set_msg(title)
self.pd.set_value(idx)
self.worker = Worker(ids, db, loc, Dispatcher(progress),
Dispatcher(self.pd.accept), delete_after)
Dispatcher(self.pd.accept), delete_after, add_duplicates)
self.worker.start()
self.pd.exec_()
@ -382,7 +447,8 @@ class CopyToLibraryAction(InterfaceAction):
e, tb = self.worker.error
error_dialog(self.gui, _('Failed'), _('Could not copy books: ') + e,
det_msg=tb, show=True)
else:
return
self.gui.status_bar.show_message(donemsg %
dict(num=len(ids), loc=loc), 2000)
if self.worker.auto_merged_ids:
@ -405,6 +471,7 @@ class CopyToLibraryAction(InterfaceAction):
permanent=True)
self.gui.iactions['Remove Books'].library_ids_deleted(
self.worker.processed, row)
return self.worker.duplicate_ids
def cannot_do_dialog(self):
warning_dialog(self.gui, _('Not allowed'),

View File

@ -27,6 +27,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('read_file_metadata', prefs)
r('swap_author_names', prefs)
r('add_formats_to_existing', prefs)
r('check_for_dupes_on_ctl', prefs)
r('preserve_date_on_ctl', gprefs)
r('manual_add_auto_convert', gprefs)
choices = [

View File

@ -24,21 +24,87 @@
<string>The Add &amp;Process</string>
</attribute>
<layout class="QGridLayout" name="gridLayout_2">
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_add_formats_to_existing">
<item row="1" column="1" colspan="2">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QCheckBox" name="opt_swap_author_names">
<property name="toolTip">
<string>Automerge: If books with similar titles and authors found, merge the incoming formats automatically into
existing book records. The box to the right controls what happens when an existing record already has
the incoming format. Note that this option also affects the Copy to library action.
Title match ignores leading indefinite articles (&quot;the&quot;, &quot;a&quot;, &quot;an&quot;), punctuation, case, etc. Author match is exact.</string>
<string>Swap the firstname and lastname of the author. This affects only metadata read from file names.</string>
</property>
<property name="text">
<string>&amp;Automerge added books if they already exist in the calibre library:</string>
<string>&amp;Swap author firstname and lastname</string>
</property>
</widget>
</item>
<item row="4" column="2">
</layout>
</item>
<item row="6" column="0">
<widget class="QLabel" name="label_230">
<property name="text">
<string>&amp;Tags to apply when adding a book:</string>
</property>
<property name="buddy">
<cstring>opt_new_book_tags</cstring>
</property>
</widget>
</item>
<item row="6" column="2">
<widget class="QLineEdit" name="opt_new_book_tags">
<property name="toolTip">
<string>A comma-separated list of tags that will be applied to books added to the library</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="3">
<widget class="QGroupBox" name="metadata_box">
<property name="title">
<string>&amp;Configure metadata from file name</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>363</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
<item row="2" column="0" colspan="3">
<widget class="QCheckBox" name="opt_preserve_date_on_ctl">
<property name="text">
<string>When using the &quot;&amp;Copy to library&quot; action to copy books between libraries, preserve the date</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_manual_add_auto_convert">
<property name="text">
<string>Automatically &amp;convert added books to the current output format</string>
</property>
</widget>
</item>
<item row="5" column="2">
<widget class="QComboBox" name="opt_automerge">
<property name="toolTip">
<string>Automerge: If books with similar titles and authors found, merge the incoming formats automatically into
@ -71,93 +137,34 @@ Author matching is exact.</string>
</property>
</widget>
</item>
<item row="1" column="1" colspan="2">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QCheckBox" name="opt_swap_author_names">
<property name="toolTip">
<string>Swap the firstname and lastname of the author. This affects only metadata read from file names.</string>
</property>
<property name="text">
<string>&amp;Swap author firstname and lastname</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_230">
<property name="text">
<string>&amp;Tags to apply when adding a book:</string>
</property>
<property name="buddy">
<cstring>opt_new_book_tags</cstring>
</property>
</widget>
</item>
<item row="5" column="2">
<widget class="QLineEdit" name="opt_new_book_tags">
<property name="toolTip">
<string>A comma-separated list of tags that will be applied to books added to the library</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="3">
<widget class="QGroupBox" name="metadata_box">
<property name="title">
<string>&amp;Configure metadata from file name</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>363</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
<item row="2" column="0" colspan="3">
<widget class="QCheckBox" name="opt_preserve_date_on_ctl">
<property name="text">
<string>When using the &quot;&amp;Copy to library&quot; action to copy books between libraries, preserve the date</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_manual_add_auto_convert">
<property name="text">
<string>Automatically &amp;convert added books to the current output format</string>
</property>
</widget>
</item>
<item row="3" column="2">
<item row="4" column="2">
<widget class="QCheckBox" name="opt_mark_new_books">
<property name="text">
<string>&amp;Mark newly added books</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="opt_add_formats_to_existing">
<property name="toolTip">
<string>Automerge: If books with similar titles and authors found, merge the incoming formats automatically into
existing book records. The box to the right controls what happens when an existing record already has
the incoming format. Note that this option also affects the Copy to library action.
Title match ignores leading indefinite articles (&quot;the&quot;, &quot;a&quot;, &quot;an&quot;), punctuation, case, etc. Author match is exact.</string>
</property>
<property name="text">
<string>&amp;Automerge added books if they already exist in the calibre library:</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="3">
<widget class="QCheckBox" name="opt_check_for_dupes_on_ctl">
<property name="text">
<string>When using the &quot;Copy to Library&quot; action check for &amp;duplicates with the same title and author</string>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_4">

View File

@ -398,6 +398,8 @@ def _prefs():
help=_('Swap author first and last names when reading metadata'))
c.add_opt('add_formats_to_existing', default=False,
help=_('Add new formats to existing book records'))
c.add_opt('check_for_dupes_on_ctl', default=False,
help=_('Check for duplicates when copying to another library'))
c.add_opt('installation_uuid', default=None, help='Installation UUID')
c.add_opt('new_book_tags', default=[], help=_('Tags to apply to books added to the library'))
c.add_opt('mark_new_books', default=False, help=_(