From 50e347137ac8faa8a246fb2dfcd3a31b9e1e19a1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Nov 2014 16:09:28 +0530
Subject: [PATCH] Restore auto-merging in the refactored add books

---
 src/calibre/db/cache.py               | 12 +++++
 src/calibre/db/utils.py               | 19 ++++---
 src/calibre/ebooks/metadata/worker.py |  3 --
 src/calibre/gui2/actions/add.py       | 62 +++++++++++-----------
 src/calibre/gui2/add2.py              | 76 ++++++++++++++++++++-------
 5 files changed, 112 insertions(+), 60 deletions(-)

diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index fe9800332b..a73e569605 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -1785,6 +1785,18 @@ class Cache(object):
             author_map[icu_lower(author)].add(aid)
         return (author_map, at.col_book_map.copy(), self.fields['title'].table.book_col_map.copy())
 
+    @read_api
+    def update_data_for_find_identical_books(self, book_id, data):
+        ''' Update data, as obtained from data_for_find_identical_books(), in
+        place so that it also covers the title and authors of book_id. '''
+        author_map, author_book_map, title_map = data
+        title_map[book_id] = self._field_for('title', book_id)
+        authors_table = self.fields['authors'].table
+        for author_id in authors_table.book_col_map.get(book_id, ()):
+            author_map[icu_lower(authors_table.id_map[author_id])].add(author_id)
+            # An author not yet present in the map starts with an empty set
+            author_book_map.setdefault(author_id, set()).add(book_id)
+
     @read_api
     def find_identical_books(self, mi, search_restriction='', book_ids=None):
         ''' Finds books that have a superset of the authors in mi and the same
diff --git a/src/calibre/db/utils.py b/src/calibre/db/utils.py
index 629581f8e5..8de30b6eb1 100644
--- a/src/calibre/db/utils.py
+++ b/src/calibre/db/utils.py
@@ -55,15 +55,22 @@ def fuzzy_title(title):
 
 def find_identical_books(mi, data):
+    ''' Find the ids of books that have all the authors of mi and a title
+    that fuzzy-matches mi.title. An mi without authors matches nothing. '''
     author_map, aid_map, title_map = data
-    author_ids = set()
+    found_books = None
     for a in mi.authors:
-        author_ids |= author_map.get(icu_lower(a), set())
-    book_ids = set()
-    for aid in author_ids:
-        book_ids |= aid_map.get(aid, set())
+        author_ids = author_map.get(icu_lower(a))
+        if author_ids is None:
+            return set()
+        books_by_author = {book_id for aid in author_ids for book_id in aid_map.get(aid, ())}
+        # Keep only the books credited to every author seen so far
+        found_books = books_by_author if found_books is None else found_books & books_by_author
+        if not found_books:
+            return set()
+
     ans = set()
     titleq = fuzzy_title(mi.title)
-    for book_id in book_ids:
+    for book_id in found_books or ():  # found_books is None when mi has no authors
         title = title_map.get(book_id, '')
         if fuzzy_title(title) == titleq:
             ans.add(book_id)
diff --git a/src/calibre/ebooks/metadata/worker.py b/src/calibre/ebooks/metadata/worker.py
index 27c4ff9d6e..a6161288d0 100644
--- a/src/calibre/ebooks/metadata/worker.py
+++ b/src/calibre/ebooks/metadata/worker.py
@@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'
 import os, shutil, errno
 
 from calibre.customize.ui import run_plugins_on_import
-from calibre.db.utils import find_identical_books
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.utils.filenames import samefile
@@ -62,6 +61,4 @@ def read_metadata(paths, group_id, tdir, common_data=None):
     if common_data is not None:
         if isinstance(common_data, (set, frozenset)):
             duplicate_info = mi.title and icu_lower(mi.title) in common_data
-        else:
-            duplicate_info = find_identical_books(mi, common_data)
     return paths, opf, has_cover, duplicate_info
diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py
index 08308d539f..7319a7ff7d 100644
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
 
 import os
 from functools import partial
+from collections import defaultdict
 
 from PyQt5.Qt import QPixmap, QTimer
 
@@ -19,6 +20,7 @@ from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.gui2.widgets import IMAGE_EXTENSIONS
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.utils.filenames import ascii_filename
+from calibre.utils.icu import sort_key
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2 import question_dialog
 from calibre.ebooks.metadata import MetaInformation
@@ -103,12 +105,13 @@ class AddAction(InterfaceAction):
                     _('Cannot add files as no books are selected'), show=True)
         ids = [view.model().id(r) for r in rows]
 
-        if len(ids) > 1 and not question_dialog(self.gui,
+        if len(ids) > 1 and not question_dialog(
+                self.gui,
                 _('Are you sure?'),
-            _('Are you sure you want to add the same'
-                ' files to all %d books? If the format'
-                ' already exists for a book, it will be replaced.')%len(ids)):
-                return
+                _('Are you sure you want to add the same'
+                  ' files to all %d books? If the format'
+                  ' already exists for a book, it will be replaced.')%len(ids)):
+            return
 
         books = choose_files(self.gui, 'add formats dialog dir',
                 _('Select book files'), filters=get_filters())
@@ -385,33 +388,30 @@ class AddAction(InterfaceAction):
                 self.gui.db_images.beginResetModel(), self.gui.db_images.endResetModel()
             self.gui.tags_view.recount()
 
-        # if getattr(self._adder, 'merged_books', False):
-        #     merged = defaultdict(list)
-        #     for title, author in self._adder.merged_books:
-        #         merged[author].append(title)
-        #     lines = []
-        #     for author in sorted(merged, key=sort_key):
-        #         lines.append(author)
-        #         for title in sorted(merged[author], key=sort_key):
-        #             lines.append('\t' + title)
-        #         lines.append('')
-        #     info_dialog(self.gui, _('Merged some books'),
-        #         _('The following %d duplicate books were found and incoming '
-        #             'book formats were processed and merged into your '
-        #             'Calibre database according to your automerge '
-        #             'settings:')%len(self._adder.merged_books),
-        #             det_msg='\n'.join(lines), show=True)
-        #
+        if adder.merged_books:
+            merged = defaultdict(list)
+            for title, author in adder.merged_books:
+                merged[author].append(title)
+            lines = []
+            for author in sorted(merged, key=sort_key):
+                lines.append(author)
+                for title in sorted(merged[author], key=sort_key):
+                    lines.append('\t' + title)
+                lines.append('')
+            info_dialog(self.gui, _('Merged some books'),
+                _('The following %d duplicate books were found and incoming '
+                    'book formats were processed and merged into your '
+                    'Calibre database according to your automerge '
+                    'settings:')%len(adder.merged_books),
+                    det_msg='\n'.join(lines), show=True)
 
-        # if getattr(self._adder, 'number_of_books_added', 0) > 0 or \
-        #         getattr(self._adder, 'merged_books', False):
-        #     # The formats of the current book could have changed if
-        #     # automerge is enabled
-        #     current_idx = self.gui.library_view.currentIndex()
-        #     if current_idx.isValid():
-        #         self.gui.library_view.model().current_changed(current_idx,
-        #                 current_idx)
-        #
+        if adder.number_of_books_added > 0 or adder.merged_books:
+            # The formats of the current book could have changed if
+            # automerge is enabled
+            current_idx = self.gui.library_view.currentIndex()
+            if current_idx.isValid():
+                self.gui.library_view.model().current_changed(current_idx,
+                        current_idx)
 
     def _add_from_device_adder(self, adder, on_card=None, model=None):
         self._files_added(adder, on_card=on_card)
diff --git a/src/calibre/gui2/add2.py b/src/calibre/gui2/add2.py
index e95df4a034..4bd7695c5d 100644
--- a/src/calibre/gui2/add2.py
+++ b/src/calibre/gui2/add2.py
@@ -17,9 +17,10 @@ from PyQt5.Qt import QObject, Qt, pyqtSignal
 from calibre import prints
 from calibre.customize.ui import run_plugins_on_postimport
 from calibre.db.adding import find_books_in_directory
+from calibre.db.utils import find_identical_books
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.opf2 import OPF
-from calibre.gui2 import error_dialog, warning_dialog
+from calibre.gui2 import error_dialog, warning_dialog, gprefs
 from calibre.gui2.dialogs.duplicates import DuplicatesQuestion
 from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.ptempfile import PersistentTemporaryDirectory
@@ -79,7 +80,8 @@ class Adder(QObject):
         self.report = []
         self.items = []
         self.added_book_ids = set()
-        self.added_duplicate_info = ({}, {}, {}) if self.add_formats_to_existing else set()
+        self.merged_books = set()
+        self.added_duplicate_info = set()
         self.pd.show()
 
         self.scan_thread = Thread(target=self.scan, name='ScanBooks')
@@ -98,7 +100,7 @@ class Adder(QObject):
         if not self.items:
             shutil.rmtree(self.tdir, ignore_errors=True)
         self.setParent(None)
-        self.added_duplicate_info = self.pool = self.items = self.duplicates = self.pd = self.db = self.dbref = self.tdir = self.file_groups = self.scan_thread = None  # noqa
+        self.find_identical_books_data = self.merged_books = self.added_duplicate_info = self.pool = self.items = self.duplicates = self.pd = self.db = self.dbref = self.tdir = self.file_groups = self.scan_thread = None  # noqa
         self.deleteLater()
 
     def tick(self):
@@ -188,14 +190,16 @@ class Adder(QObject):
         self.pd.value = 0
         self.pool = Pool(name='AddBooks') if self.pool is None else self.pool
         if self.db is not None:
-            data = self.db.data_for_find_identical_books() if self.add_formats_to_existing else self.db.data_for_has_book()
-            try:
-                self.pool.set_common_data(data)
-            except Failure as err:
-                error_dialog(self.pd, _('Cannot add books'), _(
-                'Failed to add any books, click "Show details" for more information.'),
-                det_msg=unicode(err.failure_message) + '\n' + unicode(err.details), show=True)
-                self.pd.canceled = True
+            if self.add_formats_to_existing:
+                self.find_identical_books_data = self.db.data_for_find_identical_books()
+            else:
+                try:
+                    self.pool.set_common_data(self.db.data_for_has_book())
+                except Failure as err:
+                    error_dialog(self.pd, _('Cannot add books'), _(
+                    'Failed to add any books, click "Show details" for more information.'),
+                    det_msg=unicode(err.failure_message) + '\n' + unicode(err.details), show=True)
+                    self.pd.canceled = True
         self.groups_to_add = iter(self.file_groups)
         self.do_one = self.do_one_group
         self.do_one_signal.emit()
@@ -302,13 +306,41 @@ class Adder(QObject):
             return
 
         if self.add_formats_to_existing:
-            pass  # TODO: Implement this
+            identical_book_ids = find_identical_books(mi, self.find_identical_books_data)
+            if identical_book_ids:
+                try:
+                    self.merge_books(mi, cover_path, paths, identical_book_ids)
+                except Exception:
+                    a = self.report.append
+                    a(''), a('-' * 70)
+                    a(_('Failed to merge the book: ') + mi.title)
+                    [a('\t' + f) for f in paths]
+                    a(_('With error:')), a(traceback.format_exc())
+            else:
+                self.add_book(mi, cover_path, paths)
         else:
             if duplicate_info or icu_lower(mi.title or _('Unknown')) in self.added_duplicate_info:
                 self.duplicates.append((mi, cover_path, paths))
             else:
                 self.add_book(mi, cover_path, paths)
 
+    def merge_books(self, mi, cover_path, paths, identical_book_ids):
+        ''' Add the files in paths to every book in identical_book_ids,
+        honouring the automerge setting, and record mi as merged. '''
+        self.merged_books.add((mi.title, ' & '.join(mi.authors)))
+        existing_fmts = set()
+        overwrite = gprefs['automerge'] == 'overwrite'
+        for book_id in identical_book_ids:
+            existing_fmts.update(fmt.upper() for fmt in self.db.formats(book_id))
+            self.add_formats(book_id, paths, mi, replace=overwrite)
+        if gprefs['automerge'] != 'new record':
+            return
+        incoming_fmts = {path.rpartition(os.extsep)[-1].upper() for path in paths}
+        if not incoming_fmts.isdisjoint(existing_fmts):
+            # A format already existed in a matched book, so also create a new
+            # record; it gets all incoming formats, not just the duplicates.
+            self.add_book(mi, cover_path, paths)
+
     def add_book(self, mi, cover_path, paths):
         try:
             cdata = None
@@ -324,20 +356,24 @@ class Adder(QObject):
             [a('\t' + f) for f in paths]
             a(_('With error:')), a(traceback.format_exc())
             return
-        else:
-            self.add_formats(book_id, paths, mi)
-        if self.add_formats_to_existing:
-            pass  # TODO: Implement this
-        else:
-            self.added_duplicate_info.add(icu_lower(mi.title or _('Unknown')))
+        self.add_formats(book_id, paths, mi)
+        try:
+            if self.add_formats_to_existing:
+                self.db.update_data_for_find_identical_books(book_id, self.find_identical_books_data)
+            else:
+                self.added_duplicate_info.add(icu_lower(mi.title or _('Unknown')))
+        except Exception:
+            # Ignore this exception since all it means is that duplicate
+            # detection/automerge will fail for this book.
+            traceback.print_exc()
 
-    def add_formats(self, book_id, paths, mi):
+    def add_formats(self, book_id, paths, mi, replace=True):
         fmap = {p.rpartition(os.path.extsep)[-1].lower():p for p in paths}
         for fmt, path in fmap.iteritems():
             # The onimport plugins have already been run by the read metadata
             # worker
             try:
-                if self.db.add_format(book_id, fmt, path, run_hooks=False):
+                if self.db.add_format(book_id, fmt, path, run_hooks=False, replace=replace):
                     run_plugins_on_postimport(self.dbref(), book_id, fmt)
             except Exception:
                 a = self.report.append