From 50e347137ac8faa8a246fb2dfcd3a31b9e1e19a1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Nov 2014 16:09:28 +0530 Subject: [PATCH] Restore auto-merging in the refactored add books --- src/calibre/db/cache.py | 12 +++++ src/calibre/db/utils.py | 19 ++++--- src/calibre/ebooks/metadata/worker.py | 3 -- src/calibre/gui2/actions/add.py | 62 +++++++++++----------- src/calibre/gui2/add2.py | 76 ++++++++++++++++++++------- 5 files changed, 112 insertions(+), 60 deletions(-) diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index fe9800332b..a73e569605 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -1785,6 +1785,18 @@ class Cache(object): author_map[icu_lower(author)].add(aid) return (author_map, at.col_book_map.copy(), self.fields['title'].table.book_col_map.copy()) + @read_api + def update_data_for_find_identical_books(self, book_id, data): + author_map, author_book_map, title_map = data + title_map[book_id] = self._field_for('title', book_id) + at = self.fields['authors'].table + for aid in at.book_col_map.get(book_id, ()): + author_map[icu_lower(at.id_map[aid])].add(aid) + try: + author_book_map[aid].add(book_id) + except KeyError: + author_book_map[aid] = {book_id} + @read_api def find_identical_books(self, mi, search_restriction='', book_ids=None): ''' Finds books that have a superset of the authors in mi and the same diff --git a/src/calibre/db/utils.py b/src/calibre/db/utils.py index 629581f8e5..8de30b6eb1 100644 --- a/src/calibre/db/utils.py +++ b/src/calibre/db/utils.py @@ -55,15 +55,22 @@ def fuzzy_title(title): def find_identical_books(mi, data): author_map, aid_map, title_map = data - author_ids = set() + found_books = None for a in mi.authors: - author_ids |= author_map.get(icu_lower(a), set()) - book_ids = set() - for aid in author_ids: - book_ids |= aid_map.get(aid, set()) + author_ids = author_map.get(icu_lower(a)) + if author_ids is None: + return set() + books_by_author = {book_id for aid in author_ids for book_id in aid_map.get(aid, ())} + if found_books is None: + found_books = books_by_author + else: + found_books &= books_by_author + if not found_books: + return set() + ans = set() titleq = fuzzy_title(mi.title) - for book_id in book_ids: + for book_id in found_books: title = title_map.get(book_id, '') if fuzzy_title(title) == titleq: ans.add(book_id) diff --git a/src/calibre/ebooks/metadata/worker.py b/src/calibre/ebooks/metadata/worker.py index 27c4ff9d6e..a6161288d0 100644 --- a/src/calibre/ebooks/metadata/worker.py +++ b/src/calibre/ebooks/metadata/worker.py @@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en' import os, shutil, errno from calibre.customize.ui import run_plugins_on_import -from calibre.db.utils import find_identical_books from calibre.ebooks.metadata.meta import metadata_from_formats from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.utils.filenames import samefile @@ -62,6 +61,4 @@ def read_metadata(paths, group_id, tdir, common_data=None): if common_data is not None: if isinstance(common_data, (set, frozenset)): duplicate_info = mi.title and icu_lower(mi.title) in common_data - else: - duplicate_info = find_identical_books(mi, common_data) return paths, opf, has_cover, duplicate_info diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index 08308d539f..7319a7ff7d 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en' import os from functools import partial +from collections import defaultdict from PyQt5.Qt import QPixmap, QTimer @@ -19,6 +20,7 @@ from calibre.gui2.dialogs.progress import ProgressDialog from calibre.gui2.widgets import IMAGE_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS from calibre.utils.filenames import ascii_filename +from calibre.utils.icu import sort_key from calibre.gui2.actions import InterfaceAction from calibre.gui2 import question_dialog from calibre.ebooks.metadata import MetaInformation @@ -103,12 +105,13 @@ class AddAction(InterfaceAction): _('Cannot add files as no books are selected'), show=True) ids = [view.model().id(r) for r in rows] - if len(ids) > 1 and not question_dialog(self.gui, + if len(ids) > 1 and not question_dialog( + self.gui, _('Are you sure?'), - _('Are you sure you want to add the same' - ' files to all %d books? If the format' - ' already exists for a book, it will be replaced.')%len(ids)): - return + _('Are you sure you want to add the same' + ' files to all %d books? If the format' + ' already exists for a book, it will be replaced.')%len(ids)): + return books = choose_files(self.gui, 'add formats dialog dir', _('Select book files'), filters=get_filters()) @@ -385,33 +388,30 @@ class AddAction(InterfaceAction): self.gui.db_images.beginResetModel(), self.gui.db_images.endResetModel() self.gui.tags_view.recount() - # if getattr(self._adder, 'merged_books', False): - # merged = defaultdict(list) - # for title, author in self._adder.merged_books: - # merged[author].append(title) - # lines = [] - # for author in sorted(merged, key=sort_key): - # lines.append(author) - # for title in sorted(merged[author], key=sort_key): - # lines.append('\t' + title) - # lines.append('') - # info_dialog(self.gui, _('Merged some books'), - # _('The following %d duplicate books were found and incoming ' - # 'book formats were processed and merged into your ' - # 'Calibre database according to your automerge ' - # 'settings:')%len(self._adder.merged_books), - # det_msg='\n'.join(lines), show=True) - # + if adder.merged_books: + merged = defaultdict(list) + for title, author in adder.merged_books: + merged[author].append(title) + lines = [] + for author in sorted(merged, key=sort_key): + lines.append(author) + for title in sorted(merged[author], key=sort_key): + lines.append('\t' + title) + lines.append('') + info_dialog(self.gui, _('Merged some books'), + _('The following %d duplicate books were found and incoming ' + 'book formats were processed and merged into your ' + 'Calibre database according to your automerge ' + 'settings:')%len(adder.merged_books), + det_msg='\n'.join(lines), show=True) - # if getattr(self._adder, 'number_of_books_added', 0) > 0 or \ - # getattr(self._adder, 'merged_books', False): - # # The formats of the current book could have changed if - # # automerge is enabled - # current_idx = self.gui.library_view.currentIndex() - # if current_idx.isValid(): - # self.gui.library_view.model().current_changed(current_idx, - # current_idx) - # + if adder.number_of_books_added > 0 or adder.merged_books: + # The formats of the current book could have changed if + # automerge is enabled + current_idx = self.gui.library_view.currentIndex() + if current_idx.isValid(): + self.gui.library_view.model().current_changed(current_idx, + current_idx) def _add_from_device_adder(self, adder, on_card=None, model=None): self._files_added(adder, on_card=on_card) diff --git a/src/calibre/gui2/add2.py b/src/calibre/gui2/add2.py index e95df4a034..4bd7695c5d 100644 --- a/src/calibre/gui2/add2.py +++ b/src/calibre/gui2/add2.py @@ -17,9 +17,10 @@ from PyQt5.Qt import QObject, Qt, pyqtSignal from calibre import prints from calibre.customize.ui import run_plugins_on_postimport from calibre.db.adding import find_books_in_directory +from calibre.db.utils import find_identical_books from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.opf2 import OPF -from calibre.gui2 import error_dialog, warning_dialog +from calibre.gui2 import error_dialog, warning_dialog, gprefs from calibre.gui2.dialogs.duplicates import DuplicatesQuestion from calibre.gui2.dialogs.progress import ProgressDialog from calibre.ptempfile import PersistentTemporaryDirectory @@ -79,7 +80,8 @@ class Adder(QObject): self.report = [] self.items = [] self.added_book_ids = set() - self.added_duplicate_info = ({}, {}, {}) if self.add_formats_to_existing else set() + self.merged_books = set() + self.added_duplicate_info = set() self.pd.show() self.scan_thread = Thread(target=self.scan, name='ScanBooks') @@ -98,7 +100,7 @@ class Adder(QObject): if not self.items: shutil.rmtree(self.tdir, ignore_errors=True) self.setParent(None) - self.added_duplicate_info = self.pool = self.items = self.duplicates = self.pd = self.db = self.dbref = self.tdir = self.file_groups = self.scan_thread = None # noqa + self.find_identical_books_data = self.merged_books = self.added_duplicate_info = self.pool = self.items = self.duplicates = self.pd = self.db = self.dbref = self.tdir = self.file_groups = self.scan_thread = None # noqa self.deleteLater() def tick(self): @@ -188,14 +190,16 @@ class Adder(QObject): self.pd.value = 0 self.pool = Pool(name='AddBooks') if self.pool is None else self.pool if self.db is not None: - data = self.db.data_for_find_identical_books() if self.add_formats_to_existing else self.db.data_for_has_book() - try: - self.pool.set_common_data(data) - except Failure as err: - error_dialog(self.pd, _('Cannot add books'), _( - 'Failed to add any books, click "Show details" for more information.'), - det_msg=unicode(err.failure_message) + '\n' + unicode(err.details), show=True) - self.pd.canceled = True + if self.add_formats_to_existing: + self.find_identical_books_data = self.db.data_for_find_identical_books() + else: + try: + self.pool.set_common_data(self.db.data_for_has_book()) + except Failure as err: + error_dialog(self.pd, _('Cannot add books'), _( + 'Failed to add any books, click "Show details" for more information.'), + det_msg=unicode(err.failure_message) + '\n' + unicode(err.details), show=True) + self.pd.canceled = True self.groups_to_add = iter(self.file_groups) self.do_one = self.do_one_group self.do_one_signal.emit() @@ -302,13 +306,41 @@ class Adder(QObject): return if self.add_formats_to_existing: - pass # TODO: Implement this + identical_book_ids = find_identical_books(mi, self.find_identical_books_data) + if identical_book_ids: + try: + self.merge_books(mi, cover_path, paths, identical_book_ids) + except Exception: + a = self.report.append + a(''), a('-' * 70) + a(_('Failed to merge the book: ') + mi.title) + [a('\t' + f) for f in paths] + a(_('With error:')), a(traceback.format_exc()) + else: + self.add_book(mi, cover_path, paths) else: if duplicate_info or icu_lower(mi.title or _('Unknown')) in self.added_duplicate_info: self.duplicates.append((mi, cover_path, paths)) else: self.add_book(mi, cover_path, paths) + def merge_books(self, mi, cover_path, paths, identical_book_ids): + self.merged_books.add((mi.title, ' & '.join(mi.authors))) + seen_fmts = set() + replace = gprefs['automerge'] == 'overwrite' + for identical_book_id in identical_book_ids: + ib_fmts = {fmt.upper() for fmt in self.db.formats(identical_book_id)} + seen_fmts |= ib_fmts + self.add_formats(identical_book_id, paths, mi, replace=replace) + if gprefs['automerge'] == 'new record': + incoming_fmts = {path.rpartition(os.extsep)[-1].upper() for path in paths} + if incoming_fmts.intersection(seen_fmts): + # There was at least one duplicate format so create a new + # record and put the incoming formats into it We should + # arguably put only the duplicate formats, but no real harm is + # done by having all formats + self.add_book(mi, cover_path, paths) + def add_book(self, mi, cover_path, paths): try: cdata = None @@ -324,20 +356,24 @@ class Adder(QObject): [a('\t' + f) for f in paths] a(_('With error:')), a(traceback.format_exc()) return - else: - self.add_formats(book_id, paths, mi) - if self.add_formats_to_existing: - pass # TODO: Implement this - else: - self.added_duplicate_info.add(icu_lower(mi.title or _('Unknown'))) + self.add_formats(book_id, paths, mi) + try: + if self.add_formats_to_existing: + self.db.update_data_for_find_identical_books(book_id, self.find_identical_books_data) + else: + self.added_duplicate_info.add(icu_lower(mi.title or _('Unknown'))) + except Exception: + # Ignore this exception since all it means is that duplicate + # detection/automerge will fail for this book. + traceback.print_exc() - def add_formats(self, book_id, paths, mi): + def add_formats(self, book_id, paths, mi, replace=True): fmap = {p.rpartition(os.path.extsep)[-1].lower():p for p in paths} for fmt, path in fmap.iteritems(): # The onimport plugins have already been run by the read metadata # worker try: - if self.db.add_format(book_id, fmt, path, run_hooks=False): + if self.db.add_format(book_id, fmt, path, run_hooks=False, replace=replace): run_plugins_on_postimport(self.dbref(), book_id, fmt) except Exception: a = self.report.append