From 73ef5b9991b89f857aeb1f477d7399dfe24be9bf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 17 Apr 2011 14:20:34 -0600 Subject: [PATCH] Add merging of identify results if they have a common identifier. Turned off for now --- .../ebooks/metadata/sources/identify.py | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 82e9a14d12..1fb1a74679 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -114,8 +114,12 @@ class ISBNMerge(object): return self.results - def merge_metadata_results(self): - ' Merge results with identical title and authors ' + def merge_metadata_results(self, merge_on_identifiers=False): + ''' + Merge results with identical title and authors or an identical + identifier + ''' + # First title/author groups = {} for result in self.results: title = lower(result.title if result.title else '') @@ -135,6 +139,44 @@ class ISBNMerge(object): result = rgroup[0] self.results.append(result) + if merge_on_identifiers: + # Now identifiers + groups, empty = {}, [] + for result in self.results: + key = set() + for typ, val in result.identifiers.iteritems(): + if typ and val: + key.add((typ, val)) + if key: + key = frozenset(key) + match = None + for candidate in list(groups): + if candidate.intersection(key): + # We have at least one identifier in common + match = candidate.union(key) + results = groups.pop(candidate) + results.append(result) + groups[match] = results + break + if match is None: + groups[key] = [result] + else: + empty.append(result) + + if len(groups) != len(self.results): + self.results = [] + for rgroup in groups.itervalues(): + rel = [r.average_source_relevance for r in rgroup] + if len(rgroup) > 1: + result = self.merge(rgroup, None, do_asr=False) + result.average_source_relevance = sum(rel)/len(rel) + elif rgroup: + result = rgroup[0] + self.results.append(result) + + if empty: + self.results.extend(empty) + self.results.sort(key=attrgetter('average_source_relevance')) def merge_isbn_results(self):