Add merging of identify results if they have a common identifier. Turned off for now (the new merge_on_identifiers parameter defaults to False).

This commit is contained in:
Kovid Goyal 2011-04-17 14:20:34 -06:00
parent a244767f49
commit 73ef5b9991

View File

@ -114,8 +114,12 @@ class ISBNMerge(object):
return self.results
def merge_metadata_results(self):
' Merge results with identical title and authors '
def merge_metadata_results(self, merge_on_identifiers=False):
'''
Merge results with identical title and authors or an identical
identifier
'''
# First title/author
groups = {}
for result in self.results:
title = lower(result.title if result.title else '')
@ -135,6 +139,44 @@ class ISBNMerge(object):
result = rgroup[0]
self.results.append(result)
if merge_on_identifiers:
# Now identifiers
groups, empty = {}, []
for result in self.results:
key = set()
for typ, val in result.identifiers.iteritems():
if typ and val:
key.add((typ, val))
if key:
key = frozenset(key)
match = None
for candidate in list(groups):
if candidate.intersection(key):
# We have at least one identifier in common
match = candidate.union(key)
results = groups.pop(candidate)
results.append(result)
groups[match] = results
break
if match is None:
groups[key] = [result]
else:
empty.append(result)
if len(groups) != len(self.results):
self.results = []
for rgroup in groups.itervalues():
rel = [r.average_source_relevance for r in rgroup]
if len(rgroup) > 1:
result = self.merge(rgroup, None, do_asr=False)
result.average_source_relevance = sum(rel)/len(rel)
elif rgroup:
result = rgroup[0]
self.results.append(result)
if empty:
self.results.extend(empty)
self.results.sort(key=attrgetter('average_source_relevance'))
def merge_isbn_results(self):