Implement FpFindOverlap function

This commit is contained in:
Zoe Roux 2026-04-14 17:51:31 +02:00
parent 9ac833b11b
commit 3874ff7238
No known key found for this signature in database

View File

@ -4,13 +4,30 @@ import (
"math/bits"
)
// See how AcoustID handles comparison:
// https://bitbucket.org/acoustid/acoustid-server/src/cb303c2a3588ff055b7669cf6f1711a224ab9183/postgresql/acoustid_compare.c?at=master
const (
	// MinOverlapDuration is the minimum length of a matching run, in seconds
	// (it is converted with secToSamples before being compared to sample counts).
	MinOverlapDuration = 15.0

	// MinSilenceDuration is presumably the minimum length, in seconds, of a
	// silent section; it is not used in this part of the file.
	MinSilenceDuration = 2.0

	// Correlation threshold (0.0-1.0) above which a match is considered valid.
	// Uses the AcoustID-style formula: 1.0 - 2.0 * biterror / (32 * length),
	// where random noise scores ~0.0 and identical audio scores 1.0.
	MatchThreshold = 0.1

	// Number of most-significant bits used as a hash key for offset voting.
	// Matches AcoustID's MATCH_BITS. The top bits of a chromaprint value are
	// the most discriminative (classifiers are ordered by importance).
	MatchBits = 14

	// Chromaprint encodes silence as this specific value.
	// We skip it during offset voting to avoid false matches.
	SilenceValue = 627964279

	// Number of samples per correlation block (~2 seconds at 7.8125 samples/s).
	// Segments are evaluated in blocks of this size to find contiguous matching runs.
	CorrBlockSize = 16
)
type Overlap struct {
@ -30,20 +47,188 @@ func hammingDistance(a, b uint32) int {
return bits.OnesCount32(a ^ b)
}
// segmentCorrelation computes a similarity score between two aligned
// fingerprint slices using the AcoustID formula.
// Returns a value in [0.0, 1.0] where 0.0 means completely different
// (or random noise) and 1.0 means identical.
func segmentCorrelation(fp1 []uint32, fp2 []uint32) float64 {
length := min(len(fp1), len(fp2))
diffBits := 0
for i := range length {
diffBits += hammingDistance(fp1[i], fp2[i])
if length == 0 {
return 0
}
return 1.0 - float64(diffBits)/float64(length*32)
biterror := 0
for i := range length {
biterror += hammingDistance(fp1[i], fp2[i])
}
score := 1.0 - 2.0*float64(biterror)/float64(32*length)
return max(0, score)
}
// matchStrip reduces a fingerprint value to its MatchBits most-significant
// bits, which serve as the hash key during offset voting.
func matchStrip(v uint32) uint16 {
	// Number of low-order bits discarded from the 32-bit value.
	const shift = 32 - MatchBits
	top := v >> shift
	return uint16(top)
}
// findBestOffset discovers the time offset that best aligns two fingerprints.
//
// It follows AcoustID's match_fingerprints2 approach:
//  1. Hash each fingerprint value by its top MatchBits bits (matchStrip) into
//     a map, storing the last seen position for each key. Values equal to
//     SilenceValue are skipped.
//  2. For each key present in both maps, vote for the offset
//     (position_in_fp1 - position_in_fp2).
//  3. The offset with the most votes wins. Ties are broken deterministically:
//     the offset with the smallest absolute value wins, then the smaller
//     (more negative) one.
//  4. A diversity check rejects matches caused by repetitive/silent audio.
//
// It returns nil when no usable offset exists: either fingerprint has no
// non-silent value, no key is shared, or the winning offset has too few votes.
func findBestOffset(fp1, fp2 []uint32) *int {
	// lastPositions maps each hash key to the last index where it occurs.
	lastPositions := func(fp []uint32) map[uint16]int {
		positions := make(map[uint16]int)
		for i, v := range fp {
			if v == SilenceValue {
				continue
			}
			positions[matchStrip(v)] = i
		}
		return positions
	}

	offsets1 := lastPositions(fp1)
	offsets2 := lastPositions(fp2)
	if len(offsets1) == 0 || len(offsets2) == 0 {
		return nil
	}

	votes := make(map[int]int)
	for key, a := range offsets1 {
		b, ok := offsets2[key]
		if !ok {
			continue
		}
		votes[a-b]++
	}

	abs := func(x int) int {
		if x < 0 {
			return -x
		}
		return x
	}

	// Pick the winner in a separate pass so the result does not depend on
	// the (randomised) map iteration order when several offsets tie.
	topCount := 0
	topOffset := 0
	for offset, count := range votes {
		better := count > topCount
		if count == topCount {
			da, db := abs(offset), abs(topOffset)
			better = da < db || (da == db && offset < topOffset)
		}
		if better {
			topCount = count
			topOffset = offset
		}
	}

	// No key is shared between the two fingerprints: there is nothing to
	// align on. Without this check the integer threshold below truncates to
	// 0 for fewer than 50 unique keys and a bogus offset of 0 would be
	// accepted.
	if topCount == 0 {
		return nil
	}

	// Diversity check: reject if the top offset got very few votes relative
	// to the number of unique values. This filters out repetitive audio
	// (silence, static noise) that would produce spurious matches.
	// (at least 2% of values must match with said offset)
	if topCount < max(len(offsets1), len(offsets2))*2/100 {
		return nil
	}

	return &topOffset
}
// alignFingerprints trims fp1 and fp2 to the region they share once fp1 is
// shifted by `offset` positions relative to fp2, where
// offset = position_in_fp1 - position_in_fp2.
//
// It returns the two equally long sub-slices followed by the index in fp1
// and the index in fp2 at which they begin. When the shifted fingerprints do
// not overlap at all it returns (nil, nil, 0, 0).
func alignFingerprints(fp1, fp2 []uint32, offset int) ([]uint32, []uint32, int, int) {
	// A positive offset skips the head of fp1, otherwise the head of fp2.
	var skip1, skip2 int
	if offset <= 0 {
		skip2 = -offset
	} else {
		skip1 = offset
	}

	// The shared region is as long as the shorter remaining tail.
	n := len(fp1) - skip1
	if rest := len(fp2) - skip2; rest < n {
		n = rest
	}
	if n <= 0 {
		return nil, nil, 0, 0
	}

	end1, end2 := skip1+n, skip2+n
	return fp1[skip1:end1], fp2[skip2:end2], skip1, skip2
}
// findMatchingRuns divides the aligned fingerprints into blocks of
// CorrBlockSize samples, scores each block with segmentCorrelation, and
// finds contiguous runs of blocks whose score is at least MatchThreshold.
// Each run spanning at least secToSamples(MinOverlapDuration) samples is
// returned as an Overlap, in order of appearance.
//
// fp1 and fp2 are the already aligned slices; start1 and start2 are the
// indices at which they begin in their source fingerprints and are added to
// the run start before it is converted with samplesToSec. Trailing samples
// that do not fill a whole block are ignored. Returns nil when the aligned
// region is shorter than the minimum or when no run qualifies.
func findMatchingRuns(fp1, fp2 []uint32, start1, start2 int) []Overlap {
	length := min(len(fp1), len(fp2))
	minSamples := secToSamples(MinOverlapDuration)
	if length < minSamples {
		return nil
	}

	nblocks := length / CorrBlockSize

	var overlaps []Overlap

	// flush records the run covering blocks [first, last) when it is long
	// enough. Accuracy is the correlation of the whole run as a percentage.
	flush := func(first, last int) {
		start := first * CorrBlockSize
		end := last * CorrBlockSize
		if end-start < minSamples {
			return
		}
		corr := segmentCorrelation(fp1[start:end], fp2[start:end])
		overlaps = append(overlaps, Overlap{
			StartFirst:  samplesToSec(start1 + start),
			StartSecond: samplesToSec(start2 + start),
			Duration:    samplesToSec(end - start),
			Accuracy:    max(0, min(int(corr*100), 100)),
		})
	}

	// runStart is the first block of the run currently open, or -1 when no
	// run is open. It is set on entering a run and reset on leaving it, so
	// each run is reported from its own first block.
	runStart := -1
	for b := 0; b < nblocks; b++ {
		lo := b * CorrBlockSize
		hi := lo + CorrBlockSize
		if segmentCorrelation(fp1[lo:hi], fp2[lo:hi]) >= MatchThreshold {
			if runStart < 0 {
				runStart = b
			}
			continue
		}
		if runStart >= 0 {
			flush(runStart, b)
			runStart = -1
		}
	}

	// A run that extends to the last block is still open here.
	if runStart >= 0 {
		flush(runStart, nblocks)
	}

	return overlaps
}
// FpFindOverlap finds all similar segments (like shared intro music) between
// two chromaprint fingerprints.
//
//  1. Hash each fingerprint value by its top 14 bits to find the best
//     time-offset alignment between the two fingerprints (like
//     AcoustID's match_fingerprints2).
//  2. Align the fingerprints at that offset.
//  3. Divide the aligned region into ~2-second blocks and compute correlation
//     per block using the AcoustID scoring formula.
//  4. Find contiguous runs of high-correlation blocks that are at least
//     MinOverlapDuration long.
//
// It returns a nil slice when no alignment offset is found or when the
// aligned fingerprints do not overlap. The returned error is always nil in
// the current implementation.
func FpFindOverlap(fp1 []uint32, fp2 []uint32) ([]Overlap, error) {
	offset := findBestOffset(fp1, fp2)
	if offset == nil {
		return nil, nil
	}

	a1, a2, s1, s2 := alignFingerprints(fp1, fp2, *offset)
	if len(a1) == 0 {
		return nil, nil
	}

	return findMatchingRuns(a1, a2, s1, s2), nil
}
func FpFindContain(fp1 []uint32, fp2 []uint32) (*Match, error) {
func FpFindContain(haystack []uint32, needle []uint32) (*Match, error) {
return nil, nil
}