diff --git a/transcoder/src/fingerprints_compare.go b/transcoder/src/fingerprints_compare.go index 5f61a217..2f6cb95d 100644 --- a/transcoder/src/fingerprints_compare.go +++ b/transcoder/src/fingerprints_compare.go @@ -2,7 +2,6 @@ package src import ( "context" - "fmt" "log/slog" "math/bits" ) @@ -30,16 +29,6 @@ const ( // Number of samples per correlation block (~2 seconds at 7.8125 samples/s). // Segments are evaluated in blocks of this size to find contiguous matching runs. CorrBlockSize = 16 - - // Number of samples used in boundary refinement windows (~0.5 seconds). - // A smaller window gives sub-block precision while keeping noise low. - RefineWindowSize = 4 - - // Boundary refinement should be stricter than coarse block detection, - // otherwise short transitional content (speech/title cards) can be pulled - // into a matching run. Require both a higher score and sustained windows. - RefineThreshold = 0.3 - RefineConsecutiveWindows = 3 ) type Overlap struct { @@ -134,7 +123,7 @@ func findBestOffset(ctx context.Context, fp1, fp2 []uint32) *int { // to the number of unique values. This filters out repetitive audio // (silence, static noise) that would produce spurious matches. // (at least 2% of values must match with said offset) - percent := float64(topCount) / float64(max(len(offsets1), len(offsets2))) + percent := float64(topCount) / float64(min(len(offsets1), len(offsets2))) if percent < 2./100 { slog.WarnContext( ctx, @@ -187,7 +176,6 @@ func findMatchingRuns(fp1, fp2 []uint32, start1, start2 int) []Overlap { hi := lo + CorrBlockSize blockCorr[b] = segmentCorrelation(fp1[lo:hi], fp2[lo:hi]) } - fmt.Printf("bloc corr %v\n", blockCorr) // Find contiguous runs of blocks above threshold. var overlaps []Overlap @@ -213,10 +201,9 @@ func findMatchingRuns(fp1, fp2 []uint32, start1, start2 int) []Overlap { inRun = false start := runStart * CorrBlockSize // the current `b` doesn't match, don't include it. - // also remove the previous one to be sure we don't skip content. - end := (b - 2) * CorrBlockSize + // also remove the previous ones to be sure we don't skip content. + end := (b - 3) * CorrBlockSize if end-start >= minSamples { - // start, end = refineRunBounds(fp1, fp2, start, end) corr := segmentCorrelation(fp1[start:end], fp2[start:end]) overlaps = append(overlaps, Overlap{ StartFirst: samplesToSec(start1 + start), @@ -230,56 +217,6 @@ func findMatchingRuns(fp1, fp2 []uint32, start1, start2 int) []Overlap { return overlaps } -// refineRunBounds improves coarse block-aligned [start, end) boundaries -// using short sample windows around each edge. It only scans ±1 block around -// each edge, so the fast block-based pass remains the dominant cost. -// -// To avoid expanding runs into transitional content, refinement requires -// a stricter per-window threshold and a short streak of consecutive windows. -func refineRunBounds(fp1, fp2 []uint32, start, end int) (int, int) { - length := min(len(fp1), len(fp2)) - window := RefineWindowSize - - startSearchLo := max(0, start-CorrBlockSize) - startSearchHi := min(start+CorrBlockSize, length-window) - refinedStart := startSearchHi - streak := 0 - for i := startSearchLo; i <= startSearchHi; i++ { - if segmentCorrelation(fp1[i:i+window], fp2[i:i+window]) >= RefineThreshold { - streak++ - if streak >= RefineConsecutiveWindows { - refinedStart = i - streak + 1 - break - } - } else { - streak = 0 - } - } - - endSearchLo := max(0, end-CorrBlockSize-window) - endSearchHi := min(end+CorrBlockSize-window, length-window) - refinedEnd := endSearchLo - streak = 0 - for i := endSearchHi; i >= endSearchLo; i-- { - if segmentCorrelation(fp1[i:i+window], fp2[i:i+window]) >= RefineThreshold { - streak++ - if streak >= RefineConsecutiveWindows { - refinedEnd = i + window + streak - 1 - break - } - } else { - streak = 0 - } - } - - fmt.Printf("before (%v, %v), after (%v, %v)\n", start, end, refinedStart, refinedEnd) - if refinedEnd <= refinedStart { - return start, end - } - - return refinedStart, refinedEnd -} - // FpFindOverlap finds all similar segments (like shared intro music) between // two chromaprint fingerprints. //