From 3334e3a1ed579bc28bd2659265e32e3bf8d5e498 Mon Sep 17 00:00:00 2001 From: Zoe Roux Date: Wed, 15 Apr 2026 23:26:21 +0200 Subject: [PATCH] Only extract valid chapterprints --- api/src/controllers/video-metadata.ts | 2 +- api/src/websockets.ts | 11 ++++- transcoder/src/chapters.go | 63 +++++++++++++------------- transcoder/src/fingerprints_compare.go | 21 ++++++--- transcoder/src/info.go | 6 ++- 5 files changed, 60 insertions(+), 43 deletions(-) diff --git a/api/src/controllers/video-metadata.ts b/api/src/controllers/video-metadata.ts index cd16d007..8add27cf 100644 --- a/api/src/controllers/video-metadata.ts +++ b/api/src/controllers/video-metadata.ts @@ -1,5 +1,5 @@ import { getLogger } from "@logtape/logtape"; -import { eq, and } from "drizzle-orm"; +import { and, eq } from "drizzle-orm"; import { Elysia, t } from "elysia"; import slugify from "slugify"; import { auth } from "~/auth"; diff --git a/api/src/websockets.ts b/api/src/websockets.ts index e885cc70..f2f7ac0a 100644 --- a/api/src/websockets.ts +++ b/api/src/websockets.ts @@ -1,3 +1,4 @@ +import { getLogger } from "@logtape/logtape"; import type { TObject, TString } from "@sinclair/typebox"; import { eq } from "drizzle-orm"; import Elysia, { type TSchema, t } from "elysia"; @@ -8,6 +9,8 @@ import { prepareVideo } from "./controllers/video-metadata"; import { getVideos } from "./controllers/videos"; import { videos } from "./db/schema"; +const logger = getLogger(); + const actionMap = { ping: handler({ message(ws) { @@ -60,7 +63,13 @@ const actionMap = { userId: ws.data.jwt.sub, }); const next = vid?.next?.video; - if (next) await prepareVideo(next, ws.data.headers.authorization!); + if (!next) { + logger.info("No next video to prepare for ${slug}", { + slug: vid.path, + }); + return + } + await prepareVideo(next, ws.data.headers.authorization!); } }, }), diff --git a/transcoder/src/chapters.go b/transcoder/src/chapters.go index 6c083373..3f174e09 100644 --- a/transcoder/src/chapters.go +++ b/transcoder/src/chapters.go @@ -117,7 +117,7 @@ func (s *MetadataService) matchByChapterprints( startOffset = max(info.Duration-FpEndDuration, 0) } - match, err := FpFindContain(fp, needle) + match, err := FpFindContain(ctx, fp, needle) if err != nil { slog.WarnContext(ctx, "failed to find chapterprint in fingerprint", "err", err) continue @@ -166,12 +166,6 @@ func (s *MetadataService) matchByOverlap( var candidates []Chapter for _, intro := range intros { - fp, err := ExtractSegment(fingerprint.Start, intro.StartFirst, intro.StartFirst+intro.Duration) - if err != nil { - slog.WarnContext(ctx, "failed to extract intro segment", "err", err) - continue - } - slog.InfoContext(ctx, "Identified intro", "start", intro.StartFirst, "duration", intro.Duration) candidates = append(candidates, Chapter{ Id: info.Id, @@ -179,28 +173,23 @@ func (s *MetadataService) matchByOverlap( EndTime: float32(intro.StartFirst + intro.Duration), Name: "", Type: Intro, - Fingerprint: fp, + Fingerprint: fingerprint.Start, MatchAccuracy: new(int32(intro.Accuracy)), }) } for _, cred := range credits { - fp, err := ExtractSegment(fingerprint.End, cred.StartFirst, cred.StartFirst+cred.Duration) - if err != nil { - slog.WarnContext(ctx, "failed to extract credits segment", "err", err) - continue - } - endOffset := info.Duration - samplesToSec(len(fingerprint.End)) slog.InfoContext(ctx, "Identified credits", "start", endOffset+cred.StartFirst, "duration", cred.Duration, "end_offset", endOffset) candidates = append(candidates, Chapter{ - Id: info.Id, - StartTime: float32(endOffset + cred.StartFirst), - EndTime: float32(endOffset + cred.StartFirst + cred.Duration), - Name: "", - Type: Credits, - Fingerprint: fp, - MatchAccuracy: new(int32(cred.Accuracy)), + Id: info.Id, + StartTime: float32(endOffset + cred.StartFirst), + EndTime: float32(endOffset + cred.StartFirst + cred.Duration), + Name: "", + Type: Credits, + Fingerprint: fingerprint.End, + FingerprintOffset: endOffset, + MatchAccuracy: new(int32(cred.Accuracy)), }) } @@ -228,6 +217,7 @@ func mergeChapters(info *MediaInfo, candidates []Chapter) []Chapter { chapters[i].Type = cand.Type } chapters[i].Fingerprint = cand.Fingerprint + chapters[i].FingerprintOffset = cand.FingerprintOffset chapters[i].FingerprintId = cand.FingerprintId chapters[i].MatchAccuracy = cand.MatchAccuracy merged = true @@ -243,14 +233,15 @@ func mergeChapters(info *MediaInfo, candidates []Chapter) []Chapter { cand.EndTime = float32(info.Duration) } chapters = insertChapter(chapters, Chapter{ - Id: info.Id, - StartTime: cand.StartTime, - EndTime: cand.EndTime, - Name: "", - Type: cand.Type, - Fingerprint: cand.Fingerprint, - FingerprintId: cand.FingerprintId, - MatchAccuracy: cand.MatchAccuracy, + Id: info.Id, + StartTime: cand.StartTime, + EndTime: cand.EndTime, + Name: "", + Type: cand.Type, + Fingerprint: cand.Fingerprint, + FingerprintOffset: cand.FingerprintOffset, + FingerprintId: cand.FingerprintId, + MatchAccuracy: cand.MatchAccuracy, }, info.Duration) } } @@ -329,16 +320,24 @@ func (s *MetadataService) saveChapters(ctx context.Context, infoId int32, chapte } defer tx.Rollback(ctx) - // Delete existing chapters _, err = tx.Exec(ctx, `delete from gocoder.chapters where id = $1`, infoId) if err != nil { return fmt.Errorf("failed to delete existing chapters: %w", err) } - // Insert new chapters for _, c := range chapters { if c.FingerprintId == nil && c.Fingerprint != nil { - fpId, err := s.StoreChapterprint(ctx, c.Fingerprint) + fp, err := ExtractSegment( + c.Fingerprint, + float64(c.StartTime)-c.FingerprintOffset, + float64(c.EndTime)-c.FingerprintOffset, + ) + if err != nil { + slog.WarnContext(ctx, "failed to extract chapter segment", "err", err) + continue + } + + fpId, err := s.StoreChapterprint(ctx, fp) if err != nil { slog.WarnContext(ctx, "failed to store intro chapterprint", "err", err) } else { diff --git a/transcoder/src/fingerprints_compare.go b/transcoder/src/fingerprints_compare.go index 25ecfe0a..642fd47a 100644 --- a/transcoder/src/fingerprints_compare.go +++ b/transcoder/src/fingerprints_compare.go @@ -11,7 +11,6 @@ import ( const ( MinOverlapDuration = 15.0 - MinSilenceDuration = 2.0 // Correlation threshold (0.0-1.0) above which a match is considered valid. // Uses the AcoustID-style formula: 1.0 - 2.0 * biterror / (32 * length), @@ -83,7 +82,7 @@ func matchStrip(v uint32) uint16 { // (position_in_fp1 - position_in_fp2). // 3. The offset with the most votes wins. // 4. A diversity check rejects matches caused by repetitive/silent audio. -func findBestOffset(fp1, fp2 []uint32) *int { +func findBestOffset(ctx context.Context, fp1, fp2 []uint32) *int { offsets1 := make(map[uint16]int) offsets2 := make(map[uint16]int) @@ -128,9 +127,18 @@ func findBestOffset(fp1, fp2 []uint32) *int { // to the number of unique values. This filters out repetitive audio // (silence, static noise) that would produce spurious matches. // (at least 2% of values must match with said offset) - if topCount < max(len(offsets1), len(offsets2))*2/100 { + percent := float64(topCount) / float64(max(len(offsets1), len(offsets2))) + if percent < 2./100 { + slog.WarnContext( + ctx, + "Diversity check failed, ignoring potential offset", + "offset", topOffset, + "percent", percent, + "vote_count", topCount, + ) return nil } + slog.DebugContext(ctx, "Identified offset", "offset", topOffset, "percent", percent) return new(topOffset) } @@ -258,9 +266,8 @@ func refineRunBounds(fp1, fp2 []uint32, start, end int) (int, int) { // 4. Find contiguous runs of high-correlation blocks that are at least // MinOverlapDuration long. func FpFindOverlap(ctx context.Context, fp1 []uint32, fp2 []uint32) ([]Overlap, error) { - offset := findBestOffset(fp1, fp2) + offset := findBestOffset(ctx, fp1, fp2) if offset == nil { - slog.InfoContext(ctx, "no good offset found") return nil, nil } @@ -273,8 +280,8 @@ func FpFindOverlap(ctx context.Context, fp1 []uint32, fp2 []uint32) ([]Overlap, return runs, nil } -func FpFindContain(haystack []uint32, needle []uint32) (*Match, error) { - offset := findBestOffset(haystack, needle) +func FpFindContain(ctx context.Context, haystack []uint32, needle []uint32) (*Match, error) { + offset := findBestOffset(ctx, haystack, needle) if offset == nil || *offset < 0 || *offset+len(needle) < len(haystack) { return nil, nil } diff --git a/transcoder/src/info.go b/transcoder/src/info.go index 5a00f9c5..cc2e445a 100644 --- a/transcoder/src/info.go +++ b/transcoder/src/info.go @@ -155,9 +155,11 @@ type Chapter struct { /// The type value is used to mark special chapters (opening/credits...) Type ChapterType `json:"type" db:"type"` /// Reference to the chapterprint used for fingerprint matching. - FingerprintId *int32 `json:"-" db:"fingerprint_id"` + FingerprintId *int32 `json:"-" db:"fingerprint_id"` /// Only used internally, never fetched from db. - Fingerprint []uint32 `json:"-" db:"-"` + Fingerprint []uint32 `json:"-" db:"-"` + /// Only used internally, never fetched from db + FingerprintOffset float64 `json:"-" db:"-"` /// Accuracy of the fingerprint match (0-100). MatchAccuracy *int32 `json:"matchAccuracy,omitempty" db:"match_accuracy"` }