diff --git a/transcoder/src/hwaccel.go b/transcoder/src/hwaccel.go index 561176b7..f0fe30eb 100644 --- a/transcoder/src/hwaccel.go +++ b/transcoder/src/hwaccel.go @@ -12,7 +12,7 @@ func DetectHardwareAccel() HwAccelT { } log.Printf("Using hardware acceleration: %s", name) - // superfast or ultrafast would produce a file extremly big so we prever to ignore them. Fast is available on all hw accel modes + // superfast or ultrafast would produce an extremely big file so we prefer to ignore them. Fast is available on all hwaccel modes // so we use that by default. // vaapi does not have any presets so this flag is unused for vaapi hwaccel. preset := GetEnvOr("GOCODER_PRESET", "fast") @@ -25,9 +25,9 @@ func DetectHardwareAccel() HwAccelT { EncodeFlags: []string{ "-c:v", "libx264", "-preset", preset, - // sc_threshold is a scene detection mechanisum used to create a keyframe when the scene changes + // sc_threshold is a scene detection mechanism used to create a keyframe when the scene changes // this is on by default and inserts keyframes where we don't want to (it also breaks force_key_frames) - // we disable it to prevents whole scenes from behing removed due to the -f segment failing to find the corresonding keyframe + // we disable it to prevent whole scenes from being removed due to the -f segment failing to find the corresponding keyframe "-sc_threshold", "0", // force 8bits output (by default it keeps the same as the source but 10bits is not playable on some devices) "-pix_fmt", "yuv420p", @@ -53,16 +53,23 @@ func DetectHardwareAccel() HwAccelT { // see https://trac.ffmpeg.org/wiki/Hardware/VAAPI#Encoding for more info // we also need to force the format to be nv12 since 10bits is not supported via hwaccel. 
// this filter is equivalent to this pseudocode: - // if (vaapi) { - // hwupload, passthrough, keep vaapi as is - // convert whatever to nv12 on GPU + // + // if (format != nv12 && format != vaapi) { + // convert_to_nv12() (in cpu space, the format is invalid only if hw-decode failed) + // hwupload (send to gpu space) + // scale_to(width, height, output_format="nv12") (output format doesn't change since we are already in nv12) // } else { - // convert whatever to nv12 on CPU - // hwupload to vaapi(nv12) - // convert whatever to nv12 on GPU // scale_vaapi doesn't support passthrough option, so it has to make a copy + // _format_has_nothing_to_do (do nothing since the format is already valid) + // _hwupload (the format is already a gpu space format so this is also a no-op) + // scale_to(width, height, output_format="nv12") (if we are already in nv12 it's only a scale, otherwise it converts to it) + // note: nv12 is an 8bit format while vaapi can be 10bits or anything else // } // See https://www.reddit.com/r/ffmpeg/comments/1bqn60w/hardware_accelerated_decoding_without_hwdownload/ for more info ScaleFilter: "format=nv12|vaapi,hwupload,scale_vaapi=%d:%d:format=nv12", + // Exactly the same as above but without the scaling. + // Yes, scale_vaapi is used to convert the format (the `format` filter will pick a format for + // the next filter so it doesn't work if there is no next filter). 
+ NoResizeFilter: "format=nv12|vaapi,hwupload,scale_vaapi=format=nv12", } case "qsv", "intel": return HwAccelT{ @@ -77,7 +84,8 @@ func DetectHardwareAccel() HwAccelT { "-preset", preset, }, // see note on ScaleFilter of the vaapi HwAccel, this is the same filter but adapted to qsv - ScaleFilter: "format=nv12|qsv,hwupload,scale_qsv=%d:%d:format=nv12", + ScaleFilter: "format=nv12|qsv,hwupload,scale_qsv=%d:%d:format=nv12", + NoResizeFilter: "format=nv12|qsv,hwupload,scale_qsv=format=nv12", } case "nvidia": return HwAccelT{ @@ -91,11 +99,12 @@ func DetectHardwareAccel() HwAccelT { EncodeFlags: []string{ "-c:v", "h264_nvenc", "-preset", preset, - // the exivalent of -sc_threshold on nvidia. + // the equivalent of -sc_threshold on nvidia. "-no-scenecut", "1", }, // see note on ScaleFilter of the vaapi HwAccel, this is the same filter but adapted to cuda - ScaleFilter: "format=nv12|cuda,hwupload,scale_cuda=%d:%d:format=nv12", + ScaleFilter: "format=nv12|cuda,hwupload,scale_cuda=%d:%d:format=nv12", + NoResizeFilter: "format=nv12|cuda,hwupload,scale_cuda=format=nv12", } default: log.Printf("No hardware accelerator named: %s", name) diff --git a/transcoder/src/settings.go b/transcoder/src/settings.go index 29c53265..474e7eb0 100644 --- a/transcoder/src/settings.go +++ b/transcoder/src/settings.go @@ -22,10 +22,11 @@ type SettingsT struct { } type HwAccelT struct { - Name string - DecodeFlags []string - EncodeFlags []string - ScaleFilter string + Name string + DecodeFlags []string + EncodeFlags []string + NoResizeFilter string + ScaleFilter string } var Settings = SettingsT{ diff --git a/transcoder/src/videostream.go b/transcoder/src/videostream.go index 42832e93..55c4d13f 100644 --- a/transcoder/src/videostream.go +++ b/transcoder/src/videostream.go @@ -81,6 +81,8 @@ func (vs *VideoStream) getTranscodeArgs(segments string) []string { "-vf", fmt.Sprintf(Settings.HwAccel.ScaleFilter, width, vs.quality.Height()), ) } else { + args = append(args, "-vf", 
Settings.HwAccel.NoResizeFilter) + // NoResize doesn't have bitrate info, fallback to a know quality higher or equal. for _, q := range Qualities { if q.Height() >= vs.video.Height { @@ -90,7 +92,7 @@ func (vs *VideoStream) getTranscodeArgs(segments string) []string { } } args = append(args, - // Even less sure but bufsize are 5x the avergae bitrate since the average bitrate is only + // Even less sure but bufsize are 5x the average bitrate since the average bitrate is only // useful for hls segments. "-bufsize", fmt.Sprint(quality.MaxBitrate()*5), "-b:v", fmt.Sprint(quality.AverageBitrate()), @@ -100,7 +102,7 @@ func (vs *VideoStream) getTranscodeArgs(segments string) []string { // without this option, some hardware encoders uses others i-frames and the -f segment can't cut at them. "-forced-idr", "1", "-force_key_frames", segments, - // make ffmpeg globaly less buggy + // make ffmpeg globally less buggy "-strict", "-2", ) return args