From 9eecbae05de306880f8b298696b2545b1cddc55d Mon Sep 17 00:00:00 2001
From: Yiran Duan
Date: Mon, 13 May 2024 14:53:18 +0200
Subject: [PATCH] Enh/runner audio normalization (#1329)

* Add commands for audio normalization; pass 1 realised in a new action
* Pass 1 realised in a new action
* Make sure transcoding with audio normalization does the same to video.
* Integrate audio normalization into transcode. Apply it only when exactly 1 video exists for the stream.
* Pass 1: properly return error message
* Pass 1: 1. synchronise reading from stdin before extracting info; 2. make sure to close everything in case of error
* Audio normalization: log when applied
* Remove the unused action AudioNormalization

---------

Co-authored-by: Dawin <73998830+DawinYurtseven@users.noreply.github.com>
---
Reviewer notes (this section is ignored by git am):
 - Pass 1 now captures FFmpeg's stderr as well, because the loudnorm report is
   written through FFmpeg's logger, and no longer calls Wait while the output
   pipe is still being read.
 - A failing pass 1 is logged instead of being silently dropped.
 - CmdList no longer receives a second Stream field.
 - Hunk headers and the diffstat were recomputed; the blob hashes in the
   "index" lines were not. Indentation is reconstructed with tabs.

 runner/actions/transcode.go | 101 ++++++++++++++++++++++++++++++++++++
 runner/cmd.yaml             |  12 +++++
 runner/config/cmd.go        |   7 +++++-
 3 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/runner/actions/transcode.go b/runner/actions/transcode.go
index 2026dc7ac..f12a81e2a 100644
--- a/runner/actions/transcode.go
+++ b/runner/actions/transcode.go
@@ -1,13 +1,16 @@
 package actions
 
 import (
+	"bytes"
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"log/slog"
 	"os"
 	"os/exec"
+	"regexp"
 	"strings"
 	"time"
 )
 
@@ -67,7 +70,31 @@ func (a *ActionProvider) TranscodeAction() *Action {
 				i++
 			}
 
+			// Pass 1 of audio normalization.
+			// Audio normalization is only applied when exactly one video of the stream exists. Reasons for this:
+			// 1. Multiple videos existing for one stream is typically caused by a shutdown of a runner. This does not happen frequently.
+			// 2. It's much more inefficient to apply the audio normalization operation for more than one file:
+			//    2.1 Instead of 2 passes, 3 passes are needed: concat - get parameter - execute;
+			//    2.2 Video files need to be stored 3 times instead of twice (including the raw .ts files), at least temporarily
+			//        (Extracting and only operating/storing the audio is unacceptable due to the problem mentioned in one comment of this answer: https://stackoverflow.com/a/27413824)
+			var info *InfoForAudioNormalization
+			if len(fileName) == 1 {
+				info, err = getInfoForAudioNormalization(ctx, a.Cmd.AudioNormalize1, fileName[0])
+				if err != nil {
+					// Pass 1 is best effort: report the failure and transcode without normalization.
+					log.Warn("Audio normalization pass 1 failed, transcoding without it", "error", err)
+				}
+			}
+
 			cmd := fmt.Sprintf(a.Cmd.Transcoding, filenames, outputName)
+			// Pass 2 of audio normalization.
+			// Applied only when pass 1 is successfully executed.
+			// It does the same to the video, and additionally normalizes the audio with the given parameters from pass 1.
+			if info != nil {
+				cmd = fmt.Sprintf(a.Cmd.AudioNormalize2, filenames,
+					info.InputI, info.InputTp, info.InputLra, info.InputThresh, info.TargetOffset, outputName)
+				log.Info("Transcoding with audio normalization", "files", files)
+			}
 			c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)
 			c.Stderr = os.Stderr
 			err = c.Start()
@@ -79,3 +106,77 @@ func (a *ActionProvider) TranscodeAction() *Action {
 		},
 	}
 }
+
+// InfoForAudioNormalization holds the measurements that pass 1 of FFmpeg's loudnorm
+// filter prints as JSON. The values stay strings because they are only substituted
+// into the pass 2 command line.
+type InfoForAudioNormalization struct {
+	InputI            string `json:"input_i"`
+	InputTp           string `json:"input_tp"`
+	InputLra          string `json:"input_lra"`
+	InputThresh       string `json:"input_thresh"`
+	OutputI           string `json:"output_i"`
+	OutputTp          string `json:"output_tp"`
+	OutputLra         string `json:"output_lra"`
+	OutputThresh      string `json:"output_thresh"`
+	NormalizationType string `json:"normalization_type"`
+	TargetOffset      string `json:"target_offset"`
+}
+
+// jsonObjectRe matches a flat JSON object, i.e. one without nested braces.
+var jsonObjectRe = regexp.MustCompile(`\{[^{}]*\}`)
+
+// getInfoForAudioNormalization runs pass 1 of the audio normalization on filename and
+// returns the measured loudness values needed for pass 2.
+//
+// cmdFmt is the FFmpeg argument template (a.Cmd.AudioNormalize1); its single verb is
+// replaced by filename. A non-nil error means that no usable measurements were obtained;
+// the caller then transcodes without audio normalization.
+func getInfoForAudioNormalization(ctx context.Context, cmdFmt string, filename string) (*InfoForAudioNormalization, error) {
+	cmd := fmt.Sprintf(cmdFmt, filename)
+	c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)
+
+	// FFmpeg writes its log, including the loudnorm report, to stderr, so stderr has to be
+	// captured as well. Assigning the same writer to Stdout and Stderr makes os/exec
+	// serialise the writes, and Run returns only after all output has been copied.
+	var output bytes.Buffer
+	c.Stdout = &output
+	c.Stderr = &output
+	runErr := c.Run()
+
+	// Keep FFmpeg's diagnostics visible on the runner's stderr; a failed write here must
+	// not influence the result, hence the ignored error.
+	_, _ = os.Stderr.Write(output.Bytes())
+	if runErr != nil {
+		return nil, fmt.Errorf("audio normalization pass 1: %w", runErr)
+	}
+
+	info := &InfoForAudioNormalization{}
+	if err := extractAndParseJSON(output.String(), info); err != nil {
+		return nil, fmt.Errorf("audio normalization pass 1: %w", err)
+	}
+
+	// Pass 2 cannot be built from missing or infinite measurements
+	// (presumably what loudnorm reports for silent audio - TODO confirm).
+	for _, v := range []string{info.InputI, info.InputTp, info.InputLra, info.InputThresh, info.TargetOffset} {
+		if v == "" || strings.Contains(v, "inf") {
+			return nil, fmt.Errorf("audio normalization pass 1: unusable measurement %q", v)
+		}
+	}
+	return info, nil
+}
+
+// extractAndParseJSON finds the last flat JSON object in output and decodes it into info.
+// It returns an error when output contains no such object or the object cannot be decoded.
+func extractAndParseJSON(output string, info *InfoForAudioNormalization) error {
+	matches := jsonObjectRe.FindAllString(output, -1)
+	if len(matches) == 0 {
+		return errors.New("no JSON data found")
+	}
+
+	// The loudnorm report is expected at the end of the log, so the last object is used.
+	if err := json.Unmarshal([]byte(matches[len(matches)-1]), info); err != nil {
+		return fmt.Errorf("parsing loudnorm report: %w", err)
+	}
+	return nil
+}
diff --git a/runner/cmd.yaml b/runner/cmd.yaml
index 24775b2e5..d4e171cdc 100644
--- a/runner/cmd.yaml
+++ b/runner/cmd.yaml
@@ -1,5 +1,17 @@
 stream: '-y -hide_banner -nostats %v -t %.0f -i %v -c:v copy -c:a copy -f mpegts %v -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %v/%%05d.ts %v'
+SeparateAudioFast: "-i %v -vn -c:a copy %v"
+SeparateAudio: "-i %v -vn %v"
+
+# Two-pass audio normalization with FFmpeg loudnorm
+# Loudnorm configuration "I=-23:TP=-2:LRA=7" -- Chosen according to EBU R128
+## First pass: audio analysis, measures the parameters needed in the second pass
+AudioNormalize1: "-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"
+## Second pass: Apply normalization to the audio
+AudioNormalize2: "-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"
+
+
 
 Transcoding: '-i %v -c:v libx264 -c:a copy -crf 0 -probesize 100M -analyzeduration 250M %v'
 
 SilenceDetect: '-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -'
+
diff --git a/runner/config/cmd.go b/runner/config/cmd.go
index 7bba50b73..6526a37f5 100644
--- a/runner/config/cmd.go
+++ b/runner/config/cmd.go
@@ -9,9 +9,14 @@ import (
 
 type CmdList struct {
 	//this is for adding extra parameters
+
+	SeparateAudioFast string `Default:"-i %v -vn -c:a copy %v"`
+	SeparateAudio     string `Default:"-i %v -vn %v"`
+	AudioNormalize1   string `Default:"-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"`
+	AudioNormalize2   string `Default:"-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"`
+	Transcoding       string `Default:"-i %v -c:v libx264 %v"`
 	Stream        string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
-	Transcoding   string `Default:"-i %v -c:v libx264 %v"`
 	SilenceDetect string `Default:"-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -"`
 }
 
 func NewCmd(log *slog.Logger) *CmdList {