Enh/runner audio normalization (#1329)

* Add commands for audio normalization; pass 1 realised in a new action
* Pass 1 realised in a new action
* Make sure transcoding with audio normalization does the same to video.
* Integrate audio normalization into transcode. Apply it only when exactly 1 video exists for the stream.
* Pass 1: properly return error message
* Pass 1: 1. synchronise reading from stdout before extracting info; 2. make sure to close everything in case of error
* Audio normalization: log when applied
* Remove the unused action AudioNormalization

---------

Co-authored-by: Dawin <[email protected]>
TUM-Dev · May 13, 2024 · 9eecbae · 9eecbae
1 parent 80e0bb1
commit 9eecbae
Show file tree

Hide file tree

Showing 3 changed files with 121 additions and 1 deletion.
diff --git a/runner/actions/transcode.go b/runner/actions/transcode.go
@@ -1,13 +1,18 @@
 package actions
 
 import (
+	"bufio"
+	"bytes"
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"log/slog"
 	"os"
 	"os/exec"
+	"regexp"
 	"strings"
+	"sync"
 	"time"
 )
 
@@ -67,7 +72,27 @@ func (a *ActionProvider) TranscodeAction() *Action {
 				i++
 			}
 
+			// Pass 1 of audio normalization.
+			// Audio normalization is applied only when exactly one video exists for the stream. Reasons:
+			// 1. Multiple videos for one stream are typically caused by a runner shutdown, which does not happen frequently.
+			// 2. Applying audio normalization to more than one file is much less efficient:
+			// 2.1 Instead of 2 passes, 3 passes are needed: concat - get parameters - execute;
+			// 2.2 Video files need to be stored 3 times instead of twice (including the raw .ts files), at least temporarily.
+			//		(Extracting the audio and operating on/storing only the audio is unacceptable due to the problem mentioned in a comment on this answer: https://stackoverflow.com/a/27413824)
+			var info *InfoForAudioNormalization = nil
+			if len(fileName) == 1 {
+				info, err = getInfoForAudioNormalization(ctx, a.Cmd.AudioNormalize1, fileName[0])
+			}
+
 			cmd := fmt.Sprintf(a.Cmd.Transcoding, filenames, outputName)
+			// Pass 2 of audio normalization
+			// Applied only when pass 1 is successfully executed
+			// It does the same to the video, and additionally normalizes the audio with the given parameters from pass 1
+			if info != nil {
+				cmd = fmt.Sprintf(a.Cmd.AudioNormalize2, filenames,
+					info.InputI, info.InputTp, info.InputLra, info.InputThresh, info.TargetOffset, outputName)
+				log.Info("Transcoding with audio normalization", "files", files)
+			}
 			c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)
 			c.Stderr = os.Stderr
 			err = c.Start()
@@ -79,3 +104,79 @@ func (a *ActionProvider) TranscodeAction() *Action {
 		},
 	}
 }
+
+// InfoForAudioNormalization holds the statistics decoded from the JSON that
+// pass 1 of audio normalization (the AudioNormalize1 command) produces.
+// Every value is decoded as a string. Only the Input* fields and TargetOffset
+// are substituted into the pass-2 command; the remaining fields are decoded
+// but not read by the code visible in this file.
+type InfoForAudioNormalization struct {
+	InputI            string `json:"input_i"`            // passed to pass 2 as measured_i
+	InputTp           string `json:"input_tp"`           // passed to pass 2 as measured_tp
+	InputLra          string `json:"input_lra"`          // passed to pass 2 as measured_lra
+	InputThresh       string `json:"input_thresh"`       // passed to pass 2 as measured_thresh
+	OutputI           string `json:"output_i"`
+	OutputTp          string `json:"output_tp"`
+	OutputLra         string `json:"output_lra"`
+	OutputThresh      string `json:"output_thresh"`
+	NormalizationType string `json:"normalization_type"`
+	TargetOffset      string `json:"target_offset"`      // passed to pass 2 as offset
+}
+
+// getInfoForAudioNormalization runs pass 1 of the two-pass audio normalization
+// on filename and returns the measurements that pass 2 needs.
+//
+// cmdFmt is the FFmpeg argument template for pass 1. filename is substituted
+// into it with fmt.Sprintf and the result is split on single spaces, so an
+// argument that itself contains a space cannot be expressed.
+//
+// A non-nil error means that no measurements are available. Errors from pass 1
+// are not meant to abort the job: the caller skips pass 2 when it gets a nil
+// result, so the video is transcoded without audio normalization.
+func getInfoForAudioNormalization(ctx context.Context, cmdFmt string, filename string) (*InfoForAudioNormalization, error) {
+	cmd := fmt.Sprintf(cmdFmt, filename)
+	c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)
+
+	stdoutPipe, err := c.StdoutPipe()
+	if err != nil {
+		return nil, fmt.Errorf("audio normalization pass 1: opening stdout pipe: %w", err)
+	}
+	// FFmpeg reports filter statistics (including the loudnorm JSON) through its
+	// logger, which writes to stderr, so stderr has to be captured as well.
+	stderrPipe, err := c.StderrPipe()
+	if err != nil {
+		_ = stdoutPipe.Close() // the process was never started; release the pipe we already own
+		return nil, fmt.Errorf("audio normalization pass 1: opening stderr pipe: %w", err)
+	}
+
+	if err = c.Start(); err != nil {
+		return nil, fmt.Errorf("audio normalization pass 1: starting ffmpeg: %w", err)
+	}
+
+	var (
+		stdoutBuf, stderrBuf bytes.Buffer
+		wg                   sync.WaitGroup
+	)
+	// collect copies everything from src into dst until EOF or a read error.
+	// ReadString has no line-length limit, so an unusually long line cannot
+	// stop the reader early and leave FFmpeg blocked on a full pipe.
+	collect := func(src *bufio.Reader, dst *bytes.Buffer, echo bool) {
+		defer wg.Done()
+		for {
+			line, readErr := src.ReadString('\n')
+			dst.WriteString(line)
+			if echo {
+				// Keep FFmpeg's log visible on the runner's stderr, as before.
+				_, _ = os.Stderr.WriteString(line)
+			}
+			if readErr != nil {
+				return
+			}
+		}
+	}
+	wg.Add(2)
+	go collect(bufio.NewReader(stdoutPipe), &stdoutBuf, false)
+	go collect(bufio.NewReader(stderrPipe), &stderrBuf, true)
+
+	// Wait closes both pipes, so all reads must have completed before it is
+	// called; otherwise output can be lost.
+	wg.Wait()
+	if err = c.Wait(); err != nil {
+		return nil, fmt.Errorf("audio normalization pass 1 for %q: %w", filename, err)
+	}
+
+	// Prefer stdout (the stream this function has always inspected) and fall
+	// back to stderr. The streams are parsed separately so that lines from one
+	// can never end up in the middle of the JSON printed on the other.
+	info := &InfoForAudioNormalization{}
+	stdoutErr := extractAndParseJSON(stdoutBuf.String(), info)
+	if stdoutErr == nil {
+		return info, nil
+	}
+	info = &InfoForAudioNormalization{}
+	if err = extractAndParseJSON(stderrBuf.String(), info); err != nil {
+		return nil, fmt.Errorf("audio normalization pass 1 for %q: stdout: %v; stderr: %w", filename, stdoutErr, err)
+	}
+	return info, nil
+}
+
+func extractAndParseJSON(output string, info *InfoForAudioNormalization) error {
+	re := regexp.MustCompile(`(?s)\{.*}`) // Finds JSON data from the output
+	matches := re.FindStringSubmatch(output)
+
+	if len(matches) == 0 {
+		return fmt.Errorf("no JSON data found")
+	}
+
+	jsonData := matches[0]
+	err := json.Unmarshal([]byte(jsonData), info)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/runner/cmd.yaml b/runner/cmd.yaml
@@ -1,5 +1,17 @@
 stream: '-y -hide_banner -nostats %v -t %.0f -i %v -c:v copy -c:a copy -f mpegts %v -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %v/%%05d.ts %v'
 
+SeparateAudioFast: "-i %v -vn -c:a copy %v"
+SeparateAudio: "-i %v -vn %v"
+
+# Two-pass audio normalization with FFmpeg loudnorm
+# Loudnorm configuration "I=-23:TP=-2:LRA=7" -- Chosen according to EBU R128
+## First pass: analyze the audio to obtain the parameters needed in the second pass
+AudioNormalize1: "-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"
+## Second pass: Apply normalization to the audio
+AudioNormalize2: "-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"
+
+
 Transcoding: '-i %v -c:v libx264 -c:a copy -crf 0 -probesize 100M -analyzeduration 250M %v'
 
 SilenceDetect: '-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -'
+
diff --git a/runner/config/cmd.go b/runner/config/cmd.go
@@ -9,9 +9,16 @@ import (
 
 type CmdList struct {
 	//this is for adding extra parameters
+
+	Stream            string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
+	SeparateAudioFast string `Default:"-i %v -vn -c:a copy %v"`
+	SeparateAudio     string `Default:"-i %v -vn %v"`
+	AudioNormalize1   string `Default:"-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"`
+	AudioNormalize2   string `Default:"-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"`
+	Transcoding       string `Default:"-i %v -c:v libx264 %v"`
 	Stream        string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
-	Transcoding   string `Default:"-i %v -c:v libx264 %v"`
 	SilenceDetect string `Default:"-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -"`
+
 }
 
 func NewCmd(log *slog.Logger) *CmdList {