Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enh/runner audio normalization #1329

Merged
merged 9 commits into from
May 13, 2024
101 changes: 101 additions & 0 deletions runner/actions/transcode.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package actions

import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"regexp"
"strings"
"sync"
"time"
)

Expand Down Expand Up @@ -67,7 +72,27 @@ func (a *ActionProvider) TranscodeAction() *Action {
i++
}

// Pass 1 of audio normalization.
// Audio normalization is only applied when exactly one video file exists for the stream. Reasons for this:
// 1. Multiple video files for one stream are typically caused by a shutdown of a runner, which does not happen frequently.
// 2. Applying the audio normalization operation to more than one file is far less efficient:
// 2.1 Instead of 2 passes, 3 passes are needed: concat - get parameters - execute;
// 2.2 Video files need to be stored 3 times instead of twice (including the raw .ts files), at least temporarily
// (Extracting and only operating on/storing the audio is unacceptable due to the problem mentioned in one comment of this answer: https://stackoverflow.com/a/27413824)
var info *InfoForAudioNormalization = nil
if len(fileName) == 1 {
info, err = getInfoForAudioNormalization(ctx, a.Cmd.AudioNormalize1, fileName[0])
}

cmd := fmt.Sprintf(a.Cmd.Transcoding, filenames, outputName)
// Pass 2 of audio normalization
// Applied only when pass 1 is successfully executed
// It does the same to the video, and additionally normalizes the audio with the given parameters from pass 1
if info != nil {
cmd = fmt.Sprintf(a.Cmd.AudioNormalize2, filenames,
info.InputI, info.InputTp, info.InputLra, info.InputThresh, info.TargetOffset, outputName)
log.Info("Transcoding with audio normalization", "files", files)
}
c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)
c.Stderr = os.Stderr
err = c.Start()
Expand All @@ -79,3 +104,79 @@ func (a *ActionProvider) TranscodeAction() *Action {
},
}
}

// InfoForAudioNormalization mirrors the JSON object that the first
// normalization pass (loudnorm with print_format=json) reports. Every value is
// kept as the string found in the JSON; nothing is converted to a number here.
type InfoForAudioNormalization struct {
	// Values reported under the "input_*" keys.
	InputI      string `json:"input_i"`
	InputTp     string `json:"input_tp"`
	InputLra    string `json:"input_lra"`
	InputThresh string `json:"input_thresh"`

	// Values reported under the "output_*" keys.
	OutputI      string `json:"output_i"`
	OutputTp     string `json:"output_tp"`
	OutputLra    string `json:"output_lra"`
	OutputThresh string `json:"output_thresh"`

	// Remaining keys of the report.
	NormalizationType string `json:"normalization_type"`
	TargetOffset      string `json:"target_offset"`
}

// getInfoForAudioNormalization runs pass 1 of the two-pass audio normalization
// and returns the measurements that FFmpeg reports as JSON.
//
// cmdFmt is an FFmpeg argument template with a single placeholder for
// filename. The formatted string is split on single spaces, so a filename
// containing spaces is passed to FFmpeg as several arguments.
//
// Both output streams of FFmpeg are collected: stdout is searched first and
// stderr second, because FFmpeg emits its log output (which includes the
// loudnorm report) on stderr. Everything read from stderr is still forwarded
// to this process's stderr while FFmpeg runs.
//
// It returns a nil info together with an error when FFmpeg cannot be started,
// exits unsuccessfully, or prints no decodable JSON object.
func getInfoForAudioNormalization(ctx context.Context, cmdFmt string, filename string) (*InfoForAudioNormalization, error) {
	// Errors during pass 1 won't propagate to outside.
	// But errors will prevent pass 2 from executing, ultimately resulting in the video not undergoing audio normalization.
	cmd := fmt.Sprintf(cmdFmt, filename)
	c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)

	stdoutPipe, err := c.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("opening ffmpeg stdout pipe: %w", err)
	}
	stderrPipe, err := c.StderrPipe()
	if err != nil {
		_ = stdoutPipe.Close()
		return nil, fmt.Errorf("opening ffmpeg stderr pipe: %w", err)
	}

	// Start closes both pipes itself when it fails, so no cleanup is needed here.
	if err := c.Start(); err != nil {
		return nil, fmt.Errorf("starting ffmpeg loudnorm analysis: %w", err)
	}

	var (
		stdout, stderr bytes.Buffer
		wg             sync.WaitGroup
	)
	// collect drains src until it is exhausted, keeping a copy in dst and
	// optionally echoing it to mirror. ReadString has no line-length limit, so
	// the pipe is always drained and FFmpeg can never block on a full pipe.
	collect := func(src *bufio.Reader, dst *bytes.Buffer, mirror *os.File) {
		defer wg.Done()
		for {
			chunk, readErr := src.ReadString('\n')
			dst.WriteString(chunk)
			if mirror != nil {
				// Best effort: a failing log mirror must not abort the analysis.
				_, _ = mirror.WriteString(chunk)
			}
			if readErr != nil {
				// End of stream or a closed pipe; either way nothing more to read.
				return
			}
		}
	}
	wg.Add(2)
	go collect(bufio.NewReader(stdoutPipe), &stdout, nil)
	go collect(bufio.NewReader(stderrPipe), &stderr, os.Stderr)

	// All reads from the pipes must finish before Wait is called, because Wait
	// closes the pipes as soon as the process has exited.
	wg.Wait()
	if err := c.Wait(); err != nil {
		return nil, fmt.Errorf("running ffmpeg loudnorm analysis: %w", err)
	}

	var parseErr error
	for _, text := range []string{stdout.String(), stderr.String()} {
		// A fresh value per attempt, so a failed attempt never leaks fields.
		info := &InfoForAudioNormalization{}
		if parseErr = extractAndParseJSON(text, info); parseErr == nil {
			return info, nil
		}
	}
	return nil, fmt.Errorf("reading loudnorm measurements: %w", parseErr)
}

// Patterns used by extractAndParseJSON, compiled once instead of on every call.
var (
	// loudnormJSONSpanRe matches from the first '{' to the last '}' of the text.
	loudnormJSONSpanRe = regexp.MustCompile(`(?s)\{.*}`)
	// loudnormFlatObjectRe matches one brace-delimited object without nested braces.
	loudnormFlatObjectRe = regexp.MustCompile(`\{[^{}]*\}`)
)

// extractAndParseJSON locates a JSON object inside output (typically FFmpeg
// log text) and decodes it into info.
//
// The span from the first '{' to the last '}' is tried first. If that span is
// not valid JSON, for example because the log contains an unrelated brace
// earlier on, every brace-delimited object without nested braces is tried,
// starting with the one closest to the end of the text.
//
// info is only modified when decoding succeeds. The function returns an error
// when output contains no brace-delimited span, when info is nil, or when no
// candidate can be decoded; in the last case the error of the first attempt
// is returned.
func extractAndParseJSON(output string, info *InfoForAudioNormalization) error {
	span := loudnormJSONSpanRe.FindString(output)
	if span == "" {
		return fmt.Errorf("no JSON data found")
	}
	if info == nil {
		return fmt.Errorf("no destination given for JSON data")
	}

	flat := loudnormFlatObjectRe.FindAllString(span, -1)
	candidates := make([]string, 0, len(flat)+1)
	candidates = append(candidates, span)
	for i := len(flat) - 1; i >= 0; i-- {
		candidates = append(candidates, flat[i])
	}

	var firstErr error
	for _, candidate := range candidates {
		// Decode into a copy so a failed attempt cannot leave info half-filled,
		// while keys missing from the JSON keep the values info already had.
		parsed := *info
		err := json.Unmarshal([]byte(candidate), &parsed)
		if err == nil {
			*info = parsed
			return nil
		}
		if firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
12 changes: 12 additions & 0 deletions runner/cmd.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
stream: '-y -hide_banner -nostats %v -t %.0f -i %v -c:v copy -c:a copy -f mpegts %v -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %v/%%05d.ts %v'

SeparateAudioFast: "-i %v -vn -c:a copy %v"
SeparateAudio: "-i %v -vn %v"

# Two-pass audio normalization with FFmpeg loudnorm
# Loudnorm configuration "I=-23:TP=-2:LRA=7" -- Chosen according to EBU R128
## First pass: Analyze the audio to obtain the parameters needed in the second pass
AudioNormalize1: "-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"
## Second pass: Apply normalization to the audio
AudioNormalize2: "-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"


Transcoding: '-i %v -c:v libx264 -c:a copy -crf 0 -probesize 100M -analyzeduration 250M %v'

SilenceDetect: '-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -'

9 changes: 8 additions & 1 deletion runner/config/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@ import (

type CmdList struct {
//this is for adding extra parameters

Stream string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
SeparateAudioFast string `Default:"-i %v -vn -c:a copy %v"`
SeparateAudio string `Default:"-i %v -vn %v"`
AudioNormalize1 string `Default:"-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"`
AudioNormalize2 string `Default:"-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"`
Transcoding string `Default:"-i %v -c:v libx264 %v"`
Stream string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
Transcoding string `Default:"-i %v -c:v libx264 %v"`
SilenceDetect string `Default:"-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -"`

}

func NewCmd(log *slog.Logger) *CmdList {
Expand Down