Skip to content

Commit

Permalink
Enh/runner audio normalization (#1329)
Browse files Browse the repository at this point in the history
* Add commands for audio normalization; pass 1 realised in a new action

* Pass 1 realised in a new action

* Make sure transcoding with audio normalization does the same to video.

* Integrate audio normalization into transcode. Apply it only when exactly 1 video exists for the stream.

* Pass 1: properly return error message

* Pass 1: 1. synchronise reading from stdin before extracting info; 2. make sure to close everything in case of error

* Audio normalization: log when applied

* Remove the unused action AudioNormalization

---------

Co-authored-by: Dawin <[email protected]>
  • Loading branch information
YiranDuan721 and DawinYurtseven authored May 13, 2024
1 parent 80e0bb1 commit 9eecbae
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 1 deletion.
101 changes: 101 additions & 0 deletions runner/actions/transcode.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package actions

import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"regexp"
"strings"
"sync"
"time"
)

Expand Down Expand Up @@ -67,7 +72,27 @@ func (a *ActionProvider) TranscodeAction() *Action {
i++
}

// Pass 1 of audio normalization.
// Audio normalization is applied only when exactly one video exists for the stream. Reasons for this:
// 1. Multiple videos existing for one stream is typically caused by a shutdown of a runner. This does not happen frequently.
// 2. It's much more inefficient to apply the audio normalization operation for more than one file:
// 2.1 Instead of 2 passes, 3 passes are needed: concat - get parameter - execute;
// 2.2 Video files need to be stored 3 times instead of twice (including the raw .ts files), at least temporarily
// (Extracting and only operating/storing the audio is unacceptable due to the problem mentioned in one comment of this answer: https://stackoverflow.com/a/27413824)
var info *InfoForAudioNormalization = nil
if len(fileName) == 1 {
info, err = getInfoForAudioNormalization(ctx, a.Cmd.AudioNormalize1, fileName[0])
}

cmd := fmt.Sprintf(a.Cmd.Transcoding, filenames, outputName)
// Pass 2 of audio normalization
// Applied only when pass 1 is successfully executed
// It does the same to the video, and additionally normalizes the audio with the given parameters from pass 1
if info != nil {
cmd = fmt.Sprintf(a.Cmd.AudioNormalize2, filenames,
info.InputI, info.InputTp, info.InputLra, info.InputThresh, info.TargetOffset, outputName)
log.Info("Transcoding with audio normalization", "files", files)
}
c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)
c.Stderr = os.Stderr
err = c.Start()
Expand All @@ -79,3 +104,79 @@ func (a *ActionProvider) TranscodeAction() *Action {
},
}
}

// InfoForAudioNormalization mirrors the JSON statistics object that pass 1 of
// the audio normalization (loudnorm with print_format=json) is parsed into.
// Every value is kept as the string found in the JSON; nothing is converted
// to a numeric type here.
type InfoForAudioNormalization struct {
	// Measurements of the input. Pass 2 feeds these back to loudnorm as
	// measured_i, measured_tp, measured_lra and measured_thresh.
	InputI      string `json:"input_i"`
	InputTp     string `json:"input_tp"`
	InputLra    string `json:"input_lra"`
	InputThresh string `json:"input_thresh"`

	// Values reported for the output. They are decoded for completeness.
	OutputI      string `json:"output_i"`
	OutputTp     string `json:"output_tp"`
	OutputLra    string `json:"output_lra"`
	OutputThresh string `json:"output_thresh"`

	// NormalizationType is the "normalization_type" entry of the report.
	NormalizationType string `json:"normalization_type"`

	// TargetOffset is handed to pass 2 as the loudnorm offset parameter.
	TargetOffset string `json:"target_offset"`
}

// getInfoForAudioNormalization runs pass 1 of the two-pass audio normalization:
// it executes ffmpeg with the arguments produced by formatting cmdFmt with
// filename, collects everything the process prints and parses the JSON
// statistics object out of it.
//
// ffmpeg prints its log output (which includes the loudnorm statistics) on
// stderr, so stdout and stderr are both captured. Every captured line is also
// echoed to os.Stderr so the ffmpeg log stays visible to the operator.
//
// Errors during pass 1 are meant to stay local: the caller treats a nil result
// as "skip audio normalization", so a failure here only means that pass 2 is
// not executed and the video is transcoded without normalized audio.
//
// It returns the parsed statistics, or nil and an error if ffmpeg could not be
// started, exited unsuccessfully, its output could not be read, or no JSON
// object could be parsed from the output.
func getInfoForAudioNormalization(ctx context.Context, cmdFmt string, filename string) (*InfoForAudioNormalization, error) {
	cmd := fmt.Sprintf(cmdFmt, filename)
	c := exec.CommandContext(ctx, "ffmpeg", strings.Split(cmd, " ")...)

	// One OS pipe serves as both stdout and stderr of the child, so the two
	// streams arrive through a single reader and cannot race on the buffer.
	pr, pw, err := os.Pipe()
	if err != nil {
		return nil, fmt.Errorf("creating pipe for ffmpeg output: %w", err)
	}
	defer pr.Close()
	c.Stdout = pw
	c.Stderr = pw

	err = c.Start()
	// The child (if it was started) owns its own copy of the write end. Ours
	// must be closed in any case, otherwise the reader below never sees EOF.
	_ = pw.Close()
	if err != nil {
		return nil, fmt.Errorf("starting ffmpeg: %w", err)
	}

	var (
		output  bytes.Buffer
		scanErr error
		wg      sync.WaitGroup
	)
	wg.Add(1)

	go func() { // Reads the output from FFmpeg
		defer wg.Done()
		scanner := bufio.NewScanner(pr)
		for scanner.Scan() {
			line := scanner.Text()
			output.WriteString(line + "\n")
			// Best-effort echo; a failing os.Stderr must not abort the analysis.
			fmt.Fprintln(os.Stderr, line)
		}
		scanErr = scanner.Err()
		if scanErr == nil {
			return
		}
		// The scanner gave up (e.g. on an over-long line). Keep draining so
		// ffmpeg never blocks on a full pipe; the error is reported below.
		scratch := make([]byte, 4096)
		for {
			if _, readErr := pr.Read(scratch); readErr != nil {
				return
			}
		}
	}()

	// All reads must be finished before Wait is called; the reader ends once
	// ffmpeg exits and the write end of the pipe is closed.
	wg.Wait()

	if err = c.Wait(); err != nil {
		return nil, fmt.Errorf("running ffmpeg: %w", err)
	}
	if scanErr != nil {
		return nil, fmt.Errorf("reading ffmpeg output: %w", scanErr)
	}

	info := &InfoForAudioNormalization{}
	if err = extractAndParseJSON(output.String(), info); err != nil {
		return nil, err
	}
	return info, nil
}

// Patterns used by extractAndParseJSON, compiled once instead of per call.
var (
	// audioNormJSONSpanPattern matches from the first "{" to the last "}" of
	// the text, across line breaks.
	audioNormJSONSpanPattern = regexp.MustCompile(`(?s)\{.*}`)
	// audioNormFlatObjectPattern matches a single brace pair that contains no
	// further braces, i.e. a candidate for a flat JSON object.
	audioNormFlatObjectPattern = regexp.MustCompile(`\{[^{}]*\}`)
)

// extractAndParseJSON finds a JSON object inside output (typically the mixed
// log text printed by ffmpeg) and decodes it into info.
//
// The span from the first "{" to the last "}" is tried first. If that span is
// not valid JSON, for example because the surrounding log text contains other
// braces, the flat {...} groups inside it are tried from last to first and
// the first one that decodes is used.
//
// info is only modified on success. The function returns an error if output
// contains no brace-delimited span, if info is nil, or if no candidate can be
// decoded; in the last case the error wraps the decoding error of the full
// span.
func extractAndParseJSON(output string, info *InfoForAudioNormalization) error {
	span := audioNormJSONSpanPattern.FindString(output)
	if span == "" {
		return fmt.Errorf("no JSON data found")
	}
	if info == nil {
		return fmt.Errorf("parsing JSON data: destination is nil")
	}

	// Decode into a copy so that a failed attempt never leaves info half
	// filled, while values already present in info are still preserved.
	candidate := *info
	spanErr := json.Unmarshal([]byte(span), &candidate)
	if spanErr == nil {
		*info = candidate
		return nil
	}

	// Fallback: the statistics object is expected near the end of the
	// output, so walk the flat objects backwards.
	objects := audioNormFlatObjectPattern.FindAllString(span, -1)
	for i := len(objects) - 1; i >= 0; i-- {
		candidate = *info
		if err := json.Unmarshal([]byte(objects[i]), &candidate); err == nil {
			*info = candidate
			return nil
		}
	}

	return fmt.Errorf("parsing JSON data: %w", spanErr)
}
12 changes: 12 additions & 0 deletions runner/cmd.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
stream: '-y -hide_banner -nostats %v -t %.0f -i %v -c:v copy -c:a copy -f mpegts %v -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %v/%%05d.ts %v'

SeparateAudioFast: "-i %v -vn -c:a copy %v"
SeparateAudio: "-i %v -vn %v"

# Two-pass audio normalization with FFmpeg loudnorm
# Loudnorm configuration "I=-23:TP=-2:LRA=7" -- Chosen according to EBU R128
## First pass: analyse the audio and collect the parameters needed in the second pass
AudioNormalize1: "-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"
## Second pass: Apply normalization to the audio
AudioNormalize2: "-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"


Transcoding: '-i %v -c:v libx264 -c:a copy -crf 0 -probesize 100M -analyzeduration 250M %v'

SilenceDetect: '-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -'

9 changes: 8 additions & 1 deletion runner/config/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@ import (

// CmdList groups the command-line argument templates, one string field per
// command. Each field carries a `Default` struct tag holding a template with
// fmt-style placeholders.
// NOTE(review): presumably the `Default` tags are applied by the configuration
// loader when no value is configured — confirm against NewCmd.
// NOTE(review): Stream and Transcoding each appear twice in this view, which
// looks like removed and added diff lines rendered together — confirm against
// the actual file before relying on this listing.
// NOTE(review): the Stream default contains "-t &.0f" while the stream entry in
// cmd.yaml uses "-t %.0f"; this looks like a typo in the default — confirm.
type CmdList struct {
//this is for adding extra parameters

Stream string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
SeparateAudioFast string `Default:"-i %v -vn -c:a copy %v"`
SeparateAudio string `Default:"-i %v -vn %v"`
// Pass 1 of the two-pass audio normalization; takes the input file as its only placeholder.
AudioNormalize1 string `Default:"-i %v -nostats -y -af loudnorm=I=-23:TP=-2:LRA=7:print_format=json -f null -"`
// Pass 2 of the audio normalization; placeholders are the input, the five measured values from pass 1 and the output.
AudioNormalize2 string `Default:"-i %v -af loudnorm=I=-23:TP=-2:LRA=7:measured_i=%v:measured_tp=%v:measured_lra=%v:measured_thresh=%v:offset=%v:linear=true:print_format=summary -c:a aac -c:v libx264 -crf 0 -probesize 100M -analyzeduration 250M %v"`
Transcoding string `Default:"-i %v -c:v libx264 %v"`
Stream string `Default:"-y -hide_banner -nostats %x -t &.0f -i %s -c:v copy -c:a copy -f mpegts %x -c:v libx264 -preset veryfast -tune zerolatency -maxrate 2500k -bufsize 3000k -g 60 -r 30 -x264-params keyint=60:scenecut=0 -c:a aac -ar 44100 -b:a 128k -f hls -hls_time 2 -hls_list_size 3600 -hls_playlist_type event -hls_flags append_list -hls_segment_filename %x %x"`
Transcoding string `Default:"-i %v -c:v libx264 %v"`
SilenceDetect string `Default:"-nostats -i %v -af silencedetect=n=-15dB:d=30 -f null -"`

}

func NewCmd(log *slog.Logger) *CmdList {
Expand Down

0 comments on commit 9eecbae

Please sign in to comment.