Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio visualization helpers #474

Merged
merged 27 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
19906ee
Create FloatRingBuffer.swift
hiroshihorie Sep 4, 2024
cc30821
Processor
hiroshihorie Sep 4, 2024
a5ba133
Optimize
hiroshihorie Sep 4, 2024
6c26a4a
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 10, 2024
c09022d
Optimize
hiroshihorie Sep 10, 2024
3492254
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 20, 2024
6ebd537
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 24, 2024
fec59e1
Apply windowType
hiroshihorie Sep 24, 2024
fa2b5d0
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 29, 2024
6588a95
centering, min max db param
hiroshihorie Sep 29, 2024
0a8f253
smoothing 1
hiroshihorie Sep 29, 2024
8e3dbe9
smoothing 2
hiroshihorie Sep 29, 2024
053aa0c
Optimize
hiroshihorie Sep 29, 2024
ebaf87d
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 2, 2024
e5c6df2
Optimize
hiroshihorie Oct 3, 2024
0c41cfa
logarithmic
hiroshihorie Oct 3, 2024
df8f3a1
Generic ring buffer
hiroshihorie Oct 3, 2024
d039b2c
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 7, 2024
ec06595
convert
hiroshihorie Oct 8, 2024
398e365
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 8, 2024
a69566d
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 19, 2024
883a640
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 24, 2024
f84d836
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 24, 2024
b118f59
fix audio processing adapter
hiroshihorie Oct 27, 2024
6935094
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 27, 2024
da40d9f
Merge branch 'hiroshi/audio-visualize' of https://github.com/livekit/…
hiroshihorie Oct 27, 2024
0bb44be
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 118 additions & 1 deletion Sources/LiveKit/Convenience/AudioProcessing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ public extension LKAudioBuffer {
guard let targetBufferPointer = pcmBuffer.floatChannelData else { return nil }

// Optimized version
var normalizationFactor: Float = 1.0 / 32768.0
let factor = Float(Int16.max)
var normalizationFactor: Float = 1.0 / factor // Or use 32768.0

for i in 0 ..< channels {
vDSP_vsmul(rawBuffer(forChannel: i),
Expand Down Expand Up @@ -98,3 +99,119 @@ public extension Sequence where Iterator.Element == AudioLevel {
peak: totalSums.peakSum / Float(count))
}
}

/// Turns incoming PCM audio into a fixed number of smoothed, normalized frequency bands
/// suitable for driving an audio visualizer.
///
/// Samples from the first channel of each buffer are written to a ring buffer; whenever the
/// ring buffer yields a block, it is run through an FFT, grouped into `bandsCount` bands,
/// converted to decibels, normalized against `minDB ... maxDB` and blended into `bands`.
public class AudioVisualizeProcessor {
    /// Number of samples handed to the FFT per analysis pass.
    static let bufferSize = 1024

    // MARK: - Public

    /// Lower bound, in Hz, of the analysed frequency range.
    public let minFrequency: Float
    /// Upper bound, in Hz, of the analysed frequency range.
    public let maxFrequency: Float
    /// Decibel level that maps to a normalized band value of 0.
    public let minDB: Float
    /// Decibel level that maps to a normalized band value of 1.
    public let maxDB: Float
    /// Number of bands produced per analysis pass.
    public let bandsCount: Int
    /// When `true`, bands are sorted by level and arranged with the strongest in the middle.
    public let isCentered: Bool
    /// Input to the cubic easing curve that controls how quickly `bands` follows new values.
    public let smoothingFactor: Float

    /// Most recent smoothed band values; starts as `bandsCount` zeros.
    public private(set) var bands: [Float]?

    // MARK: - Private

    private let ringBuffer = FloatRingBuffer(size: AudioVisualizeProcessor.bufferSize)
    private let processor: FFTProcessor

    public init(minFrequency: Float = 10,
                maxFrequency: Float = 8000,
                minDB: Float = -32.0,
                maxDB: Float = 32.0,
                bandsCount: Int = 100,
                isCentered: Bool = false,
                smoothingFactor: Float = 0.3) // Smoothing factor for smoother transitions
    {
        self.minFrequency = minFrequency
        self.maxFrequency = maxFrequency
        self.minDB = minDB
        self.maxDB = maxDB
        self.bandsCount = bandsCount
        self.isCentered = isCentered
        self.smoothingFactor = smoothingFactor

        processor = FFTProcessor(bufferSize: Self.bufferSize)
        bands = [Float](repeating: 0.0, count: bandsCount)
    }

    /// Feeds a PCM buffer into the analyser and, when enough samples are available,
    /// updates `bands`.
    ///
    /// - Parameter pcmBuffer: Buffer whose `floatChannelData` is read; buffers without
    ///   float channel data are ignored. Only channel 0 is analysed.
    public func add(pcmBuffer: AVAudioPCMBuffer) {
        guard let floatChannelData = pcmBuffer.floatChannelData else { return }

        // Copy the first channel's samples into a Swift array.
        let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength)))
        ringBuffer.write(floats)

        // Nothing to analyse until the ring buffer hands back a block.
        guard let buffer = ringBuffer.read() else { return }

        let sampleRate = Float(pcmBuffer.format.sampleRate)

        // Honour the configured lower bound (previously a hard-coded 0 was passed, so
        // `minFrequency` had no effect). It is clamped so the lower bound can never exceed
        // the upper bound or the Nyquist frequency.
        let lowerFrequency = max(0, min(minFrequency, maxFrequency, sampleRate / 2))

        // Process FFT and compute frequency bands
        let fftRes = processor.process(buffer: buffer)
        let bands = fftRes.computeBands(
            minFrequency: lowerFrequency,
            maxFrequency: maxFrequency,
            bandsCount: bandsCount,
            sampleRate: sampleRate
        )

        let headroom = maxDB - minDB

        // Normalize magnitudes into 0...1 relative to the minDB...maxDB window.
        var normalizedBands: [Float]
        if headroom > 0 {
            normalizedBands = bands.magnitudes.map { magnitude in
                // Offset by `minDB` itself rather than `abs(minDB)` so a positive floor
                // is handled correctly; for minDB <= 0 the two are identical.
                let magnitudeDB = max(0, magnitude.toDecibels - minDB)
                return min(1.0, magnitudeDB / headroom)
            }
        } else {
            // A zero or inverted dB window cannot be normalized; report silence instead of
            // dividing by zero or a negative range.
            normalizedBands = [Float](repeating: 0, count: bands.magnitudes.count)
        }

        // If centering is enabled, rearrange the normalized bands
        if isCentered {
            normalizedBands.sort(by: >)
            normalizedBands = centerBands(normalizedBands)
        }

        // Blend the previous values towards the new ones.
        self.bands = zip(self.bands ?? [], normalizedBands).map { old, new in
            _smoothTransition(from: old, to: new, factor: smoothingFactor)
        }
    }

    /// Centers the sorted bands by placing higher values in the middle.
    ///
    /// - Parameter sortedBands: Band values sorted in descending order.
    /// - Returns: The same values, alternately placed to the right and left of the midpoint.
    private func centerBands(_ sortedBands: [Float]) -> [Float] {
        var centeredBands = [Float](repeating: 0, count: sortedBands.count)
        var leftIndex = sortedBands.count / 2
        var rightIndex = leftIndex

        for (index, value) in sortedBands.enumerated() {
            if index % 2 == 0 {
                // Place value to the right
                centeredBands[rightIndex] = value
                rightIndex += 1
            } else {
                // Place value to the left
                leftIndex -= 1
                centeredBands[leftIndex] = value
            }
        }

        return centeredBands
    }

    /// Moves `oldValue` towards `newValue` by the eased fraction of their difference.
    private func _smoothTransition(from oldValue: Float, to newValue: Float, factor: Float) -> Float {
        // Calculate the delta change between the old and new value
        let delta = newValue - oldValue
        // Apply an ease-in-out cubic easing curve
        let easedFactor = _easeInOutCubic(t: factor)
        // Calculate and return the smoothed value
        return oldValue + delta * easedFactor
    }

    /// Easing function: ease-in-out cubic
    private func _easeInOutCubic(t: Float) -> Float {
        t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2
    }
}
25 changes: 8 additions & 17 deletions Sources/LiveKit/Protocols/AudioRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,17 @@ public protocol AudioRenderer {
func render(pcmBuffer: AVAudioPCMBuffer)
}

class AudioRendererAdapter: NSObject, LKRTCAudioRenderer {
private weak var target: AudioRenderer?
private let targetHashValue: Int
class AudioRendererAdapter: MulticastDelegate<AudioRenderer>, LKRTCAudioRenderer {
//
typealias Delegate = AudioRenderer

init(target: AudioRenderer) {
self.target = target
targetHashValue = ObjectIdentifier(target).hashValue
init() {
super.init(label: "AudioRendererAdapter")
}

func render(pcmBuffer: AVAudioPCMBuffer) {
target?.render(pcmBuffer: pcmBuffer)
}
// MARK: - LKRTCAudioRenderer

// Proxy the equality operators
override func isEqual(_ object: Any?) -> Bool {
guard let other = object as? AudioRendererAdapter else { return false }
return targetHashValue == other.targetHashValue
}

override var hash: Int {
targetHashValue
func render(pcmBuffer: AVAudioPCMBuffer) {
notify { $0.render(pcmBuffer: pcmBuffer) }
}
}
188 changes: 188 additions & 0 deletions Sources/LiveKit/Support/FFTProcessor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Copyright 2024 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import Accelerate
import Foundation

extension Float {
    /// Half of this value; when the receiver is a sample rate this is its Nyquist frequency.
    var nyquistFrequency: Float {
        return self / 2.0
    }

    /// The receiver's absolute value expressed in decibels (`20 * log10`).
    ///
    /// The absolute value is clamped to at least `1e-7` before taking the logarithm, so
    /// zero does not produce negative infinity.
    var toDecibels: Float {
        let lowerBound: Float = 1e-7
        let clamped = max(magnitude, lowerBound)
        return 20 * log10(clamped)
    }
}

/// Per-band output of `FFTResult.computeBands`.
public struct FFTComputeBandsResult {
    /// Number of bands that were requested.
    let count: Int

    /// One magnitude per band.
    let magnitudes: [Float]

    /// One representative frequency per band.
    let frequencies: [Float]
}

public class FFTResult {
public let magnitudes: [Float]
private let scaleType: FFTProcessor.ScaleType

/// Stores the FFT bin magnitudes together with the scale type used when grouping them
/// into bands.
init(magnitudes: [Float], scaleType: FFTProcessor.ScaleType) {
    self.scaleType = scaleType
    self.magnitudes = magnitudes
}

/// Groups the FFT bin magnitudes into `bandsCount` bands covering
/// `minFrequency ... maxFrequency`.
///
/// The requested range is clamped to `0 ... sampleRate / 2`, and every derived bin index is
/// clamped to the bounds of `magnitudes`, so out-of-range or inverted frequency arguments
/// no longer trap. If `magnitudes` is empty or `sampleRate` is not positive, all bands are
/// returned as zero.
///
/// - Parameters:
///   - minFrequency: Lower bound of the range, in the same unit as `sampleRate`.
///   - maxFrequency: Upper bound of the range; capped at the Nyquist frequency.
///   - bandsCount: Number of bands to produce.
///   - sampleRate: Sample rate of the signal the FFT was computed from.
/// - Returns: Per-band magnitudes and representative frequencies.
func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult {
    var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount)
    var bandFrequencies = [Float](repeating: 0.0, count: bandsCount)

    // Without bins or a usable sample rate the index math below would divide by zero
    // (NaN -> Int conversion traps) or subscript an empty array.
    guard !magnitudes.isEmpty, sampleRate > 0 else {
        return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
    }

    // Keep the range ordered and inside 0 ... Nyquist.
    let actualMaxFrequency = max(0, min(sampleRate.nyquistFrequency, maxFrequency))
    let actualMinFrequency = max(0, min(minFrequency, actualMaxFrequency))

    let lastIndex = magnitudes.count - 1
    let magLowerRange = min(_magnitudeIndex(for: actualMinFrequency, sampleRate: sampleRate), lastIndex)
    let magUpperRange = min(max(_magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate), magLowerRange),
                            magnitudes.count)
    let ratio = Float(magUpperRange - magLowerRange) / Float(bandsCount)

    for i in 0 ..< bandsCount {
        // The start index must be subscriptable; the end index is exclusive and never
        // precedes the start.
        let magsStartIdx = min(Int(floorf(Float(i) * ratio)) + magLowerRange, lastIndex)
        let magsEndIdx = min(max(Int(floorf(Float(i + 1) * ratio)) + magLowerRange, magsStartIdx),
                             magnitudes.count)

        let count = magsEndIdx - magsStartIdx
        if count > 0 {
            if scaleType == .linear {
                // Linear scale averaging
                bandMagnitudes[i] = _computeAverage(magnitudes, magsStartIdx, magsEndIdx)
            }
            // NOTE(review): for any other scale type the band is left at 0 here — confirm
            // whether logarithmic averaging is still to be implemented.
        } else {
            // Single value case
            bandMagnitudes[i] = magnitudes[magsStartIdx]
        }

        // Compute average frequency
        bandFrequencies[i] = _averageFrequencyInRange(magsStartIdx, magsEndIdx, sampleRate: sampleRate)
    }

    return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
}

/// Maps a frequency to an index into `magnitudes`, scaling linearly so that the Nyquist
/// frequency corresponds to `magnitudes.count`.
@inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> Int {
    let binCount = Float(magnitudes.count)
    let scaled = binCount * frequency
    return Int(scaled / sampleRate.nyquistFrequency)
}

@inline(__always) private func _computeAverage(_ array: [Float], _ startIdx: Int, _ stopIdx: Int) -> Float {
var mean: Float = 0
let count = stopIdx - startIdx
array.withUnsafeBufferPointer { bufferPtr in
let ptr = bufferPtr.baseAddress! + startIdx
vDSP_meanv(ptr, 1, &mean, UInt(count))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vDSP_Length(count)

}
return mean
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can simplify this function by using ArraySlice, which gives responsibility for safe pointer arithmetic back to Swift:

    @inline(__always) private func _computeAverage(_ array: ArraySlice<Float>) -> Float {
        var mean: Float = 0
        array.withUnsafeBufferPointer { bufferPtr in
            vDSP_meanv(bufferPtr.baseAddress!, 1, &mean, vDSP_Length(array.count))
        }
        return mean
    }

then you call it above with _computeAverage(magnitudes[magsStartIdx..<magsEndIdx])


/// Width of a single magnitude bin: the Nyquist frequency divided by the number of bins.
@inline(__always) private func _computeBandwidth(for sampleRate: Float) -> Float {
    let nyquist = sampleRate.nyquistFrequency
    let binCount = Float(magnitudes.count)
    return nyquist / binCount
}

/// Midpoint frequency of the bin range `startIndex ... endIndex`, i.e. the mean of the two
/// indices' frequencies.
@inline(__always) private func _averageFrequencyInRange(_ startIndex: Int, _ endIndex: Int, sampleRate: Float) -> Float {
    let binWidth = _computeBandwidth(for: sampleRate)
    let lowerFrequency = binWidth * Float(startIndex)
    let upperFrequency = binWidth * Float(endIndex)
    return (lowerFrequency + upperFrequency) / 2
}
}

class FFTProcessor {
/// Window applied to the input samples before the FFT.
public enum WindowType {
    case none, hanning, hamming
}

/// Scale used when magnitudes are grouped into bands.
public enum ScaleType {
    case linear, logarithmic
}

public let bufferSize: Int
public let windowType: WindowType
public let scaleType: ScaleType

private let bufferHalfSize: Int
private let bufferLog2Size: Int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should consider declaring these Int properties as vDSP_Length instead. At the very least, they should be UInt here and cast to vDSP_Length when passed to Accelerate later.

private var window: [Float] = []
private var fftSetup: FFTSetup
private var complexBuffer: DSPSplitComplex
private var realPointer: UnsafeMutablePointer<Float>
private var imaginaryPointer: UnsafeMutablePointer<Float>
private var zeroDBReference: Float = 1.0

/// Creates a processor for input blocks of exactly `bufferSize` samples.
///
/// Allocates the FFT setup and the split-complex scratch storage (half of `bufferSize`
/// elements each for the real and imaginary parts), then builds the window coefficients.
///
/// - Parameters:
///   - bufferSize: Number of samples per block passed to `process(buffer:)`.
///   - scaleType: Scale type forwarded to every `FFTResult`.
///   - windowType: Window applied to each block before the FFT.
init(bufferSize: Int, scaleType: ScaleType = .linear, windowType: WindowType = .hanning) {
    self.bufferSize = bufferSize
    self.windowType = windowType
    self.scaleType = scaleType

    let halfSize = bufferSize / 2
    let log2Size = Int(log2f(Float(bufferSize)))
    bufferHalfSize = halfSize
    bufferLog2Size = log2Size

    // Traps if the setup cannot be created.
    fftSetup = vDSP_create_fftsetup(vDSP_Length(log2Size), FFTRadix(FFT_RADIX2))!

    // Zero-filled backing storage for the split-complex buffer.
    let real = UnsafeMutablePointer<Float>.allocate(capacity: halfSize)
    let imaginary = UnsafeMutablePointer<Float>.allocate(capacity: halfSize)
    real.initialize(repeating: 0.0, count: halfSize)
    imaginary.initialize(repeating: 0.0, count: halfSize)
    realPointer = real
    imaginaryPointer = imaginary

    complexBuffer = DSPSplitComplex(realp: real, imagp: imaginary)

    setupWindow()
}

deinit {
vDSP_destroy_fftsetup(fftSetup)
realPointer.deallocate()
imaginaryPointer.deallocate()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason you're managing raw pointers in this class rather than just using arrays and nested withUnsafeMutableBufferPointer calls, as in the Apple sample code? Looks scary to me 🤷 https://developer.apple.com/documentation/accelerate/vdsp/fast_fourier_transforms/finding_the_component_frequencies_in_a_composite_sine_wave

}

/// Fills `window` with `bufferSize` coefficients for the configured `windowType`.
///
/// `.none` leaves every coefficient at 1.0.
private func setupWindow() {
    var coefficients = [Float](repeating: 1.0, count: bufferSize)
    let length = vDSP_Length(bufferSize)

    switch windowType {
    case .hanning:
        vDSP_hann_window(&coefficients, length, Int32(vDSP_HANN_NORM))
    case .hamming:
        vDSP_hamm_window(&coefficients, length, 0)
    case .none:
        break
    }

    window = coefficients
}

/// Windows `buffer`, runs a forward real FFT over it and returns the bin magnitudes.
///
/// - Parameter buffer: Exactly `bufferSize` samples; any other length is a fatal error.
/// - Returns: An `FFTResult` holding `bufferSize / 2` magnitudes and this processor's
///   scale type.
func process(buffer: [Float]) -> FFTResult {
    if buffer.count != bufferSize {
        fatalError("Input buffer size mismatch.")
    }

    let fullLength = vDSP_Length(bufferSize)
    let halfLength = vDSP_Length(bufferHalfSize)

    // Apply the window coefficients to a copy of the input.
    var windowed = [Float](repeating: 0.0, count: bufferSize)
    vDSP_vmul(buffer, 1, window, 1, &windowed, 1, fullLength)

    // Reinterpret the real samples as interleaved complex pairs and split them into the
    // separate real/imaginary storage used by the FFT.
    windowed.withUnsafeBufferPointer { samples in
        let interleaved = UnsafeRawPointer(samples.baseAddress!)
            .bindMemory(to: DSPComplex.self, capacity: bufferHalfSize)
        vDSP_ctoz(interleaved, 2, &complexBuffer, 1, halfLength)
    }

    // In-place forward FFT on the split-complex buffer.
    vDSP_fft_zrip(fftSetup, &complexBuffer, 1, vDSP_Length(bufferLog2Size), Int32(FFT_FORWARD))

    // Absolute value of each complex bin.
    var binMagnitudes = [Float](repeating: 0.0, count: bufferHalfSize)
    vDSP_zvabs(&complexBuffer, 1, &binMagnitudes, 1, halfLength)

    return FFTResult(magnitudes: binMagnitudes, scaleType: scaleType)
}
}
Loading
Loading