livekit · hiroshihorie · Oct 27, 2024 · Sep 4, 2024 · Sep 4, 2024 · Sep 4, 2024
diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift
@@ -43,7 +43,8 @@ public extension LKAudioBuffer {
         guard let targetBufferPointer = pcmBuffer.floatChannelData else { return nil }
 
         // Optimized version
-        var normalizationFactor: Float = 1.0 / 32768.0
+        let factor = Float(Int16.max)
+        var normalizationFactor: Float = 1.0 / factor // Or use 32768.0
 
         for i in 0 ..< channels {
             vDSP_vsmul(rawBuffer(forChannel: i),
@@ -98,3 +99,119 @@ public extension Sequence where Iterator.Element == AudioLevel {
                           peak: totalSums.peakSum / Float(count))
     }
 }
+
+/// Converts a stream of PCM audio into a fixed number of normalized frequency bands,
+/// e.g. for driving an audio visualizer.
+///
+/// Samples from the first channel of each incoming buffer are written to a ring buffer.
+/// Whenever the ring buffer yields a window, it is run through an FFT, grouped into
+/// `bandsCount` bands, mapped from decibels onto a `0...1` ratio and eased towards the
+/// previously published values.
+public class AudioVisualizeProcessor {
+    /// Size passed to both the ring buffer and the FFT processor.
+    static let bufferSize = 1024
+
+    // MARK: - Public
+
+    /// Lower edge (Hz) of the analysed frequency range.
+    public let minFrequency: Float
+    /// Upper edge (Hz) of the analysed frequency range.
+    public let maxFrequency: Float
+    /// Decibel level mapped to a band value of `0`.
+    public let minDB: Float
+    /// Together with `minDB`, defines the decibel span (`maxDB - minDB`) mapped to `1`.
+    public let maxDB: Float
+    /// Number of bands produced per update.
+    public let bandsCount: Int
+    /// When `true`, bands are sorted by level and rearranged so the highest sit in the middle.
+    public let isCentered: Bool
+    /// Input to the ease-in-out cubic curve that controls how far each update moves
+    /// the published bands towards the newly computed ones.
+    public let smoothingFactor: Float
+
+    /// Most recently published band levels (starts as `bandsCount` zeros).
+    public private(set) var bands: [Float]?
+
+    // MARK: - Private
+
+    private let ringBuffer = FloatRingBuffer(size: AudioVisualizeProcessor.bufferSize)
+    private let processor: FFTProcessor
+
+    /// Creates a processor.
+    /// - Parameters:
+    ///   - minFrequency: Lower edge (Hz) of the analysed range.
+    ///   - maxFrequency: Upper edge (Hz) of the analysed range.
+    ///   - minDB: Decibel level mapped to `0`.
+    ///   - maxDB: Upper decibel reference; `maxDB - minDB` is the span mapped to `1`.
+    ///   - bandsCount: Number of bands to produce.
+    ///   - isCentered: Whether to sort the bands and place the highest values in the middle.
+    ///   - smoothingFactor: Easing input used when blending old and new band values.
+    public init(minFrequency: Float = 10,
+                maxFrequency: Float = 8000,
+                minDB: Float = -32.0,
+                maxDB: Float = 32.0,
+                bandsCount: Int = 100,
+                isCentered: Bool = false,
+                smoothingFactor: Float = 0.3) // Smoothing factor for smoother transitions
+    {
+        self.minFrequency = minFrequency
+        self.maxFrequency = maxFrequency
+        self.minDB = minDB
+        self.maxDB = maxDB
+        self.bandsCount = bandsCount
+        self.isCentered = isCentered
+        self.smoothingFactor = smoothingFactor
+
+        processor = FFTProcessor(bufferSize: Self.bufferSize)
+        bands = [Float](repeating: 0.0, count: bandsCount)
+    }
+
+    /// Feeds a PCM buffer into the processor and, when the ring buffer yields a window,
+    /// updates `bands`.
+    ///
+    /// Buffers without float channel data are ignored. Only channel 0 is analysed.
+    /// - Parameter pcmBuffer: The audio to analyse.
+    public func add(pcmBuffer: AVAudioPCMBuffer) {
+        guard let floatChannelData = pcmBuffer.floatChannelData else { return }
+
+        // Copy the first channel's samples into the ring buffer.
+        let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength)))
+        ringBuffer.write(floats)
+
+        // Get full-size buffer if available, otherwise return
+        guard let buffer = ringBuffer.read() else { return }
+
+        let sampleRate = Float(pcmBuffer.format.sampleRate)
+
+        // Honour the configured lower bound. If it is unusable (negative, or not below the
+        // effective upper bound of min(maxFrequency, Nyquist)), fall back to 0 Hz.
+        let upperFrequency = min(maxFrequency, sampleRate / 2)
+        let lowerFrequency: Float = (minFrequency >= 0 && minFrequency < upperFrequency) ? minFrequency : 0
+
+        // Process FFT and compute frequency bands
+        let fftRes = processor.process(buffer: buffer)
+        let bands = fftRes.computeBands(
+            minFrequency: lowerFrequency,
+            maxFrequency: maxFrequency,
+            bandsCount: bandsCount,
+            sampleRate: sampleRate
+        )
+
+        let headroom = maxDB - minDB
+
+        // Map each magnitude to a 0...1 ratio of the decibel span. Subtracting `minDB`
+        // (instead of adding `abs(minDB)`) also shifts correctly when `minDB` is positive.
+        var normalizedBands = bands.magnitudes.map { magnitude -> Float in
+            let magnitudeDB = max(0, magnitude.toDecibels - minDB)
+            return min(1.0, magnitudeDB / headroom)
+        }
+
+        // If centering is enabled, rearrange the normalized bands
+        if isCentered {
+            normalizedBands.sort(by: >)
+            normalizedBands = centerBands(normalizedBands)
+        }
+
+        // The eased factor does not depend on the band, so compute it once per update.
+        let easedFactor = _easeInOutCubic(t: smoothingFactor)
+
+        // Move every published band towards its new value.
+        self.bands = zip(self.bands ?? [], normalizedBands).map { old, new in
+            _smoothTransition(from: old, to: new, easedFactor: easedFactor)
+        }
+    }
+
+    /// Centers the sorted bands by placing higher values in the middle.
+    ///
+    /// Values are taken in order and written alternately to the right and to the left
+    /// of the midpoint, so the first (largest) value lands at index `count / 2`.
+    private func centerBands(_ sortedBands: [Float]) -> [Float] {
+        var centeredBands = [Float](repeating: 0, count: sortedBands.count)
+        var leftIndex = sortedBands.count / 2
+        var rightIndex = leftIndex
+
+        for (index, value) in sortedBands.enumerated() {
+            if index % 2 == 0 {
+                // Place value to the right
+                centeredBands[rightIndex] = value
+                rightIndex += 1
+            } else {
+                // Place value to the left
+                leftIndex -= 1
+                centeredBands[leftIndex] = value
+            }
+        }
+
+        return centeredBands
+    }
+
+    /// Moves `oldValue` towards `newValue` by the given, already eased, fraction.
+    private func _smoothTransition(from oldValue: Float, to newValue: Float, easedFactor: Float) -> Float {
+        oldValue + (newValue - oldValue) * easedFactor
+    }
+
+    /// Easing function: ease-in-out cubic
+    private func _easeInOutCubic(t: Float) -> Float {
+        t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2
+    }
+}
diff --git a/Sources/LiveKit/Protocols/AudioRenderer.swift b/Sources/LiveKit/Protocols/AudioRenderer.swift
@@ -29,26 +29,17 @@ public protocol AudioRenderer {
     func render(pcmBuffer: AVAudioPCMBuffer)
 }
 
-class AudioRendererAdapter: NSObject, LKRTCAudioRenderer {
-    private weak var target: AudioRenderer?
-    private let targetHashValue: Int
+/// Bridges `LKRTCAudioRenderer` callbacks to `AudioRenderer` delegates.
+/// Each buffer received in `render(pcmBuffer:)` is handed to `notify`, inherited from
+/// `MulticastDelegate` (presumably invoking every registered delegate; confirm there).
+class AudioRendererAdapter: MulticastDelegate<AudioRenderer>, LKRTCAudioRenderer {
+    // Delegate type multicast by this adapter.
+    typealias Delegate = AudioRenderer
 
-    init(target: AudioRenderer) {
-        self.target = target
-        targetHashValue = ObjectIdentifier(target).hashValue
+    init() {
+        super.init(label: "AudioRendererAdapter")
     }
 
-    func render(pcmBuffer: AVAudioPCMBuffer) {
-        target?.render(pcmBuffer: pcmBuffer)
-    }
+    // MARK: - LKRTCAudioRenderer
 
-    // Proxy the equality operators
-    override func isEqual(_ object: Any?) -> Bool {
-        guard let other = object as? AudioRendererAdapter else { return false }
-        return targetHashValue == other.targetHashValue
-    }
-
-    override var hash: Int {
-        targetHashValue
+    /// Passes the rendered PCM buffer on to the delegates via `notify`.
+    func render(pcmBuffer: AVAudioPCMBuffer) {
+        notify { $0.render(pcmBuffer: pcmBuffer) }
     }
 }
diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2024 LiveKit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import Accelerate
+import Foundation
+
+extension Float {
+    /// Half of the receiver; turns a sample rate into its Nyquist frequency.
+    var nyquistFrequency: Float {
+        self / 2.0
+    }
+
+    /// The receiver's absolute value expressed in decibels (`20 * log10`).
+    /// The input is floored at `1e-7` so that zero does not yield `-infinity`.
+    var toDecibels: Float {
+        let floorMagnitude: Float = 1e-7
+        let clamped = Swift.max(self.magnitude, floorMagnitude)
+        return 20 * log10(clamped)
+    }
+}
+
+/// Output of `FFTResult.computeBands`: one magnitude and one frequency per band.
+public struct FFTComputeBandsResult {
+    /// Number of bands (the `bandsCount` passed to `computeBands`).
+    let count: Int
+    /// Per-band magnitude.
+    let magnitudes: [Float]
+    /// Per-band frequency, computed as the midpoint of the band's bin range.
+    let frequencies: [Float]
+}
+
+/// Magnitude spectrum produced by `FFTProcessor.process(buffer:)`, with a helper to
+/// collapse it into a fixed number of frequency bands.
+public class FFTResult {
+    /// Magnitude of each FFT bin.
+    public let magnitudes: [Float]
+    private let scaleType: FFTProcessor.ScaleType
+
+    init(magnitudes: [Float], scaleType: FFTProcessor.ScaleType) {
+        self.magnitudes = magnitudes
+        self.scaleType = scaleType
+    }
+
+    /// Groups the bins between `minFrequency` and `maxFrequency` into `bandsCount` bands.
+    ///
+    /// The upper bound is limited to the Nyquist frequency and the lower bound to the
+    /// range `0...upper bound`, so out-of-range arguments cannot index outside
+    /// `magnitudes`. Degenerate input (no bands, no bins, non-positive sample rate)
+    /// yields zero-filled results instead of trapping.
+    /// - Parameters:
+    ///   - minFrequency: Lower edge (Hz) of the range to analyse.
+    ///   - maxFrequency: Upper edge (Hz) of the range to analyse.
+    ///   - bandsCount: Number of bands to produce.
+    ///   - sampleRate: Sample rate (Hz) of the audio the spectrum was computed from.
+    /// - Returns: Per-band magnitudes and frequencies, each of length `max(0, bandsCount)`.
+    func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult {
+        let binCount = magnitudes.count
+
+        guard bandsCount > 0, binCount > 0, sampleRate > 0 else {
+            let safeCount = max(0, bandsCount)
+            let zeros = [Float](repeating: 0.0, count: safeCount)
+            return FFTComputeBandsResult(count: safeCount, magnitudes: zeros, frequencies: zeros)
+        }
+
+        // Clamp the requested range to 0...Nyquist and force lower <= upper.
+        let actualMaxFrequency = max(0, min(sampleRate.nyquistFrequency, maxFrequency))
+        let actualMinFrequency = max(0, min(actualMaxFrequency, minFrequency))
+
+        var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount)
+        var bandFrequencies = [Float](repeating: 0.0, count: bandsCount)
+
+        let magUpperRange = min(_magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate), binCount)
+        let magLowerRange = min(_magnitudeIndex(for: actualMinFrequency, sampleRate: sampleRate), magUpperRange)
+        let ratio = Float(magUpperRange - magLowerRange) / Float(bandsCount)
+
+        for i in 0 ..< bandsCount {
+            let magsStartIdx = min(Int(floorf(Float(i) * ratio)) + magLowerRange, magUpperRange)
+            let magsEndIdx = min(Int(floorf(Float(i + 1) * ratio)) + magLowerRange, magUpperRange)
+
+            if magsEndIdx > magsStartIdx {
+                if scaleType == .linear {
+                    // Linear scale averaging
+                    bandMagnitudes[i] = _computeAverage(magnitudes, magsStartIdx, magsEndIdx)
+                }
+                // NOTE(review): for any other scale type a multi-bin band keeps its initial 0;
+                // confirm whether a non-linear aggregation is intended here.
+            } else {
+                // Single value case; the start index may equal `binCount` when the range
+                // begins at Nyquist, so keep the read inside the array.
+                bandMagnitudes[i] = magnitudes[min(magsStartIdx, binCount - 1)]
+            }
+
+            // Compute average frequency
+            bandFrequencies[i] = _averageFrequencyInRange(magsStartIdx, magsEndIdx, sampleRate: sampleRate)
+        }
+
+        return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
+    }
+
+    /// Bin index corresponding to `frequency`, scaled so that Nyquist maps to `magnitudes.count`.
+    @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> Int {
+        Int(Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency)
+    }
+
+    /// Mean of `array[startIdx ..< stopIdx]`; returns 0 for an empty range.
+    @inline(__always) private func _computeAverage(_ array: [Float], _ startIdx: Int, _ stopIdx: Int) -> Float {
+        var mean: Float = 0
+        let count = stopIdx - startIdx
+        array.withUnsafeBufferPointer { bufferPtr in
+            guard count > 0, let base = bufferPtr.baseAddress else { return }
+            vDSP_meanv(base + startIdx, 1, &mean, UInt(count))
+        }
+        return mean
+    }
+
+    /// Width (Hz) of a single bin.
+    @inline(__always) private func _computeBandwidth(for sampleRate: Float) -> Float {
+        sampleRate.nyquistFrequency / Float(magnitudes.count)
+    }
+
+    /// Midpoint (Hz) between the frequencies of the two bin indices.
+    @inline(__always) private func _averageFrequencyInRange(_ startIndex: Int, _ endIndex: Int, sampleRate: Float) -> Float {
+        let bandwidth = _computeBandwidth(for: sampleRate)
+        return (bandwidth * Float(startIndex) + bandwidth * Float(endIndex)) / 2
+    }
+}
+
+/// Windowed real FFT over fixed-size buffers, built on Accelerate's vDSP routines.
+///
+/// The FFT setup and the split-complex scratch storage are allocated once in `init`
+/// and released in `deinit`; `process(buffer:)` reuses them for every call.
+class FFTProcessor {
+    /// Window applied to the input before the transform.
+    public enum WindowType {
+        case none
+        case hanning
+        case hamming
+    }
+
+    /// Scale type handed on to every `FFTResult` this processor creates.
+    public enum ScaleType {
+        case linear
+        case logarithmic
+    }
+
+    /// Number of samples expected by `process(buffer:)`.
+    public let bufferSize: Int
+    public let windowType: WindowType
+    public let scaleType: ScaleType
+
+    private let bufferHalfSize: Int
+    private let bufferLog2Size: Int
+    private var window: [Float] = []
+    private let fftSetup: FFTSetup
+    // Kept as `var` because it is passed to vDSP with `&`.
+    private var complexBuffer: DSPSplitComplex
+    private let realPointer: UnsafeMutablePointer<Float>
+    private let imaginaryPointer: UnsafeMutablePointer<Float>
+
+    /// Creates a processor for buffers of exactly `bufferSize` samples.
+    /// - Parameters:
+    ///   - bufferSize: Samples per transform. Must be a power of two greater than 1,
+    ///     because the radix-2 FFT length is `2^log2(bufferSize)` while the scratch
+    ///     buffers are sized from `bufferSize / 2`; any other value would make the two disagree.
+    ///   - scaleType: Scale type attached to the produced `FFTResult`s.
+    ///   - windowType: Window applied to the samples before the transform.
+    init(bufferSize: Int, scaleType: ScaleType = .linear, windowType: WindowType = .hanning) {
+        precondition(bufferSize > 1 && bufferSize.nonzeroBitCount == 1,
+                     "FFT buffer size must be a power of two greater than 1.")
+
+        self.bufferSize = bufferSize
+        self.scaleType = scaleType
+        self.windowType = windowType
+
+        bufferHalfSize = bufferSize / 2
+        // Exact log2 for a power of two.
+        bufferLog2Size = bufferSize.trailingZeroBitCount
+
+        guard let setup = vDSP_create_fftsetup(UInt(bufferLog2Size), FFTRadix(FFT_RADIX2)) else {
+            fatalError("Failed to create FFT setup.")
+        }
+        fftSetup = setup
+
+        realPointer = .allocate(capacity: bufferHalfSize)
+        imaginaryPointer = .allocate(capacity: bufferHalfSize)
+
+        realPointer.initialize(repeating: 0.0, count: bufferHalfSize)
+        imaginaryPointer.initialize(repeating: 0.0, count: bufferHalfSize)
+
+        complexBuffer = DSPSplitComplex(realp: realPointer, imagp: imaginaryPointer)
+        setupWindow()
+    }
+
+    deinit {
+        vDSP_destroy_fftsetup(fftSetup)
+        realPointer.deallocate()
+        imaginaryPointer.deallocate()
+    }
+
+    /// Fills `window` with the coefficients for `windowType` (all ones for `.none`).
+    private func setupWindow() {
+        window = [Float](repeating: 1.0, count: bufferSize)
+        switch windowType {
+        case .none:
+            break
+        case .hanning:
+            vDSP_hann_window(&window, UInt(bufferSize), Int32(vDSP_HANN_NORM))
+        case .hamming:
+            vDSP_hamm_window(&window, UInt(bufferSize), 0)
+        }
+    }
+
+    /// Applies the window to `buffer`, runs the forward FFT and returns the bin magnitudes.
+    /// - Parameter buffer: Exactly `bufferSize` samples; any other length is a fatal error.
+    /// - Returns: An `FFTResult` holding `bufferSize / 2` magnitudes and this processor's scale type.
+    func process(buffer: [Float]) -> FFTResult {
+        guard buffer.count == bufferSize else {
+            fatalError("Input buffer size mismatch.")
+        }
+
+        // Create a new array to hold the windowed buffer
+        var windowedBuffer = [Float](repeating: 0.0, count: bufferSize)
+
+        // Multiply the input buffer by the window coefficients
+        vDSP_vmul(buffer, 1, window, 1, &windowedBuffer, 1, UInt(bufferSize))
+
+        // Convert the real input to split complex form: consecutive sample pairs are
+        // viewed as interleaved complex values and split into the real/imaginary buffers.
+        windowedBuffer.withUnsafeBufferPointer { bufferPtr in
+            let complexPtr = UnsafeRawPointer(bufferPtr.baseAddress!).bindMemory(to: DSPComplex.self, capacity: bufferHalfSize)
+            vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, UInt(bufferHalfSize))
+        }
+
+        // Perform the FFT
+        vDSP_fft_zrip(fftSetup, &complexBuffer, 1, UInt(bufferLog2Size), Int32(FFT_FORWARD))
+
+        // Calculate magnitudes
+        var magnitudes = [Float](repeating: 0.0, count: bufferHalfSize)
+        vDSP_zvabs(&complexBuffer, 1, &magnitudes, 1, UInt(bufferHalfSize))
+
+        return FFTResult(magnitudes: magnitudes, scaleType: scaleType)
+    }
+}