Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

AVAudioEngine version AudioDeviceModule #536

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
622caa9
Tests
hiroshihorie Jan 6, 2025
9908d8e
Update render test
hiroshihorie Jan 6, 2025
3b9bbb3
Backward compatible session config
hiroshihorie Jan 8, 2025
b1f3ae1
.mixWithOthers by default
hiroshihorie Jan 8, 2025
b1871da
Ducking config
hiroshihorie Jan 8, 2025
be593c1
Use 125.6422.12-exp.2
hiroshihorie Jan 8, 2025
89c084c
Muted speech activity
hiroshihorie Jan 8, 2025
4d3b752
Merge branch 'main' into hiroshi/adm-audioengine2
hiroshihorie Jan 13, 2025
282cbc7
Update node config methods
hiroshihorie Jan 13, 2025
8f70540
Move audio buffer
hiroshihorie Jan 13, 2025
92e3406
Update AudioManager.swift
hiroshihorie Jan 13, 2025
4b77f84
Use 125.6422.12-exp.3
hiroshihorie Jan 14, 2025
7cd4f29
Fix tests
hiroshihorie Jan 14, 2025
5e217e1
Merge branch 'main' into hiroshi/adm-audioengine2
hiroshihorie Jan 14, 2025
130e1d2
Fix tests
hiroshihorie Jan 14, 2025
ae16a3c
Merge branch 'hiroshi/adm-audioengine2' of https://github.com/livekit…
hiroshihorie Jan 14, 2025
874b3a4
AudioDuckingLevel type
hiroshihorie Jan 14, 2025
49c91ef
Use 125.6422.12-exp.4
hiroshihorie Jan 14, 2025
4b84621
Fix Xcode 14.2
hiroshihorie Jan 14, 2025
5a585a3
Change session config timing
hiroshihorie Jan 16, 2025
a0103ad
Update state tests
hiroshihorie Jan 20, 2025
256b42a
P1
hiroshihorie Jan 20, 2025
8c60160
Merge branch 'main' into hiroshi/adm-audioengine2
hiroshihorie Jan 20, 2025
68f77f3
Merge branch 'hiroshi/adm-audioengine2' of https://github.com/livekit…
hiroshihorie Jan 20, 2025
a987f77
Merge branch 'main' into hiroshi/adm-audioengine2
hiroshihorie Jan 21, 2025
81ed8c8
Merge branch 'hiroshi/adm-audioengine2' of https://github.com/livekit…
hiroshihorie Jan 21, 2025
3c9c0dd
Chained engine observer
hiroshihorie Jan 22, 2025
7e48b7b
lib 125.6422.12-exp.5
hiroshihorie Jan 22, 2025
d3deb72
Update test
hiroshihorie Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ let package = Package(
],
dependencies: [
// LK-Prefixed Dynamic WebRTC XCFramework
.package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11"),
.package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.12-exp.4"),
.package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"),
.package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"),
// Only used for DocC generation
Expand Down
2 changes: 1 addition & 1 deletion [email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ let package = Package(
],
dependencies: [
// LK-Prefixed Dynamic WebRTC XCFramework
.package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11"),
.package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.12-exp.4"),
.package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"),
.package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"),
// Only used for DocC generation
Expand Down
181 changes: 129 additions & 52 deletions Sources/LiveKit/Track/AudioManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,39 +24,6 @@ internal import LiveKitWebRTC
@_implementationOnly import LiveKitWebRTC
#endif

// Wrapper for LKRTCAudioBuffer
@objc
public class LKAudioBuffer: NSObject {
private let _audioBuffer: LKRTCAudioBuffer

@objc
public var channels: Int { _audioBuffer.channels }

@objc
public var frames: Int { _audioBuffer.frames }

@objc
public var framesPerBand: Int { _audioBuffer.framesPerBand }

@objc
public var bands: Int { _audioBuffer.bands }

@objc
@available(*, deprecated, renamed: "rawBuffer(forChannel:)")
public func rawBuffer(for channel: Int) -> UnsafeMutablePointer<Float> {
_audioBuffer.rawBuffer(forChannel: channel)
}

@objc
public func rawBuffer(forChannel channel: Int) -> UnsafeMutablePointer<Float> {
_audioBuffer.rawBuffer(forChannel: channel)
}

init(audioBuffer: LKRTCAudioBuffer) {
_audioBuffer = audioBuffer
}
}

// Audio Session Configuration related
public class AudioManager: Loggable {
// MARK: - Public
Expand All @@ -68,6 +35,19 @@ public class AudioManager: Loggable {
#endif

public typealias DeviceUpdateFunc = (_ audioManager: AudioManager) -> Void
public typealias OnEngineWillStart = (_ audioManager: AudioManager, _ engine: AVAudioEngine, _ playoutEnabled: Bool, _ recordingEnabled: Bool) -> Void
public typealias OnEngineWillConnectInput = (_ audioManager: AudioManager,
_ engine: AVAudioEngine,
_ src: AVAudioNode,
_ dst: AVAudioNode,
_ format: AVAudioFormat) -> Bool
public typealias OnEngineWillConnectOutput = (_ audioManager: AudioManager,
_ engine: AVAudioEngine,
_ src: AVAudioNode,
_ dst: AVAudioNode,
_ format: AVAudioFormat) -> Bool

public typealias OnSpeechActivityEvent = (_ audioManager: AudioManager, _ event: SpeechActivityEvent) -> Void

#if os(iOS) || os(visionOS) || os(tvOS)

Expand Down Expand Up @@ -215,13 +195,113 @@ public class AudioManager: Loggable {

public var onDeviceUpdate: DeviceUpdateFunc? {
didSet {
RTC.audioDeviceModule.setDevicesUpdatedHandler { [weak self] in
RTC.audioDeviceModule.setDevicesDidUpdateCallback { [weak self] in
guard let self else { return }
self.onDeviceUpdate?(self)
}
}
}

/// Provide custom implementation for internal AVAudioEngine's input configuration.
/// Buffers flow from `src` to `dst`. Preferred format to connect node is provided as `format`.
/// Return true if custom implementation is provided, otherwise default implementation will be used.
public var onEngineWillConnectInput: OnEngineWillConnectInput? {
didSet {
// Bridge to the RTC audio device module; returns false (use default wiring)
// when self is gone or no handler is assigned.
// NOTE(review): assigning nil leaves the bridging closure installed, which
// always answers false — confirm this fallback is the intended behavior.
RTC.audioDeviceModule.setOnEngineWillConnectInputCallback { [weak self] engine, src, dst, format in
guard let self else { return false }
return self.onEngineWillConnectInput?(self, engine, src, dst, format) ?? false
}
}
}

/// Provide custom implementation for internal AVAudioEngine's output configuration.
/// Buffers flow from `src` to `dst`. Preferred format to connect node is provided as `format`.
/// Return true if custom implementation is provided, otherwise default implementation will be used.
public var onEngineWillConnectOutput: OnEngineWillConnectOutput? {
didSet {
// Bridge to the RTC audio device module; returns false (use default wiring)
// when self is gone or no handler is assigned.
// NOTE(review): assigning nil leaves the bridging closure installed — confirm intended.
RTC.audioDeviceModule.setOnEngineWillConnectOutputCallback { [weak self] engine, src, dst, format in
guard let self else { return false }
return self.onEngineWillConnectOutput?(self, engine, src, dst, format) ?? false
}
}
}

/// Detect voice activity even if the mic is muted.
/// Internal audio engine must be initialized by calling ``prepareRecording()`` or
/// connecting to a room and subscribing to a remote audio track or publishing a local audio track.
public var onMutedSpeechActivityEvent: OnSpeechActivityEvent? {
didSet {
// Forward RTC speech-activity events to the public handler, converting the
// internal event type to the SDK's SpeechActivityEvent via toLKType().
RTC.audioDeviceModule.setSpeechActivityCallback { [weak self] event in
guard let self else { return }
self.onMutedSpeechActivityEvent?(self, event.toLKType())
}
}
}

/// Whether the underlying audio device module operates in manual rendering mode.
/// The setter logs an error when the module rejects the change; the property then
/// continues to reflect the module's actual state via the getter.
public var isManualRenderingMode: Bool {
    get { RTC.audioDeviceModule.isManualRenderingMode }
    set {
        guard RTC.audioDeviceModule.setManualRenderingMode(newValue) else {
            log("Failed to set manual rendering mode", .error)
            return
        }
    }
}

/// Enables advanced ducking which ducks other audio based on the presence of voice activity from local and remote chat participants.
/// Default: true.
public var isAdvancedDuckingEnabled: Bool {
// Direct pass-through to the underlying RTC audio device module.
get { RTC.audioDeviceModule.isAdvancedDuckingEnabled }
set { RTC.audioDeviceModule.isAdvancedDuckingEnabled = newValue }
}

/// The ducking (audio reducing) level of other audio.
/// NOTE(review): the availability gate suggests this maps to
/// AVAudioVoiceProcessingOtherAudioDuckingConfiguration (iOS 17+/macOS 14+) — confirm
/// against the audio device module implementation.
@available(iOS 17, macOS 14.0, visionOS 1.0, *)
public var duckingLevel: AudioDuckingLevel {
// Unrecognized raw values reported by the module fall back to .default.
get { AudioDuckingLevel(rawValue: RTC.audioDeviceModule.duckingLevel) ?? .default }
set { RTC.audioDeviceModule.duckingLevel = newValue.rawValue }
}

// MARK: - Recording

/// Initialize recording (mic input) and pre-warm voice processing etc.
/// Mic permission is required and dialog will appear if not already granted.
/// Only initializes; does not start capturing audio.
public func prepareRecording() {
RTC.audioDeviceModule.initRecording()
}

/// Starts mic input to the SDK even without any ``Room`` or a connection.
/// Audio buffers will flow into ``LocalAudioTrack/add(audioRenderer:)`` and ``capturePostProcessingDelegate``.
public func startLocalRecording() {
// Initializes and starts recording in a single call on the device module.
RTC.audioDeviceModule.initAndStartRecording()
}

// MARK: Internal for testing

// Thin pass-throughs to the RTC audio device module so tests can drive
// playout/recording lifecycle directly without a Room connection.

// Initializes audio playout (speaker output) on the device module.
func initPlayout() {
RTC.audioDeviceModule.initPlayout()
}

// Starts audio playout; expects playout to have been initialized.
func startPlayout() {
RTC.audioDeviceModule.startPlayout()
}

// Stops audio playout.
func stopPlayout() {
RTC.audioDeviceModule.stopPlayout()
}

// Initializes audio recording (mic input) on the device module.
func initRecording() {
RTC.audioDeviceModule.initRecording()
}

// Starts audio recording; expects recording to have been initialized.
func startRecording() {
RTC.audioDeviceModule.startRecording()
}

// Stops audio recording.
func stopRecording() {
RTC.audioDeviceModule.stopRecording()
}

// MARK: - Internal

enum `Type` {
Expand All @@ -231,42 +311,39 @@ public class AudioManager: Loggable {

let state = StateSync(State())

// MARK: - Private

private let _configureRunner = SerialRunnerActor<Void>()
init() {
RTC.audioDeviceModule.setOnEngineWillStartCallback { [weak self] _, isPlayoutEnabled, isRecordingEnabled in
guard let self else { return }
self.log("OnEngineWillStart isPlayoutEnabled: \(isPlayoutEnabled), isRecordingEnabled: \(isRecordingEnabled)")

#if os(iOS) || os(visionOS) || os(tvOS)
private func _asyncConfigure(newState: State, oldState: State) async throws {
try await _configureRunner.run {
self.log("\(oldState) -> \(newState)")
let configureFunc = newState.customConfigureFunc ?? self.defaultConfigureAudioSessionFunc
configureFunc(newState, oldState)
#if os(iOS) || os(visionOS) || os(tvOS)
self.log("Configuring audio session...")
// Backward compatibility
let configureFunc = self.state.customConfigureFunc ?? self.defaultConfigureAudioSessionFunc
let simulatedState = AudioManager.State(localTracksCount: isRecordingEnabled ? 1 : 0, remoteTracksCount: isPlayoutEnabled ? 1 : 0)
configureFunc(simulatedState, AudioManager.State())
#endif
}
}
#endif

// MARK: - Private

func trackDidStart(_ type: Type) async throws {
let (newState, oldState) = state.mutate { state in
state.mutate { state in
let oldState = state
if type == .local { state.localTracksCount += 1 }
if type == .remote { state.remoteTracksCount += 1 }
return (state, oldState)
}
#if os(iOS) || os(visionOS) || os(tvOS)
try await _asyncConfigure(newState: newState, oldState: oldState)
#endif
}

func trackDidStop(_ type: Type) async throws {
let (newState, oldState) = state.mutate { state in
state.mutate { state in
let oldState = state
if type == .local { state.localTracksCount = max(state.localTracksCount - 1, 0) }
if type == .remote { state.remoteTracksCount = max(state.remoteTracksCount - 1, 0) }
return (state, oldState)
}
#if os(iOS) || os(visionOS) || os(tvOS)
try await _asyncConfigure(newState: newState, oldState: oldState)
#endif
}

#if os(iOS) || os(visionOS) || os(tvOS)
Expand Down
56 changes: 56 additions & 0 deletions Sources/LiveKit/Types/AudioBuffer.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#if swift(>=5.9)
internal import LiveKitWebRTC
#else
@_implementationOnly import LiveKitWebRTC
#endif

import Foundation

// Wrapper for LKRTCAudioBuffer
@objc
public class LKAudioBuffer: NSObject {
    /// Underlying WebRTC audio buffer; every accessor forwards to it.
    private let _audioBuffer: LKRTCAudioBuffer

    init(audioBuffer: LKRTCAudioBuffer) {
        _audioBuffer = audioBuffer
    }

    /// Number of audio channels in the buffer.
    @objc
    public var channels: Int { _audioBuffer.channels }

    /// Number of frames per channel.
    @objc
    public var frames: Int { _audioBuffer.frames }

    /// Number of frames contained in each band.
    @objc
    public var framesPerBand: Int { _audioBuffer.framesPerBand }

    /// Number of bands in the buffer.
    @objc
    public var bands: Int { _audioBuffer.bands }

    /// Deprecated spelling kept for source compatibility; forwards to `rawBuffer(forChannel:)`.
    @objc
    @available(*, deprecated, renamed: "rawBuffer(forChannel:)")
    public func rawBuffer(for channel: Int) -> UnsafeMutablePointer<Float> {
        rawBuffer(forChannel: channel)
    }

    /// Mutable pointer to the raw float samples for the given channel.
    @objc
    public func rawBuffer(forChannel channel: Int) -> UnsafeMutablePointer<Float> {
        _audioBuffer.rawBuffer(forChannel: channel)
    }
}
22 changes: 22 additions & 0 deletions Sources/LiveKit/Types/AudioDuckingLevel.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/// The ducking (audio reducing) level applied to other audio.
/// NOTE(review): raw values (0/10/20/30) presumably mirror the platform's
/// other-audio ducking configuration levels — confirm against the audio device module.
public enum AudioDuckingLevel: Int {
case `default` = 0
case min = 10
case mid = 20
case max = 30
}
4 changes: 2 additions & 2 deletions Sources/LiveKit/Types/AudioSessionConfiguration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ public extension AudioSessionConfiguration {
mode: .spokenAudio)

static let playAndRecordSpeaker = AudioSessionConfiguration(category: .playAndRecord,
categoryOptions: [.allowBluetooth, .allowBluetoothA2DP, .allowAirPlay],
categoryOptions: [.mixWithOthers, .allowBluetooth, .allowBluetoothA2DP, .allowAirPlay],
mode: .videoChat)

static let playAndRecordReceiver = AudioSessionConfiguration(category: .playAndRecord,
categoryOptions: [.allowBluetooth, .allowBluetoothA2DP, .allowAirPlay],
categoryOptions: [.mixWithOthers, .allowBluetooth, .allowBluetoothA2DP, .allowAirPlay],
mode: .voiceChat)
}

Expand Down
36 changes: 36 additions & 0 deletions Sources/LiveKit/Types/SpeechActivityEvent.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#if swift(>=5.9)
internal import LiveKitWebRTC
#else
@_implementationOnly import LiveKitWebRTC
#endif

/// Speech activity state reported by the audio engine
/// (delivered through ``AudioManager/onMutedSpeechActivityEvent``).
public enum SpeechActivityEvent {
/// Speech activity was detected.
case started
/// Speech activity stopped.
case ended
}

extension RTCSpeechActivityEvent {
    /// Converts the internal WebRTC speech-activity event to the public SDK type.
    func toLKType() -> SpeechActivityEvent {
        if case .started = self { return .started }
        // .ended and any future unknown cases both collapse to .ended,
        // matching the original @unknown default behavior.
        return .ended
    }
}
Loading
Loading