From 3df68d4f42a18abfd8365b83ca8fe0d11b9ea43a Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Fri, 11 Oct 2024 20:41:44 +0900 Subject: [PATCH 01/15] Revert "Stop recording on mute (turn off mic indicator) (#55)" This reverts commit c0209ef4d07b51285457d11f038486a79f4c2d5c. --- audio/audio_send_stream.cc | 5 -- audio/audio_send_stream.h | 1 - audio/audio_state.cc | 72 ++++++----------------------- audio/audio_state.h | 5 -- audio/channel_send.cc | 4 +- audio/channel_send.h | 2 - call/audio_send_stream.h | 1 - call/audio_state.h | 3 -- media/engine/webrtc_voice_engine.cc | 3 -- media/engine/webrtc_voice_engine.h | 3 +- 10 files changed, 17 insertions(+), 82 deletions(-) diff --git a/audio/audio_send_stream.cc b/audio/audio_send_stream.cc index 0804bc8286..59b0ea5b5e 100644 --- a/audio/audio_send_stream.cc +++ b/audio/audio_send_stream.cc @@ -415,11 +415,6 @@ void AudioSendStream::SetMuted(bool muted) { channel_send_->SetInputMute(muted); } -bool AudioSendStream::GetMuted() { - RTC_DCHECK_RUN_ON(&worker_thread_checker_); - return channel_send_->InputMute(); -} - webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const { return GetStats(true); } diff --git a/audio/audio_send_stream.h b/audio/audio_send_stream.h index b7c265fa30..a37c8fd452 100644 --- a/audio/audio_send_stream.h +++ b/audio/audio_send_stream.h @@ -94,7 +94,6 @@ class AudioSendStream final : public webrtc::AudioSendStream, int payload_frequency, int event, int duration_ms) override; - bool GetMuted() override; void SetMuted(bool muted) override; webrtc::AudioSendStream::Stats GetStats() const override; webrtc::AudioSendStream::Stats GetStats( diff --git a/audio/audio_state.cc b/audio/audio_state.cc index c715bc4cb9..9ff74d6faa 100644 --- a/audio/audio_state.cc +++ b/audio/audio_state.cc @@ -98,26 +98,22 @@ void AudioState::AddSendingStream(webrtc::AudioSendStream* stream, UpdateAudioTransportWithSendingStreams(); // Make sure recording is 
initialized; start recording if enabled. - if (ShouldRecord()) { - auto* adm = config_.audio_device_module.get(); - if (!adm->Recording()) { - if (adm->InitRecording() == 0) { - if (recording_enabled_) { - - // TODO: Verify if the following windows only logic is still required. + auto* adm = config_.audio_device_module.get(); + if (!adm->Recording()) { + if (adm->InitRecording() == 0) { + if (recording_enabled_) { #if defined(WEBRTC_WIN) - if (adm->BuiltInAECIsAvailable() && !adm->Playing()) { - if (!adm->PlayoutIsInitialized()) { - adm->InitPlayout(); - } - adm->StartPlayout(); + if (adm->BuiltInAECIsAvailable() && !adm->Playing()) { + if (!adm->PlayoutIsInitialized()) { + adm->InitPlayout(); } -#endif - adm->StartRecording(); + adm->StartPlayout(); } - } else { - RTC_DLOG_F(LS_ERROR) << "Failed to initialize recording."; +#endif + adm->StartRecording(); } + } else { + RTC_DLOG_F(LS_ERROR) << "Failed to initialize recording."; } } } @@ -127,10 +123,7 @@ void AudioState::RemoveSendingStream(webrtc::AudioSendStream* stream) { auto count = sending_streams_.erase(stream); RTC_DCHECK_EQ(1, count); UpdateAudioTransportWithSendingStreams(); - - bool should_record = ShouldRecord(); - RTC_LOG(LS_INFO) << "RemoveSendingStream: should_record = " << should_record; - if (!should_record) { + if (sending_streams_.empty()) { config_.audio_device_module->StopRecording(); } } @@ -158,7 +151,7 @@ void AudioState::SetRecording(bool enabled) { if (recording_enabled_ != enabled) { recording_enabled_ = enabled; if (enabled) { - if (ShouldRecord()) { + if (!sending_streams_.empty()) { config_.audio_device_module->StartRecording(); } } else { @@ -218,43 +211,6 @@ void AudioState::UpdateNullAudioPollerState() { null_audio_poller_.Stop(); } } - -void AudioState::OnMuteStreamChanged() { - - auto* adm = config_.audio_device_module.get(); - bool should_record = ShouldRecord(); - - RTC_LOG(LS_INFO) << "OnMuteStreamChanged: should_record = " << should_record; - if (should_record && 
!adm->Recording()) { - if (adm->InitRecording() == 0) { - adm->StartRecording(); - } - } else if (!should_record && adm->Recording()) { - adm->StopRecording(); - } -} - -bool AudioState::ShouldRecord() { - RTC_LOG(LS_INFO) << "ShouldRecord"; - // no streams to send - if (sending_streams_.empty()) { - RTC_LOG(LS_INFO) << "ShouldRecord: send stream = empty"; - return false; - } - - int stream_count = sending_streams_.size(); - - int muted_count = 0; - for (const auto& kv : sending_streams_) { - if (kv.first->GetMuted()) { - muted_count++; - } - } - - RTC_LOG(LS_INFO) << "ShouldRecord: " << muted_count << " muted, " << stream_count << " sending"; - return muted_count != stream_count; -} - } // namespace internal rtc::scoped_refptr AudioState::Create( diff --git a/audio/audio_state.h b/audio/audio_state.h index f21cca771e..88aaaa3697 100644 --- a/audio/audio_state.h +++ b/audio/audio_state.h @@ -47,8 +47,6 @@ class AudioState : public webrtc::AudioState { void SetStereoChannelSwapping(bool enable) override; - void OnMuteStreamChanged() override; - AudioDeviceModule* audio_device_module() { RTC_DCHECK(config_.audio_device_module); return config_.audio_device_module.get(); @@ -66,9 +64,6 @@ class AudioState : public webrtc::AudioState { void UpdateAudioTransportWithSendingStreams(); void UpdateNullAudioPollerState() RTC_RUN_ON(&thread_checker_); - // Returns true when at least 1 stream exists and all streams are not muted. - bool ShouldRecord(); - SequenceChecker thread_checker_; SequenceChecker process_thread_checker_{SequenceChecker::kDetached}; const webrtc::AudioState::Config config_; diff --git a/audio/channel_send.cc b/audio/channel_send.cc index bc474ee33d..4a2700177b 100644 --- a/audio/channel_send.cc +++ b/audio/channel_send.cc @@ -100,8 +100,6 @@ class ChannelSend : public ChannelSendInterface, // Muting, Volume and Level. void SetInputMute(bool enable) override; - bool InputMute() const override; - // Stats. 
ANAStats GetANAStatistics() const override; @@ -165,6 +163,8 @@ class ChannelSend : public ChannelSendInterface, size_t payloadSize, int64_t absolute_capture_timestamp_ms) override; + bool InputMute() const; + int32_t SendRtpAudio(AudioFrameType frameType, uint8_t payloadType, uint32_t rtp_timestamp_without_offset, diff --git a/audio/channel_send.h b/audio/channel_send.h index c16be987b4..b6a6a37bf5 100644 --- a/audio/channel_send.h +++ b/audio/channel_send.h @@ -83,8 +83,6 @@ class ChannelSendInterface { virtual bool SendTelephoneEventOutband(int event, int duration_ms) = 0; virtual void OnBitrateAllocation(BitrateAllocationUpdate update) = 0; virtual int GetTargetBitrate() const = 0; - - virtual bool InputMute() const = 0; virtual void SetInputMute(bool muted) = 0; virtual void ProcessAndEncodeAudio( diff --git a/call/audio_send_stream.h b/call/audio_send_stream.h index 7e73ab2ce6..9c2fad652f 100644 --- a/call/audio_send_stream.h +++ b/call/audio_send_stream.h @@ -190,7 +190,6 @@ class AudioSendStream : public AudioSender { int event, int duration_ms) = 0; - virtual bool GetMuted() = 0; virtual void SetMuted(bool muted) = 0; virtual Stats GetStats() const = 0; diff --git a/call/audio_state.h b/call/audio_state.h index 85f04758dd..79fb5cf981 100644 --- a/call/audio_state.h +++ b/call/audio_state.h @@ -59,9 +59,6 @@ class AudioState : public rtc::RefCountInterface { virtual void SetStereoChannelSwapping(bool enable) = 0; - // Notify the AudioState that a stream updated it's mute state. 
- virtual void OnMuteStreamChanged() = 0; - static rtc::scoped_refptr Create( const AudioState::Config& config); diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc index 1c6c31020d..efcdcb0bff 100644 --- a/media/engine/webrtc_voice_engine.cc +++ b/media/engine/webrtc_voice_engine.cc @@ -1698,9 +1698,6 @@ bool WebRtcVoiceSendChannel::MuteStream(uint32_t ssrc, bool muted) { ap->set_output_will_be_muted(all_muted); } - // Notfy the AudioState that the mute state has updated. - engine_->audio_state()->OnMuteStreamChanged(); - return true; } diff --git a/media/engine/webrtc_voice_engine.h b/media/engine/webrtc_voice_engine.h index 9d18ef6302..b28b9652bb 100644 --- a/media/engine/webrtc_voice_engine.h +++ b/media/engine/webrtc_voice_engine.h @@ -132,8 +132,6 @@ class WebRtcVoiceEngine final : public VoiceEngineInterface { absl::optional GetAudioDeviceStats() override; - // Moved to public so WebRtcVoiceMediaChannel can access it. - webrtc::AudioState* audio_state(); private: // Every option that is "set" will be applied. 
Every option not "set" will be @@ -147,6 +145,7 @@ class WebRtcVoiceEngine final : public VoiceEngineInterface { webrtc::AudioDeviceModule* adm(); webrtc::AudioProcessing* apm() const; + webrtc::AudioState* audio_state(); std::vector CollectCodecs( const std::vector& specs) const; From d31187b293b7cc13163d0917194a283f9f1e0512 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:26:38 +0700 Subject: [PATCH 02/15] Connect voice engine mute to adm --- media/engine/webrtc_voice_engine.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc index efcdcb0bff..1d87f4e06a 100644 --- a/media/engine/webrtc_voice_engine.cc +++ b/media/engine/webrtc_voice_engine.cc @@ -1698,6 +1698,11 @@ bool WebRtcVoiceSendChannel::MuteStream(uint32_t ssrc, bool muted) { ap->set_output_will_be_muted(all_muted); } + webrtc::AudioDeviceModule* adm = engine()->adm(); + if (adm) { + adm->SetMicrophoneMute(all_muted); + } + return true; } From a7282bdd4e6d3b4b54824701ebd52c6c28d79a75 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:27:34 +0700 Subject: [PATCH 03/15] AudioEngine --- modules/audio_device/audio_device_impl.cc | 14 +- modules/audio_device/audio_engine_device.h | 217 ++++ modules/audio_device/audio_engine_device.mm | 984 ++++++++++++++++++ sdk/BUILD.gn | 4 +- .../audio/RTCAudioSession+Private.h | 3 + sdk/objc/components/audio/RTCAudioSession.h | 4 + sdk/objc/components/audio/RTCAudioSession.mm | 12 + .../src/audio/audio_device_module_ios.h | 4 +- .../src/audio/audio_device_module_ios.mm | 4 +- 9 files changed, 1233 insertions(+), 13 deletions(-) create mode 100644 modules/audio_device/audio_engine_device.h create mode 100644 modules/audio_device/audio_engine_device.mm diff --git a/modules/audio_device/audio_device_impl.cc b/modules/audio_device/audio_device_impl.cc index 
0cd86d7cdb..b5ec373ea1 100644 --- a/modules/audio_device/audio_device_impl.cc +++ b/modules/audio_device/audio_device_impl.cc @@ -34,11 +34,12 @@ #if defined(WEBRTC_ENABLE_LINUX_PULSE) #include "modules/audio_device/linux/audio_device_pulse_linux.h" #endif -#elif defined(WEBRTC_IOS) -#include "sdk/objc/native/src/audio/audio_device_ios.h" -#elif defined(WEBRTC_MAC) -#include "modules/audio_device/mac/audio_device_mac.h" #endif + +#if defined(WEBRTC_IOS) || defined(WEBRTC_MAC) +#include "modules/audio_device/audio_engine_device.h" +#endif + #if defined(WEBRTC_DUMMY_FILE_DEVICES) #include "modules/audio_device/dummy/file_audio_device.h" #include "modules/audio_device/dummy/file_audio_device_factory.h" @@ -246,8 +247,7 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() { // iOS ADM implementation. #if defined(WEBRTC_IOS) if (audio_layer == kPlatformDefaultAudio) { - audio_device_.reset( - new ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/bypass_voice_processing_)); + audio_device_.reset(new AudioEngineDevice(/*bypass_voice_processing=*/bypass_voice_processing_)); RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized."; } // END #if defined(WEBRTC_IOS) @@ -255,7 +255,7 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() { // Mac OS X ADM implementation. #elif defined(WEBRTC_MAC) if (audio_layer == kPlatformDefaultAudio) { - audio_device_.reset(new AudioDeviceMac()); + audio_device_.reset(new AudioEngineDevice(/*bypass_voice_processing=*/false)); RTC_LOG(LS_INFO) << "Mac OS X Audio APIs will be utilized."; } #endif // WEBRTC_MAC diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h new file mode 100644 index 0000000000..27122d5d5a --- /dev/null +++ b/modules/audio_device/audio_engine_device.h @@ -0,0 +1,217 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ +#define SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ + +#include +#include + +#include "api/scoped_refptr.h" +#include "api/sequence_checker.h" +#include "api/task_queue/pending_task_safety_flag.h" +#include "modules/audio_device/audio_device_generic.h" +#include "rtc_base/buffer.h" +#include "rtc_base/thread.h" +#include "rtc_base/thread_annotations.h" +#include "sdk/objc/base/RTCMacros.h" +#include "sdk/objc/native/src/audio/audio_session_observer.h" + +RTC_FWD_DECL_OBJC_CLASS(RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter)); +RTC_FWD_DECL_OBJC_CLASS(AVAudioEngine); +RTC_FWD_DECL_OBJC_CLASS(AVAudioSourceNode); +RTC_FWD_DECL_OBJC_CLASS(AVAudioSinkNode); +RTC_FWD_DECL_OBJC_CLASS(AVAudioFormat); +RTC_FWD_DECL_OBJC_CLASS(AVAudioMixerNode); +RTC_FWD_DECL_OBJC_CLASS(AVAudioUnitEQ); + +namespace webrtc { + +class FineAudioBuffer; + +class AudioEngineDevice : public AudioDeviceGeneric, + public AudioSessionObserver { + public: + explicit AudioEngineDevice(bool bypass_voice_processing); + ~AudioEngineDevice() override; + + void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override; + + InitStatus Init() override; + int32_t Terminate() override; + bool Initialized() const override; + + int32_t InitPlayout() override; + bool PlayoutIsInitialized() const override; + + int32_t InitRecording() override; + bool RecordingIsInitialized() const override; + + int32_t StartPlayout() override; + int32_t StopPlayout() override; + bool Playing() const override; + + int32_t 
StartRecording() override; + int32_t StopRecording() override; + bool Recording() const override; + + int32_t PlayoutDelay(uint16_t& delayMS) const override; + int32_t GetPlayoutUnderrunCount() const override { return -1; } + + // int GetPlayoutAudioParameters(AudioParameters* params) const override; + // int GetRecordAudioParameters(AudioParameters* params) const override; + + int32_t ActiveAudioLayer( + AudioDeviceModule::AudioLayer& audioLayer) const override; + int32_t PlayoutIsAvailable(bool& available) override; + int32_t RecordingIsAvailable(bool& available) override; + int16_t PlayoutDevices() override; + int16_t RecordingDevices() override; + int32_t PlayoutDeviceName(uint16_t index, char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) override; + int32_t RecordingDeviceName(uint16_t index, char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) override; + int32_t SetPlayoutDevice(uint16_t index) override; + int32_t SetPlayoutDevice( + AudioDeviceModule::WindowsDeviceType device) override; + int32_t SetRecordingDevice(uint16_t index) override; + int32_t SetRecordingDevice( + AudioDeviceModule::WindowsDeviceType device) override; + int32_t InitSpeaker() override; + bool SpeakerIsInitialized() const override; + int32_t InitMicrophone() override; + bool MicrophoneIsInitialized() const override; + int32_t SpeakerVolumeIsAvailable(bool& available) override; + int32_t SetSpeakerVolume(uint32_t volume) override; + int32_t SpeakerVolume(uint32_t& volume) const override; + int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override; + int32_t MinSpeakerVolume(uint32_t& minVolume) const override; + int32_t MicrophoneVolumeIsAvailable(bool& available) override; + int32_t SetMicrophoneVolume(uint32_t volume) override; + int32_t MicrophoneVolume(uint32_t& volume) const override; + int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override; + int32_t MinMicrophoneVolume(uint32_t& minVolume) const override; + int32_t 
MicrophoneMuteIsAvailable(bool& available) override; + int32_t SetMicrophoneMute(bool enable) override; + int32_t MicrophoneMute(bool& enabled) const override; + int32_t SpeakerMuteIsAvailable(bool& available) override; + int32_t SetSpeakerMute(bool enable) override; + int32_t SpeakerMute(bool& enabled) const override; + int32_t StereoPlayoutIsAvailable(bool& available) override; + int32_t SetStereoPlayout(bool enable) override; + int32_t StereoPlayout(bool& enabled) const override; + int32_t StereoRecordingIsAvailable(bool& available) override; + int32_t SetStereoRecording(bool enable) override; + int32_t StereoRecording(bool& enabled) const override; + + // AudioSessionObserver methods. May be called from any thread. + void OnInterruptionBegin() override; + void OnInterruptionEnd() override; + void OnValidRouteChange() override; + void OnCanPlayOrRecordChange(bool can_play_or_record) override; + void OnChangedOutputVolume() override; + + bool IsInterrupted(); + + // AudioEngine observer methods. May be called from any thread. + void OnEngineConfigurationChange(); + + private: + struct EngineState { + bool input_enabled = false; + bool input_running = false; + bool output_enabled = false; + bool output_running = false; + + bool input_muted = false; + bool is_interrupted = false; + + bool operator==(const EngineState& rhs) const; + bool operator!=(const EngineState& rhs) const; + + bool IsAnyEnabled() const { return input_enabled || output_enabled; } + bool IsAnyRunning() const { return input_running || output_running; } + + bool IsAllEnabled() const { return input_enabled && output_enabled; } + bool IsAllRunning() const { return input_running && output_running; } + }; + + EngineState engine_state_ RTC_GUARDED_BY(thread_); + + bool IsMicrophonePermissionGranted(); + void SetEngineState(std::function state_transform); + void UpdateEngineState(EngineState old_state, EngineState new_state); + + // Configures the audio session for WebRTC. 
+ bool ConfigureAudioSession(); + + // Like above, but requires caller to already hold session lock. + bool ConfigureAudioSessionLocked(); + + // Unconfigures the audio session. + void UnconfigureAudioSession(); + + // Determines whether voice processing should be enabled or disabled. + const bool bypass_voice_processing_; + + // Native I/O audio thread checker. + SequenceChecker io_thread_checker_; + + // Thread that this object is created on. + rtc::Thread* thread_; + + AudioDeviceBuffer* audio_device_buffer_; + + AudioParameters playout_parameters_; + AudioParameters record_parameters_; + + std::unique_ptr fine_audio_buffer_; + + // Set to true after successful call to Init(), false otherwise. + bool initialized_ RTC_GUARDED_BY(thread_); + + // Audio interruption observer instance. + RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter) * audio_session_observer_ + RTC_GUARDED_BY(thread_); + + // Set to true if we've activated the audio session. + bool has_configured_session_ RTC_GUARDED_BY(thread_); + + // Avoids running pending task after `this` is Terminated. + rtc::scoped_refptr safety_ = + PendingTaskSafetyFlag::Create(); + + // Ratio between mach tick units and nanosecond. Used to change mach tick + // units to nanoseconds. 
+ double machTickUnitsToNanoseconds_; + + // AVAudioEngine objects + AVAudioEngine* audio_engine_; + AVAudioFormat* rtc_internal_format_; // Int16 + + // Output related + AVAudioSourceNode* source_node_; + + // Input related nodes + AVAudioSinkNode* sink_node_; + AVAudioUnitEQ* input_eq_node_; + AVAudioMixerNode* input_mixer_node_; + + void* configuration_observer_; +}; +} // namespace webrtc + +#endif // SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm new file mode 100644 index 0000000000..cf4806cfe9 --- /dev/null +++ b/modules/audio_device/audio_engine_device.mm @@ -0,0 +1,984 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#import +#import + +#include "audio_engine_device.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "api/task_queue/pending_task_safety_flag.h" +#include "modules/audio_device/fine_audio_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/thread.h" +#include "rtc_base/thread_annotations.h" +#include "rtc_base/time_utils.h" +#include "sdk/objc/native/src/audio/helpers.h" +#include "system_wrappers/include/field_trial.h" +#include "system_wrappers/include/metrics.h" + +#import "base/RTCLogging.h" + +#if defined(WEBRTC_IOS) +#import "components/audio/RTCAudioSession+Private.h" +#import "components/audio/RTCAudioSession.h" +#import "components/audio/RTCAudioSessionConfiguration.h" +#import "components/audio/RTCNativeAudioSessionDelegateAdapter.h" +#endif + +namespace webrtc { + +#define LOGI() RTC_LOG(LS_INFO) << "AudioEngineDevice::" +#define LOGE() RTC_LOG(LS_ERROR) << "AudioEngineDevice::" +#define LOGW() RTC_LOG(LS_WARNING) << "AudioEngineDevice::" + +const UInt16 kFixedPlayoutDelayEstimate = 30; +const UInt16 kFixedRecordDelayEstimate = 30; + +const uint32_t N_REC_SAMPLES_PER_SEC = 48000; +const uint32_t N_PLAY_SAMPLES_PER_SEC = 48000; +const uint32_t N_REC_CHANNELS = 1; // default is mono recording +const uint32_t N_PLAY_CHANNELS = 1; // default is stereo playout + +using ios::CheckAndLogError; + +AudioEngineDevice::AudioEngineDevice(bool bypass_voice_processing) + : bypass_voice_processing_(bypass_voice_processing), + audio_device_buffer_(nullptr), + initialized_(false), + has_configured_session_(false) { + LOGI() << "bypass_voice_processing " << bypass_voice_processing_; + + io_thread_checker_.Detach(); + thread_ = rtc::Thread::Current(); + +#if defined(WEBRTC_IOS) + audio_session_observer_ = + [[RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter) alloc] initWithObserver:this]; + // Subscribe to audio session events. 
+ RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + [session pushDelegate:audio_session_observer_]; +#endif + + // Add observer for configuration changes + NSNotificationCenter* center = [NSNotificationCenter defaultCenter]; + configuration_observer_ = (__bridge_retained void*)[center + addObserverForName:AVAudioEngineConfigurationChangeNotification + object:audio_engine_ + queue:nil + usingBlock:^(NSNotification* notification) { + OnEngineConfigurationChange(); + }]; + + mach_timebase_info_data_t tinfo; + mach_timebase_info(&tinfo); + machTickUnitsToNanoseconds_ = (double)tinfo.numer / tinfo.denom; +} + +AudioEngineDevice::~AudioEngineDevice() { + RTC_DCHECK_RUN_ON(thread_); + + if (configuration_observer_) { + [[NSNotificationCenter defaultCenter] + removeObserver:(__bridge_transfer id)configuration_observer_]; + configuration_observer_ = nil; + } + + safety_->SetNotAlive(); + + Terminate(); + audio_session_observer_ = nil; +} + +void AudioEngineDevice::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { + LOGI() << "AttachAudioBuffer"; + RTC_DCHECK(audioBuffer); + RTC_DCHECK_RUN_ON(thread_); + audio_device_buffer_ = audioBuffer; + + // Fixes values for mac. 
+ audio_device_buffer_->SetRecordingSampleRate(N_REC_SAMPLES_PER_SEC); + audio_device_buffer_->SetPlayoutSampleRate(N_PLAY_SAMPLES_PER_SEC); + audio_device_buffer_->SetRecordingChannels(N_REC_CHANNELS); + audio_device_buffer_->SetPlayoutChannels(N_PLAY_CHANNELS); + + fine_audio_buffer_.reset(new FineAudioBuffer(audio_device_buffer_)); +} + +// MARK: - Main life cycle + +bool AudioEngineDevice::Initialized() const { + LOGI() << "Initialized"; + RTC_DCHECK_RUN_ON(thread_); + + return initialized_; +} + +AudioDeviceGeneric::InitStatus AudioEngineDevice::Init() { + LOGI() << "Init"; + io_thread_checker_.Detach(); + + RTC_DCHECK_RUN_ON(thread_); + if (initialized_) { + return InitStatus::OK; + } + +#if defined(WEBRTC_IOS) + RTC_OBJC_TYPE(RTCAudioSessionConfiguration)* config = + [RTC_OBJC_TYPE(RTCAudioSessionConfiguration) webRTCConfiguration]; + playout_parameters_.reset(config.sampleRate, config.outputNumberOfChannels); + record_parameters_.reset(config.sampleRate, config.inputNumberOfChannels); +#endif + + rtc_internal_format_ = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 + sampleRate:48000.0 + channels:1 + interleaved:YES]; + + initialized_ = true; + + return InitStatus::OK; +} + +int32_t AudioEngineDevice::Terminate() { + LOGI() << "Terminate"; + RTC_DCHECK_RUN_ON(thread_); + if (!initialized_) { + return 0; + } + + StopPlayout(); + StopRecording(); + + initialized_ = false; + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// Playout + +bool AudioEngineDevice::PlayoutIsInitialized() const { + LOGI() << "PlayoutIsInitialized"; + RTC_DCHECK_RUN_ON(thread_); + + return engine_state_.output_enabled; +} + +bool AudioEngineDevice::Playing() const { + LOGI() << "Playing"; + RTC_DCHECK_RUN_ON(thread_); + + return engine_state_.output_running; +} + +int32_t AudioEngineDevice::InitPlayout() { + LOGI() << "InitPlayout"; + RTC_DCHECK_RUN_ON(thread_); + RTC_DCHECK(initialized_); + 
RTC_DCHECK(!engine_state_.output_enabled); + RTC_DCHECK(!engine_state_.output_running); + + if (engine_state_.output_enabled) { + LOGW() << "InitPlayout: Already initialized"; + return 0; + } + + SetEngineState([](EngineState state) -> EngineState { + state.output_enabled = true; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::StartPlayout() { + LOGI() << "StartPlayout"; + RTC_DCHECK_RUN_ON(thread_); + RTC_DCHECK(engine_state_.output_enabled); + RTC_DCHECK(!engine_state_.output_running); + + if (!engine_state_.output_enabled) { + LOGW() << "StartPlayout: Not initialized"; + return -1; + } + + if (engine_state_.output_running) { + LOGW() << "StartPlayout: Already playing"; + return 0; + } + + if (fine_audio_buffer_) { + fine_audio_buffer_->ResetPlayout(); + } + + SetEngineState([](EngineState state) -> EngineState { + state.output_running = true; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::StopPlayout() { + LOGI() << "StopPlayout"; + RTC_DCHECK_RUN_ON(thread_); + + if (!engine_state_.output_enabled) { + LOGW() << "StopPlayout: Not initialized"; + return -1; + } + + if (!engine_state_.output_running) { + LOGW() << "StopPlayout: Already stopped"; + return 0; + } + + SetEngineState([](EngineState state) -> EngineState { + state.output_enabled = false; + state.output_running = false; + return state; + }); + + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// Recording + +bool AudioEngineDevice::RecordingIsInitialized() const { + LOGI() << "RecordingIsInitialized"; + RTC_DCHECK_RUN_ON(thread_); + + return engine_state_.input_enabled; +} + +bool AudioEngineDevice::Recording() const { + LOGI() << "Recording"; + RTC_DCHECK_RUN_ON(thread_); + + return engine_state_.input_running; +} + +int32_t AudioEngineDevice::InitRecording() { + LOGI() << "InitRecording"; + RTC_DCHECK_RUN_ON(thread_); + RTC_DCHECK(initialized_); + 
RTC_DCHECK(!engine_state_.input_enabled); + RTC_DCHECK(!engine_state_.input_running); + + if (engine_state_.input_enabled) { + LOGW() << "InitRecording: Already initialized"; + return 0; + } + + SetEngineState([](EngineState state) -> EngineState { + state.input_enabled = true; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::StartRecording() { + LOGI() << "StartRecording"; + RTC_DCHECK_RUN_ON(thread_); + RTC_DCHECK(engine_state_.input_enabled); + RTC_DCHECK(!engine_state_.input_running); + + if (!engine_state_.input_enabled) { + LOGW() << "StartRecording: Not initialized"; + return -1; + } + + if (engine_state_.input_running) { + LOGW() << "StartRecording: Already recording"; + return 0; + } + + if (fine_audio_buffer_) { + fine_audio_buffer_->ResetRecord(); + } + + SetEngineState([](EngineState state) -> EngineState { + state.input_running = true; + state.input_muted = false; // Always unmute + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::StopRecording() { + LOGI() << "StopRecording"; + RTC_DCHECK_RUN_ON(thread_); + + if (!engine_state_.input_enabled) { + LOGW() << "StopRecording: Not initialized"; + return -1; + } + + if (!engine_state_.input_running) { + LOGW() << "StopRecording: Already stopped"; + return 0; + } + + SetEngineState([](EngineState state) -> EngineState { + state.input_enabled = false; + state.input_running = false; + return state; + }); + + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// AudioSessionObserver + +void AudioEngineDevice::OnInterruptionBegin() { + LOGI() << "OnInterruptionBegin"; + + RTC_DCHECK(thread_); + thread_->PostTask(SafeTask(safety_, [this] { + this->SetEngineState([](EngineState state) -> EngineState { + state.is_interrupted = true; + return state; + }); + })); +} + +void AudioEngineDevice::OnInterruptionEnd() { + LOGI() << "OnInterruptionEnd"; + + RTC_DCHECK(thread_); + thread_->PostTask(SafeTask(safety_, 
[this] { + this->SetEngineState([](EngineState state) -> EngineState { + state.is_interrupted = false; + return state; + }); + })); +} + +void AudioEngineDevice::OnValidRouteChange() { + LOGI() << "OnValidRouteChange"; + RTC_DCHECK(thread_); +} + +void AudioEngineDevice::OnCanPlayOrRecordChange(bool can_play_or_record) { + LOGI() << "OnCanPlayOrRecordChange"; + RTC_DCHECK(thread_); +} + +void AudioEngineDevice::OnChangedOutputVolume() { + LOGI() << "OnChangedOutputVolume"; + RTC_DCHECK(thread_); +} + +// ---------------------------------------------------------------------------------------------------- +// Not Implemented + +bool AudioEngineDevice::IsInterrupted() { + LOGI() << "IsInterrupted"; + RTC_DCHECK_RUN_ON(thread_); + + return engine_state_.is_interrupted; +} + +int32_t AudioEngineDevice::ActiveAudioLayer(AudioDeviceModule::AudioLayer& audioLayer) const { + LOGI() << "ActiveAudioLayer"; + audioLayer = AudioDeviceModule::kPlatformDefaultAudio; + + return 0; +} + +int32_t AudioEngineDevice::InitSpeaker() { + LOGI() << "InitSpeaker"; + + return 0; +} + +bool AudioEngineDevice::SpeakerIsInitialized() const { + LOGI() << "SpeakerIsInitialized"; + + return true; +} + +int32_t AudioEngineDevice::SpeakerVolumeIsAvailable(bool& available) { + LOGI() << "SpeakerVolumeIsAvailable"; + available = false; + + return 0; +} + +int32_t AudioEngineDevice::SetSpeakerVolume(uint32_t volume) { + LOGW() << "SetSpeakerVolume: Not implemented, value: " << volume; + + return -1; +} + +int32_t AudioEngineDevice::SpeakerVolume(uint32_t& volume) const { + LOGW() << "SpeakerVolume: Not implemented"; + + return -1; +} + +int32_t AudioEngineDevice::MaxSpeakerVolume(uint32_t& maxVolume) const { + LOGW() << "MaxSpeakerVolume: Not implemented"; + + return -1; +} + +int32_t AudioEngineDevice::MinSpeakerVolume(uint32_t& minVolume) const { + LOGW() << "MinSpeakerVolume: Not implemented"; + + return -1; +} + +int32_t AudioEngineDevice::SpeakerMuteIsAvailable(bool& available) { + LOGI() << 
"SpeakerMuteIsAvailable"; + available = false; + + return 0; +} + +int32_t AudioEngineDevice::SetSpeakerMute(bool enable) { + LOGI() << "SetSpeakerMute: " << enable; + + return -1; +} + +int32_t AudioEngineDevice::SpeakerMute(bool& enabled) const { + LOGW() << "SpeakerMute: Not implemented"; + + return -1; +} + +int32_t AudioEngineDevice::InitMicrophone() { + LOGI() << "InitMicrophone"; + RTC_DCHECK_RUN_ON(thread_); + + return 0; +} + +bool AudioEngineDevice::MicrophoneIsInitialized() const { + LOGI() << "MicrophoneIsInitialized"; + RTC_DCHECK_RUN_ON(thread_); + + return true; +} + +// ---------------------------------------------------------------------------------------------------- +// Microphone Muting + +int32_t AudioEngineDevice::MicrophoneMuteIsAvailable(bool& available) { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "MicrophoneMuteIsAvailable"; + available = true; + return 0; +} + +int32_t AudioEngineDevice::SetMicrophoneMute(bool enable) { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "SetMicrophoneMute: " << enable; + + SetEngineState([enable](EngineState state) -> EngineState { + state.input_muted = enable; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::MicrophoneMute(bool& enabled) const { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "MicrophoneMute"; + + enabled = engine_state_.input_muted; + + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// Stereo Playout + +int32_t AudioEngineDevice::StereoPlayoutIsAvailable(bool& available) { + LOGI() << "StereoPlayoutIsAvailable"; + available = false; + + return 0; +} + +int32_t AudioEngineDevice::SetStereoPlayout(bool enable) { + LOGW() << "SetStereoPlayout: Not implemented, value:" << enable; + + return -1; +} + +int32_t AudioEngineDevice::StereoPlayout(bool& enabled) const { + LOGI() << "StereoPlayout"; + enabled = false; + + return 0; +} + +// 
---------------------------------------------------------------------------------------------------- +// Stereo Recording + +int32_t AudioEngineDevice::StereoRecordingIsAvailable(bool& available) { + LOGI() << "StereoPlayoutIsAvailable"; + available = false; + + return 0; +} + +int32_t AudioEngineDevice::SetStereoRecording(bool enable) { + LOGW() << "SetStereoRecording: Not implemented, value: " << enable; + + return -1; +} + +int32_t AudioEngineDevice::StereoRecording(bool& enabled) const { + LOGI() << "StereoRecording"; + enabled = false; + + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// Microphone Volume + +int32_t AudioEngineDevice::MicrophoneVolumeIsAvailable(bool& available) { + LOGI() << "MicrophoneVolumeIsAvailable"; + available = false; + + return 0; +} + +int32_t AudioEngineDevice::SetMicrophoneVolume(uint32_t volume) { + LOGW() << "SetMicrophoneVolume: Not implemented, value: " << volume; + + return -1; +} + +int32_t AudioEngineDevice::MicrophoneVolume(uint32_t& volume) const { + LOGW() << "SetMicrophoneVolume: Not implemented"; + + return -1; +} + +int32_t AudioEngineDevice::MaxMicrophoneVolume(uint32_t& maxVolume) const { + LOGW() << "SetMicrophoneVolume: Not implemented"; + + return -1; +} + +int32_t AudioEngineDevice::MinMicrophoneVolume(uint32_t& minVolume) const { + LOGW() << "MinMicrophoneVolume: Not implemented"; + + return -1; +} + +// ---------------------------------------------------------------------------------------------------- +// Playout Device + +int32_t AudioEngineDevice::PlayoutIsAvailable(bool& available) { + LOGI() << "PlayoutIsAvailable"; + available = true; + + return 0; +} + +int32_t AudioEngineDevice::SetPlayoutDevice(uint16_t index) { + LOGW() << "SetPlayoutDevice: Not implemented, value: " << index; + + return 0; +} + +int32_t AudioEngineDevice::SetPlayoutDevice(AudioDeviceModule::WindowsDeviceType deviceType) { + LOGW() << "SetPlayoutDevice: Not 
implemented, value: " << deviceType; + + return -1; +} + +int32_t AudioEngineDevice::PlayoutDeviceName(uint16_t index, char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) { + LOGW() << "PlayoutDeviceName: Not implemented"; + + return -1; +} + +int16_t AudioEngineDevice::PlayoutDevices() { + LOGI() << "PlayoutDevices"; + + return (int16_t)1; +} + +// ---------------------------------------------------------------------------------------------------- +// Recording Device + +int32_t AudioEngineDevice::RecordingDeviceName(uint16_t index, char name[kAdmMaxDeviceNameSize], + char guid[kAdmMaxGuidSize]) { + LOGW() << "RecordingDeviceName"; + + return -1; +} + +int32_t AudioEngineDevice::SetRecordingDevice(uint16_t index) { + LOGI() << "SetRecordingDevice, index: " << index; + + return 0; +} + +int32_t AudioEngineDevice::SetRecordingDevice(AudioDeviceModule::WindowsDeviceType type) { + LOGI() << "SetRecordingDevice, type: " << type; + + return -1; +} + +int32_t AudioEngineDevice::RecordingIsAvailable(bool& available) { + LOGI() << "RecordingIsAvailable"; + + available = true; + return 0; +} + +int16_t AudioEngineDevice::RecordingDevices() { + LOGI() << "RecordingDevices"; + + return (int16_t)1; +} + +// ---------------------------------------------------------------------------------------------------- +// Misc + +int32_t AudioEngineDevice::PlayoutDelay(uint16_t& delayMS) const { + delayMS = kFixedPlayoutDelayEstimate; + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// Private - Engine Related + +void AudioEngineDevice::OnEngineConfigurationChange() { + LOGI() << "OnEngineConfigurationChange"; + + thread_->PostTask(SafeTask(safety_, [this] { + RTC_DCHECK_RUN_ON(thread_); + + EngineState previous_state = this->engine_state_; + + this->SetEngineState([](EngineState state) -> EngineState { + return EngineState(); // Return default state to shutdown + }); + + 
this->SetEngineState([previous_state](EngineState state) -> EngineState { + return previous_state; // Recover engine state + }); + })); +} + +bool AudioEngineDevice::IsMicrophonePermissionGranted() { + AVAuthorizationStatus status = [AVCaptureDevice authorizationStatusForMediaType:AVMediaTypeAudio]; + return status == AVAuthorizationStatusAuthorized; +} + +void AudioEngineDevice::SetEngineState(std::function state_transform) { + RTC_DCHECK_RUN_ON(thread_); + + EngineState old_state = engine_state_; + EngineState new_state = state_transform(old_state); + +#if defined(WEBRTC_IOS) + if ((!old_state.output_enabled && new_state.output_enabled) || + (!old_state.input_enabled && new_state.input_enabled)) { + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + [session lockForConfiguration]; + [session notifyAudioEngineWillUpdateStateWithOutputEnabled:new_state.output_enabled + inputEnabled:new_state.input_enabled]; + ConfigureAudioSessionLocked(); + [session unlockForConfiguration]; + } + + if (!old_state.output_enabled && new_state.output_enabled) { + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + bool is_category_record = session.category == AVAudioSessionCategoryPlayAndRecord || + session.category == AVAudioSessionCategoryRecord; + + // Already enable input if mic perms are already granted. 
+ if (!new_state.input_enabled && is_category_record) { + EngineState update_state = new_state; + update_state.input_enabled = true; + update_state.input_muted = true; + new_state = update_state; + } + } +#endif + + if (old_state == new_state) { + LOGI() << "SetEngineState: Nothing to update"; + return; + } + + // Checks + if (new_state.input_running) { + RTC_DCHECK(new_state.input_enabled); + } + + if (new_state.output_running) { + RTC_DCHECK(new_state.output_enabled); + } + + UpdateEngineState(old_state, new_state); + engine_state_ = new_state; +} + +void AudioEngineDevice::UpdateEngineState(EngineState old_state, EngineState new_state) { + RTC_DCHECK_RUN_ON(thread_); + + if (!old_state.IsAnyEnabled() && new_state.IsAnyEnabled()) { + LOGI() << "Creating AVAudioEngine..."; + audio_engine_ = [[AVAudioEngine alloc] init]; + } + + bool did_change_audio_graph = (old_state.input_enabled != new_state.input_enabled) || + (old_state.output_enabled != new_state.output_enabled); + + if (old_state.IsAnyRunning()) { + if (!new_state.IsAnyRunning() || did_change_audio_graph) { + LOGI() << "Stopping AVAudioEngine..."; + [audio_engine_ stop]; + } else if (!old_state.is_interrupted && new_state.is_interrupted) { + LOGI() << "Pausing AVAudioEngine..."; + [audio_engine_ pause]; + } + } + + if (!old_state.output_enabled && new_state.output_enabled) { + LOGI() << "Enabling output for AVAudioEngine..."; + RTC_DCHECK(!audio_engine_.running); + + AVAudioFormat* output_format = [audio_engine_.outputNode outputFormatForBus:0]; + + AVAudioSourceNodeRenderBlock source_block = + ^OSStatus(BOOL* isSilence, const AudioTimeStamp* timestamp, AVAudioFrameCount frameCount, + AudioBufferList* outputData) { + RTC_DCHECK(outputData->mNumberBuffers == 1); + + int16_t* dest_buffer = (int16_t*)outputData->mBuffers[0].mData; + + fine_audio_buffer_->GetPlayoutData( + rtc::ArrayView(static_cast(dest_buffer), frameCount), + kFixedPlayoutDelayEstimate); + + return noErr; + }; + + source_node_ = 
[[AVAudioSourceNode alloc] initWithFormat:rtc_internal_format_ + renderBlock:source_block]; + + [audio_engine_ attachNode:source_node_]; + + [audio_engine_ connect:source_node_ to:audio_engine_.mainMixerNode format:output_format]; + + [audio_engine_ connect:audio_engine_.mainMixerNode + to:audio_engine_.outputNode + format:output_format]; + + } else if (old_state.output_enabled && !new_state.output_enabled) { + LOGI() << "Disabling output for AVAudioEngine..."; + RTC_DCHECK(!audio_engine_.running); + + // Disconnect + [audio_engine_ disconnectNodeInput:source_node_]; + [audio_engine_ disconnectNodeOutput:source_node_]; + // Detach + [audio_engine_ detachNode:source_node_]; + // Release + source_node_ = nil; + } + + if (!old_state.input_enabled && new_state.input_enabled) { + LOGI() << "Enabling input for AVAudioEngine..."; + RTC_DCHECK(!audio_engine_.running); + + AVAudioFormat* input_format = [audio_engine_.inputNode outputFormatForBus:0]; + + input_eq_node_ = [[AVAudioUnitEQ alloc] initWithNumberOfBands:2]; + [audio_engine_ attachNode:input_eq_node_]; + + input_mixer_node_ = [[AVAudioMixerNode alloc] init]; + [audio_engine_ attachNode:input_mixer_node_]; + + AVAudioSinkNodeReceiverBlock sink_block = ^OSStatus(const AudioTimeStamp* timestamp, + AVAudioFrameCount frameCount, + const AudioBufferList* inputData) { + RTC_DCHECK(inputData->mNumberBuffers == 1); + + const int64_t capture_time_ns = timestamp->mHostTime * machTickUnitsToNanoseconds_; + const int16_t* rtc_buffer = (int16_t*)inputData->mBuffers[0].mData; + + fine_audio_buffer_->DeliverRecordedData(rtc::ArrayView(rtc_buffer, frameCount), + kFixedRecordDelayEstimate, capture_time_ns); + + return noErr; + }; + + sink_node_ = [[AVAudioSinkNode alloc] initWithReceiverBlock:sink_block]; + [audio_engine_ attachNode:sink_node_]; + + // InputNode -> InputEQNode -> InputMixerNode -> SinkNode -> RTC + [audio_engine_ connect:audio_engine_.inputNode to:input_eq_node_ format:input_format]; + + [audio_engine_ 
connect:input_eq_node_ to:input_mixer_node_ format:input_format]; + // Convert to RTC's internal format before passing buffers to SinkNode. + [audio_engine_ connect:input_mixer_node_ to:sink_node_ format:rtc_internal_format_]; + +#if defined(WEBRTC_IOS) + if (!audio_engine_.inputNode.voiceProcessingEnabled) { + NSError* error = nil; + BOOL set_input_vp_result = [audio_engine_.inputNode setVoiceProcessingEnabled:YES + error:&error]; + if (!set_input_vp_result) { + NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); + RTC_DCHECK(set_input_vp_result); + } + LOGI() << "setVoiceProcessingEnabled (input) result: " << set_input_vp_result ? "YES" : "NO"; + } +#endif + } else if (old_state.input_enabled && !new_state.input_enabled) { + LOGI() << "Disabling input for AVAudioEngine..."; + RTC_DCHECK(!audio_engine_.running); + + // Disconnect input eq + [audio_engine_ disconnectNodeInput:input_eq_node_]; + [audio_engine_ disconnectNodeOutput:input_eq_node_]; + [audio_engine_ detachNode:input_eq_node_]; + input_eq_node_ = nil; + + // InputMixerNode + [audio_engine_ disconnectNodeInput:input_mixer_node_]; + [audio_engine_ disconnectNodeOutput:input_mixer_node_]; + [audio_engine_ detachNode:input_mixer_node_]; + input_mixer_node_ = nil; + + // SinkNode + [audio_engine_ disconnectNodeInput:sink_node_]; + [audio_engine_ disconnectNodeOutput:sink_node_]; + [audio_engine_ detachNode:sink_node_]; + sink_node_ = nil; + } + + if (new_state.input_enabled) { + if (audio_engine_.inputNode.voiceProcessingEnabled) { + // Re-apply muted state. 
+ audio_engine_.inputNode.voiceProcessingInputMuted = new_state.input_muted; + } + } + + if (new_state.IsAnyRunning()) { + if (!old_state.IsAnyRunning() || (old_state.is_interrupted && !new_state.is_interrupted) || + did_change_audio_graph) { + LOGI() << "Starting AVAudioEngine..."; + NSError* error = nil; + BOOL start_result = [audio_engine_ startAndReturnError:&error]; + if (!start_result) { + LOGE() << "Failed to start engine: " << error.localizedDescription.UTF8String; + } + } + } + + if (old_state.IsAnyEnabled() && !new_state.IsAnyEnabled()) { + LOGI() << "Releasing AVAudioEngine..."; + audio_engine_ = nil; + } +} + +// ---------------------------------------------------------------------------------------------------- +// Private - EngineState + +bool AudioEngineDevice::EngineState::operator==(const EngineState& rhs) const { + return input_enabled == rhs.input_enabled && output_enabled == rhs.output_enabled && + input_running == rhs.input_running && output_running == rhs.output_running && + input_muted == rhs.input_muted && is_interrupted == rhs.is_interrupted; +} + +bool AudioEngineDevice::EngineState::operator!=(const EngineState& rhs) const { + return !(*this == rhs); +} + +// ---------------------------------------------------------------------------------------------------- +// Private - Audio session +#if defined(WEBRTC_IOS) +bool AudioEngineDevice::ConfigureAudioSession() { + RTC_DCHECK_RUN_ON(thread_); + RTCLog(@"Configuring audio session."); + if (has_configured_session_) { + RTCLogWarning(@"Audio session already configured."); + return false; + } + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + [session lockForConfiguration]; + bool success = [session configureWebRTCSession:nil]; + [session unlockForConfiguration]; + if (success) { + has_configured_session_ = true; + RTCLog(@"Configured audio session."); + } else { + RTCLog(@"Failed to configure audio session."); + } + return success; +} + +bool 
AudioEngineDevice::ConfigureAudioSessionLocked() { + RTC_DCHECK_RUN_ON(thread_); + RTCLog(@"Configuring audio session."); + if (has_configured_session_) { + RTCLogWarning(@"Audio session already configured."); + return false; + } + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + bool success = [session configureWebRTCSession:nil]; + if (success) { + has_configured_session_ = true; + RTCLog(@"Configured audio session."); + } else { + RTCLog(@"Failed to configure audio session."); + } + return success; +} + +void AudioEngineDevice::UnconfigureAudioSession() { + RTC_DCHECK_RUN_ON(thread_); + RTCLog(@"Unconfiguring audio session."); + if (!has_configured_session_) { + RTCLogWarning(@"Audio session already unconfigured."); + return; + } + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + [session lockForConfiguration]; + [session unconfigureWebRTCSession:nil]; + [session endWebRTCSession:nil]; + [session unlockForConfiguration]; + has_configured_session_ = false; + RTCLog(@"Unconfigured audio session."); +} +#endif + +} // namespace webrtc diff --git a/sdk/BUILD.gn b/sdk/BUILD.gn index 33bf72df8e..50ce76bd27 100644 --- a/sdk/BUILD.gn +++ b/sdk/BUILD.gn @@ -292,8 +292,6 @@ if (is_ios || is_mac) { visibility = [ "*" ] sources = [ - "objc/native/src/audio/audio_device_ios.h", - "objc/native/src/audio/audio_device_ios.mm", "objc/native/src/audio/audio_device_module_ios.h", "objc/native/src/audio/audio_device_module_ios.mm", "objc/native/src/audio/helpers.h", @@ -535,6 +533,8 @@ if (is_ios || is_mac) { sources = [ "objc/native/api/objc_audio_device_module.h", "objc/native/api/objc_audio_device_module.mm", + "../modules/audio_device/audio_engine_device.h", + "../modules/audio_device/audio_engine_device.mm", ] deps = [ diff --git a/sdk/objc/components/audio/RTCAudioSession+Private.h b/sdk/objc/components/audio/RTCAudioSession+Private.h index 2be1b9fb3d..8a3c52e5b7 100644 --- 
a/sdk/objc/components/audio/RTCAudioSession+Private.h +++ b/sdk/objc/components/audio/RTCAudioSession+Private.h @@ -90,6 +90,9 @@ NS_ASSUME_NONNULL_BEGIN - (void)notifyDidStartPlayOrRecord; - (void)notifyDidStopPlayOrRecord; +- (void)notifyAudioEngineWillUpdateStateWithOutputEnabled:(BOOL)isOutputEnabled + inputEnabled:(BOOL)isInputEnabled; + @end NS_ASSUME_NONNULL_END diff --git a/sdk/objc/components/audio/RTCAudioSession.h b/sdk/objc/components/audio/RTCAudioSession.h index 2730664858..8b34f9299c 100644 --- a/sdk/objc/components/audio/RTCAudioSession.h +++ b/sdk/objc/components/audio/RTCAudioSession.h @@ -102,6 +102,10 @@ RTC_OBJC_EXPORT - (void)audioSession:(RTC_OBJC_TYPE(RTCAudioSession) *)audioSession audioUnitStartFailedWithError:(NSError *)error; +- (void)audioSession:(RTC_OBJC_TYPE(RTCAudioSession) *)audioSession + audioEngineWillUpdateStateWithOutputEnabled:(BOOL)isOutputEnabled + inputEnabled:(BOOL)isInputEnabled; + @end /** This is a protocol used to inform RTCAudioSession when the audio session diff --git a/sdk/objc/components/audio/RTCAudioSession.mm b/sdk/objc/components/audio/RTCAudioSession.mm index 11d1a1c337..a914ce2f18 100644 --- a/sdk/objc/components/audio/RTCAudioSession.mm +++ b/sdk/objc/components/audio/RTCAudioSession.mm @@ -1010,4 +1010,16 @@ - (void)notifyFailedToSetActive:(BOOL)active error:(NSError *)error { } } +- (void)notifyAudioEngineWillUpdateStateWithOutputEnabled:(BOOL)isOutputEnabled + inputEnabled:(BOOL)isInputEnabled { + for (auto delegate : self.delegates) { + SEL sel = @selector(audioSession:audioEngineWillUpdateStateWithOutputEnabled:inputEnabled:); + if ([delegate respondsToSelector:sel]) { + [delegate audioSession:self + audioEngineWillUpdateStateWithOutputEnabled:isOutputEnabled + inputEnabled:isInputEnabled]; + } + } +} + @end diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.h b/sdk/objc/native/src/audio/audio_device_module_ios.h index 2f9b95a0a8..d67822baee 100644 --- 
a/sdk/objc/native/src/audio/audio_device_module_ios.h +++ b/sdk/objc/native/src/audio/audio_device_module_ios.h @@ -14,8 +14,8 @@ #include #include "api/task_queue/task_queue_factory.h" -#include "audio_device_ios.h" #include "modules/audio_device/audio_device_buffer.h" +#include "modules/audio_device/audio_engine_device.h" #include "modules/audio_device/include/audio_device.h" #include "rtc_base/checks.h" @@ -136,7 +136,7 @@ class AudioDeviceModuleIOS : public AudioDeviceModule { const bool bypass_voice_processing_; bool initialized_ = false; const std::unique_ptr task_queue_factory_; - std::unique_ptr audio_device_; + std::unique_ptr audio_device_; std::unique_ptr audio_device_buffer_; }; } // namespace ios_adm diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.mm b/sdk/objc/native/src/audio/audio_device_module_ios.mm index 5f93a06ee8..ece6c6cff3 100644 --- a/sdk/objc/native/src/audio/audio_device_module_ios.mm +++ b/sdk/objc/native/src/audio/audio_device_module_ios.mm @@ -19,7 +19,7 @@ #include "system_wrappers/include/metrics.h" #if defined(WEBRTC_IOS) -#include "audio_device_ios.h" +#include "modules/audio_device/audio_engine_device.h" #endif #define CHECKinitialized_() \ @@ -72,7 +72,7 @@ return 0; audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer(task_queue_factory_.get())); - audio_device_.reset(new ios_adm::AudioDeviceIOS(bypass_voice_processing_)); + audio_device_.reset(new AudioEngineDevice(bypass_voice_processing_)); RTC_CHECK(audio_device_); this->AttachAudioBuffer(); From db00fe4055f75908c286b16fe0df2e6a7b17d0bd Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 11 Dec 2024 00:16:55 +0700 Subject: [PATCH 04/15] Other audio ducking --- modules/audio_device/audio_engine_device.mm | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index cf4806cfe9..cc57811e06 100644 --- 
a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -852,6 +852,7 @@ #if defined(WEBRTC_IOS) if (!audio_engine_.inputNode.voiceProcessingEnabled) { + // Voice processing. NSError* error = nil; BOOL set_input_vp_result = [audio_engine_.inputNode setVoiceProcessingEnabled:YES error:&error]; @@ -860,6 +861,17 @@ RTC_DCHECK(set_input_vp_result); } LOGI() << "setVoiceProcessingEnabled (input) result: " << set_input_vp_result ? "YES" : "NO"; + + // Other audio ducking. + // iOS 17.0+, iPadOS 17.0+, Mac Catalyst 17.0+, macOS 14.0+, visionOS 1.0+ + if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { + AVAudioVoiceProcessingOtherAudioDuckingConfiguration ducking_config; + ducking_config.enableAdvancedDucking = YES; + ducking_config.duckingLevel = AVAudioVoiceProcessingOtherAudioDuckingLevelMax; + + LOGI() << "setVoiceProcessingOtherAudioDuckingConfiguration"; + [audio_engine_.inputNode setVoiceProcessingOtherAudioDuckingConfiguration:ducking_config]; + } } #endif } else if (old_state.input_enabled && !new_state.input_enabled) { From ed22ffb241528440eb38e29943f195d773034111 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 17 Dec 2024 01:11:23 +0700 Subject: [PATCH 05/15] Move to private method --- modules/audio_device/audio_engine_device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index 27122d5d5a..d2b5ac7958 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -126,8 +126,6 @@ class AudioEngineDevice : public AudioDeviceGeneric, bool IsInterrupted(); - // AudioEngine observer methods. May be called from any thread. 
- void OnEngineConfigurationChange(); private: struct EngineState { @@ -164,6 +162,9 @@ class AudioEngineDevice : public AudioDeviceGeneric, // Unconfigures the audio session. void UnconfigureAudioSession(); + // AudioEngine observer methods. May be called from any thread. + void OnEngineConfigurationChange(); + // Determines whether voice processing should be enabled or disabled. const bool bypass_voice_processing_; From 0324b227245ede07f7fb93186ba7e7813b1fe373 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Mon, 16 Dec 2024 21:11:45 +0700 Subject: [PATCH 06/15] Rename AudioDeviceSink --- modules/audio_device/audio_device_data_observer.cc | 4 ++-- modules/audio_device/audio_device_generic.h | 2 +- modules/audio_device/audio_device_impl.cc | 6 +++--- modules/audio_device/audio_device_impl.h | 2 +- modules/audio_device/include/audio_device.h | 7 +++---- modules/audio_device/mac/audio_device_mac.h | 6 +++--- modules/audio_device/win/audio_device_core_win.cc | 8 ++++---- modules/audio_device/win/audio_device_core_win.h | 6 +++--- sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm | 13 +++++++------ sdk/objc/native/src/audio/audio_device_module_ios.h | 2 +- .../native/src/audio/audio_device_module_ios.mm | 4 ++-- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/modules/audio_device/audio_device_data_observer.cc b/modules/audio_device/audio_device_data_observer.cc index 88a8301c4f..313acd5c52 100644 --- a/modules/audio_device/audio_device_data_observer.cc +++ b/modules/audio_device/audio_device_data_observer.cc @@ -307,8 +307,8 @@ class ADMWrapper : public AudioDeviceModule, public AudioTransport { } #endif // WEBRTC_IOS - int32_t SetAudioDeviceSink(AudioDeviceSink* sink) const override { - return impl_->SetAudioDeviceSink(sink); + int32_t SetObserver(AudioDeviceObserver* observer) const override { + return impl_->SetObserver(observer); } protected: diff --git 
a/modules/audio_device/audio_device_generic.h b/modules/audio_device/audio_device_generic.h index 0585129de4..ff7f80da62 100644 --- a/modules/audio_device/audio_device_generic.h +++ b/modules/audio_device/audio_device_generic.h @@ -135,7 +135,7 @@ class AudioDeviceGeneric { virtual int GetRecordAudioParameters(AudioParameters* params) const; #endif // WEBRTC_IOS - virtual int32_t SetAudioDeviceSink(AudioDeviceSink* sink) { return -1; } + virtual int32_t SetObserver(AudioDeviceObserver* observer) { return -1; } virtual int32_t GetPlayoutDevice() const { return -1; } virtual int32_t GetRecordingDevice() const { return -1; } diff --git a/modules/audio_device/audio_device_impl.cc b/modules/audio_device/audio_device_impl.cc index b5ec373ea1..1308d677ef 100644 --- a/modules/audio_device/audio_device_impl.cc +++ b/modules/audio_device/audio_device_impl.cc @@ -902,9 +902,9 @@ int AudioDeviceModuleImpl::GetRecordAudioParameters( } #endif // WEBRTC_IOS -int32_t AudioDeviceModuleImpl::SetAudioDeviceSink(AudioDeviceSink* sink) const { - RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << sink << ")"; - int32_t ok = audio_device_->SetAudioDeviceSink(sink); +int32_t AudioDeviceModuleImpl::SetObserver(AudioDeviceObserver* observer) const { + RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << observer << ")"; + int32_t ok = audio_device_->SetObserver(observer); RTC_LOG(LS_INFO) << "output: " << ok; return ok; } diff --git a/modules/audio_device/audio_device_impl.h b/modules/audio_device/audio_device_impl.h index fd9b62c65b..02c763e82c 100644 --- a/modules/audio_device/audio_device_impl.h +++ b/modules/audio_device/audio_device_impl.h @@ -156,7 +156,7 @@ class AudioDeviceModuleImpl : public AudioDeviceModuleForTest { int GetRecordAudioParameters(AudioParameters* params) const override; #endif // WEBRTC_IOS - int32_t SetAudioDeviceSink(AudioDeviceSink* sink) const override; + int32_t SetObserver(AudioDeviceObserver* observer) const override; int32_t GetPlayoutDevice() const override; int32_t 
GetRecordingDevice() const override; diff --git a/modules/audio_device/include/audio_device.h b/modules/audio_device/include/audio_device.h index 58019cc24f..27af770ddf 100644 --- a/modules/audio_device/include/audio_device.h +++ b/modules/audio_device/include/audio_device.h @@ -21,10 +21,9 @@ namespace webrtc { class AudioDeviceModuleForTest; -// Sink for callbacks related to a audio device. -class AudioDeviceSink { +class AudioDeviceObserver { public: - virtual ~AudioDeviceSink() = default; + virtual ~AudioDeviceObserver() = default; // input/output devices updated or default device changed virtual void OnDevicesUpdated() = 0; @@ -182,7 +181,7 @@ class AudioDeviceModule : public rtc::RefCountInterface { virtual int GetRecordAudioParameters(AudioParameters* params) const = 0; #endif // WEBRTC_IOS - virtual int32_t SetAudioDeviceSink(AudioDeviceSink* sink) const { return -1; } + virtual int32_t SetObserver(AudioDeviceObserver* observer) const { return -1; } virtual int32_t GetPlayoutDevice() const { return -1; } virtual int32_t GetRecordingDevice() const { return -1; } diff --git a/modules/audio_device/mac/audio_device_mac.h b/modules/audio_device/mac/audio_device_mac.h index 6cb5482a84..84d7a7691b 100644 --- a/modules/audio_device/mac/audio_device_mac.h +++ b/modules/audio_device/mac/audio_device_mac.h @@ -154,8 +154,8 @@ class AudioDeviceMac : public AudioDeviceGeneric { virtual void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) RTC_LOCKS_EXCLUDED(mutex_); - virtual int32_t SetAudioDeviceSink(AudioDeviceSink* sink) RTC_LOCKS_EXCLUDED(mutex_) { - audio_device_module_sink_ = sink; + virtual int32_t SetObserver(AudioDeviceObserver* observer) RTC_LOCKS_EXCLUDED(mutex_) { + audio_device_module_sink_ = observer; return 0; } virtual int32_t GetPlayoutDevice() const; @@ -354,7 +354,7 @@ class AudioDeviceMac : public AudioDeviceGeneric { // 0x5c is key "9", after that comes function keys. 
bool prev_key_state_[0x5d]; - AudioDeviceSink *audio_device_module_sink_ = nullptr; + AudioDeviceObserver *audio_device_module_sink_ = nullptr; }; } // namespace webrtc diff --git a/modules/audio_device/win/audio_device_core_win.cc b/modules/audio_device/win/audio_device_core_win.cc index c1c2b32a9b..3f2675a5d4 100644 --- a/modules/audio_device/win/audio_device_core_win.cc +++ b/modules/audio_device/win/audio_device_core_win.cc @@ -3905,13 +3905,13 @@ int32_t AudioDeviceWindowsCore::_GetDeviceID(IMMDevice* pDevice, return 0; } -int32_t AudioDeviceWindowsCore::SetAudioDeviceSink(AudioDeviceSink* sink) { - _deviceStateListener->SetAudioDeviceSink(sink); +int32_t AudioDeviceWindowsCore::SetObserver(AudioDeviceObserver* observer) { + _deviceStateListener->SetObserver(observer); return 0; } -void AudioDeviceWindowsCore::DeviceStateListener::SetAudioDeviceSink(AudioDeviceSink *sink) { - callback_ = sink; +void AudioDeviceWindowsCore::DeviceStateListener::SetObserver(AudioDeviceObserver *observer) { + callback_ = observer; } HRESULT AudioDeviceWindowsCore::DeviceStateListener::OnDeviceStateChanged(LPCWSTR pwstrDeviceId, DWORD dwNewState) { diff --git a/modules/audio_device/win/audio_device_core_win.h b/modules/audio_device/win/audio_device_core_win.h index 10b6a92b7f..e7fe6bd71a 100644 --- a/modules/audio_device/win/audio_device_core_win.h +++ b/modules/audio_device/win/audio_device_core_win.h @@ -73,11 +73,11 @@ class AudioDeviceWindowsCore : public AudioDeviceGeneric { ULONG __stdcall Release() override; HRESULT __stdcall QueryInterface(REFIID iid, void** object) override; - void SetAudioDeviceSink(AudioDeviceSink *sink); + void SetObserver(AudioDeviceObserver *sink); private: LONG ref_count_ = 1; - AudioDeviceSink *callback_ = nullptr; + AudioDeviceObserver *callback_ = nullptr; }; static bool CoreAudioIsSupported(); @@ -180,7 +180,7 @@ class AudioDeviceWindowsCore : public AudioDeviceGeneric { virtual int32_t EnableBuiltInAEC(bool enable); - virtual int32_t 
SetAudioDeviceSink(AudioDeviceSink* sink); + virtual int32_t SetObserver(AudioDeviceObserver* observer); public: virtual void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer); diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm index c88de392d7..b935f6db9e 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm @@ -23,13 +23,13 @@ #import "sdk/objc/native/api/audio_device_module.h" -class AudioDeviceSink : public webrtc::AudioDeviceSink { +class AudioDeviceObserver : public webrtc::AudioDeviceObserver { public: - AudioDeviceSink() {} + AudioDeviceObserver() {} void OnDevicesUpdated() override { - RTCLogInfo(@"AudioDeviceSink OnDevicesUpdated"); + RTCLogInfo(@"AudioDeviceObserver OnDevicesUpdated"); if (callback_handler_) { callback_handler_(); @@ -43,7 +43,7 @@ void OnDevicesUpdated() override { @implementation RTC_OBJC_TYPE (RTCAudioDeviceModule) { rtc::Thread *_workerThread; rtc::scoped_refptr _native; - AudioDeviceSink *_sink; + AudioDeviceObserver *_observer; } - (instancetype)initWithNativeModule:(rtc::scoped_refptr )module @@ -55,10 +55,10 @@ - (instancetype)initWithNativeModule:(rtc::scoped_refptrBlockingCall([self] { - _native->SetAudioDeviceSink(_sink); + _native->SetObserver(_observer); }); return self; @@ -242,6 +242,7 @@ - (BOOL)initRecording { - (BOOL)setDevicesUpdatedHandler: (nullable RTCOnAudioDevicesDidUpdate) handler { _sink->callback_handler_ = handler; + _observer->callback_handler_ = callback; return YES; } diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.h b/sdk/objc/native/src/audio/audio_device_module_ios.h index d67822baee..349e4e840d 100644 --- a/sdk/objc/native/src/audio/audio_device_module_ios.h +++ b/sdk/objc/native/src/audio/audio_device_module_ios.h @@ -130,7 +130,7 @@ class AudioDeviceModuleIOS : public AudioDeviceModule { int GetRecordAudioParameters(AudioParameters* params) const 
override; #endif // WEBRTC_IOS - int32_t SetAudioDeviceSink(AudioDeviceSink* sink) const override; + int32_t SetObserver(AudioDeviceObserver* observer) const override; private: const bool bypass_voice_processing_; diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.mm b/sdk/objc/native/src/audio/audio_device_module_ios.mm index ece6c6cff3..a20d989e83 100644 --- a/sdk/objc/native/src/audio/audio_device_module_ios.mm +++ b/sdk/objc/native/src/audio/audio_device_module_ios.mm @@ -666,9 +666,9 @@ } #endif // WEBRTC_IOS - int32_t AudioDeviceModuleIOS::SetAudioDeviceSink(AudioDeviceSink* sink) const { + int32_t AudioDeviceModuleIOS::SetObserver(AudioDeviceObserver* observer) const { // not implemented - RTC_LOG(LS_WARNING) << __FUNCTION__ << "(" << sink << ") Not implemented"; + RTC_LOG(LS_WARNING) << __FUNCTION__ << "(" << observer << ") Not implemented"; return -1; } } From 1bdb158bbc63cd660b72e9d0e8529e219f0be809 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:35:12 +0700 Subject: [PATCH 07/15] Muted talker detection --- modules/audio_device/audio_engine_device.h | 3 + modules/audio_device/audio_engine_device.mm | 34 ++++++++ modules/audio_device/include/audio_device.h | 24 ++++-- .../api/peerconnection/RTCAudioDeviceModule.h | 11 ++- .../peerconnection/RTCAudioDeviceModule.mm | 77 +++++++++++++++---- .../src/audio/audio_device_module_ios.mm | 7 +- 6 files changed, 128 insertions(+), 28 deletions(-) diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index d2b5ac7958..6f9875c850 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -126,6 +126,7 @@ class AudioEngineDevice : public AudioDeviceGeneric, bool IsInterrupted(); + int32_t SetObserver(AudioDeviceObserver* observer) override; private: struct EngineState { @@ -184,6 +185,8 @@ class AudioEngineDevice : public AudioDeviceGeneric, // 
Set to true after successful call to Init(), false otherwise. bool initialized_ RTC_GUARDED_BY(thread_); + AudioDeviceObserver* observer_ RTC_GUARDED_BY(thread_); + // Audio interruption observer instance. RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter) * audio_session_observer_ RTC_GUARDED_BY(thread_); diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index cc57811e06..5d8a6cbf1f 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -673,6 +673,14 @@ return 0; } +int32_t AudioEngineDevice::SetObserver(AudioDeviceObserver* observer) { + LOGI() << "SetObserver"; + RTC_DCHECK_RUN_ON(thread_); + + observer_ = observer; + return 0; +} + // ---------------------------------------------------------------------------------------------------- // Private - Engine Related @@ -862,6 +870,32 @@ } LOGI() << "setVoiceProcessingEnabled (input) result: " << set_input_vp_result ? "YES" : "NO"; + // Muted talker detection. + if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, tvOS 17.0, visionOS 1.0, *)) { + auto listener_block = ^(AVAudioVoiceProcessingSpeechActivityEvent event) { + LOGI() << "AVAudioVoiceProcessingSpeechActivityEvent: " << event; + RTC_DCHECK(event == AVAudioVoiceProcessingSpeechActivityStarted || + event == AVAudioVoiceProcessingSpeechActivityEnded); + AudioDeviceModule::SpeechActivityEvent rtc_event = + (event == AVAudioVoiceProcessingSpeechActivityStarted + ? AudioDeviceModule::SpeechActivityEvent::kStarted + : AudioDeviceModule::SpeechActivityEvent::kEnded); + + thread_->PostTask(SafeTask(safety_, [this, rtc_event] { + RTC_DCHECK_RUN_ON(thread_); // Silence warning. 
+ if (this->observer_ != nullptr) { + this->observer_->OnSpeechActivityEvent(rtc_event); + } + })); + }; + + BOOL set_listener_result = + [audio_engine_.inputNode setMutedSpeechActivityEventListener:listener_block]; + LOGI() << "setMutedSpeechActivityEventListener result: " << set_listener_result ? "YES" + : "NO"; + RTC_DCHECK(set_listener_result); + } + // Other audio ducking. // iOS 17.0+, iPadOS 17.0+, Mac Catalyst 17.0+, macOS 14.0+, visionOS 1.0+ if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { diff --git a/modules/audio_device/include/audio_device.h b/modules/audio_device/include/audio_device.h index 27af770ddf..592108a607 100644 --- a/modules/audio_device/include/audio_device.h +++ b/modules/audio_device/include/audio_device.h @@ -20,14 +20,7 @@ namespace webrtc { class AudioDeviceModuleForTest; - -class AudioDeviceObserver { - public: - virtual ~AudioDeviceObserver() = default; - - // input/output devices updated or default device changed - virtual void OnDevicesUpdated() = 0; -}; +class AudioDeviceObserver; class AudioDeviceModule : public rtc::RefCountInterface { public: @@ -50,6 +43,11 @@ class AudioDeviceModule : public rtc::RefCountInterface { kDefaultDevice = -2 }; + enum SpeechActivityEvent { + kStarted = 0, + kEnded, + }; + struct Stats { // The fields below correspond to similarly-named fields in the WebRTC stats // spec. 
https://w3c.github.io/webrtc-stats/#playoutstats-dict* @@ -203,6 +201,16 @@ class AudioDeviceModuleForTest : public AudioDeviceModule { virtual int SetRecordingSampleRate(uint32_t sample_rate) = 0; }; +class AudioDeviceObserver { + public: + virtual ~AudioDeviceObserver() = default; + + // input/output devices updated or default device changed + virtual void OnDevicesUpdated() {} + virtual void OnSpeechActivityEvent( + AudioDeviceModule::SpeechActivityEvent event) {} +}; + } // namespace webrtc #endif // MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_H_ diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h index b02cecfd0b..1efa6411b7 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h @@ -22,7 +22,13 @@ NS_ASSUME_NONNULL_BEGIN -typedef void (^RTCOnAudioDevicesDidUpdate)(); +typedef NS_ENUM(NSInteger, RTCSpeechActivityEvent) { + RTCSpeechActivityEventStarted, + RTCSpeechActivityEventEnded, +}; + +typedef void (^RTCDevicesDidUpdateCallback)(); +typedef void (^RTCSpeechActivityCallback)(RTCSpeechActivityEvent); RTC_OBJC_EXPORT @interface RTC_OBJC_TYPE (RTCAudioDeviceModule) : NSObject @@ -42,7 +48,8 @@ RTC_OBJC_EXPORT - (BOOL)trySetOutputDevice:(nullable RTC_OBJC_TYPE(RTCIODevice) *)device; - (BOOL)trySetInputDevice:(nullable RTC_OBJC_TYPE(RTCIODevice) *)device; -- (BOOL)setDevicesUpdatedHandler: (nullable RTCOnAudioDevicesDidUpdate) handler; +- (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback; +- (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback; - (BOOL)startPlayout; - (BOOL)stopPlayout; diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm index b935f6db9e..e99217a4dc 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm @@ -14,7 +14,7 @@ * 
limitations under the License. */ -#include +#include #import "RTCAudioDeviceModule.h" #import "RTCAudioDeviceModule+Private.h" @@ -25,19 +25,60 @@ class AudioDeviceObserver : public webrtc::AudioDeviceObserver { public: - AudioDeviceObserver() {} + AudioDeviceObserver() : lock_(OS_UNFAIR_LOCK_INIT) {} void OnDevicesUpdated() override { + os_unfair_lock_lock(&lock_); + if (on_devices_did_update_callback_) { + on_devices_did_update_callback_(); + } + os_unfair_lock_unlock(&lock_); + } - RTCLogInfo(@"AudioDeviceObserver OnDevicesUpdated"); - - if (callback_handler_) { - callback_handler_(); + void OnSpeechActivityEvent(webrtc::AudioDeviceModule::SpeechActivityEvent event) override { + os_unfair_lock_lock(&lock_); + if (on_speech_activity_callback_) { + on_speech_activity_callback_(ConvertSpeechActivityEvent(event)); } + os_unfair_lock_unlock(&lock_); + } + + void SetDevicesUpdatedCallBack(RTCDevicesDidUpdateCallback cb) { + os_unfair_lock_lock(&lock_); + on_devices_did_update_callback_ = cb; + os_unfair_lock_unlock(&lock_); + } + + void SetOnSpeechActivityCallBack(RTCSpeechActivityCallback cb) { + os_unfair_lock_lock(&lock_); + on_speech_activity_callback_ = cb; + os_unfair_lock_unlock(&lock_); + } + + bool IsAnyCallbackAttached() { + os_unfair_lock_lock(&lock_); + bool result = + on_devices_did_update_callback_ != nullptr || on_speech_activity_callback_ != nullptr; + os_unfair_lock_unlock(&lock_); + return result; } - // private: - RTCOnAudioDevicesDidUpdate callback_handler_; + private: + os_unfair_lock lock_; + RTCDevicesDidUpdateCallback on_devices_did_update_callback_; + RTCSpeechActivityCallback on_speech_activity_callback_; + + RTCSpeechActivityEvent ConvertSpeechActivityEvent( + webrtc::AudioDeviceModule::SpeechActivityEvent event) { + switch (event) { + case webrtc::AudioDeviceModule::SpeechActivityEvent::kStarted: + return RTCSpeechActivityEvent::RTCSpeechActivityEventStarted; + case webrtc::AudioDeviceModule::SpeechActivityEvent::kEnded: + return 
RTCSpeechActivityEvent::RTCSpeechActivityEventEnded; + default: + return RTCSpeechActivityEvent::RTCSpeechActivityEventEnded; + } + } }; @implementation RTC_OBJC_TYPE (RTCAudioDeviceModule) { @@ -57,10 +98,6 @@ - (instancetype)initWithNativeModule:(rtc::scoped_refptrBlockingCall([self] { - _native->SetObserver(_observer); - }); - return self; } @@ -240,9 +277,19 @@ - (BOOL)initRecording { }); } -- (BOOL)setDevicesUpdatedHandler: (nullable RTCOnAudioDevicesDidUpdate) handler { - _sink->callback_handler_ = handler; - _observer->callback_handler_ = callback; +- (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback { + _observer->SetDevicesUpdatedCallBack(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +- (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback { + _observer->SetOnSpeechActivityCallBack(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? 
_observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + return YES; } diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.mm b/sdk/objc/native/src/audio/audio_device_module_ios.mm index a20d989e83..d178508b2b 100644 --- a/sdk/objc/native/src/audio/audio_device_module_ios.mm +++ b/sdk/objc/native/src/audio/audio_device_module_ios.mm @@ -667,9 +667,10 @@ #endif // WEBRTC_IOS int32_t AudioDeviceModuleIOS::SetObserver(AudioDeviceObserver* observer) const { - // not implemented - RTC_LOG(LS_WARNING) << __FUNCTION__ << "(" << observer << ") Not implemented"; - return -1; + RTC_DLOG(LS_INFO) << __FUNCTION__; + int r = audio_device_->SetObserver(observer); + RTC_DLOG(LS_INFO) << "output: " << r; + return r; } } } From e07b8144d086f4d57bca7c3bffee13d3046e3724 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:28:55 +0700 Subject: [PATCH 08/15] Clean up imports --- modules/audio_device/audio_engine_device.mm | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index 5d8a6cbf1f..ef1567febf 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -14,12 +14,11 @@ * limitations under the License. 
*/ -#import #import +#import #include "audio_engine_device.h" -#include #include #include @@ -31,9 +30,6 @@ #include "rtc_base/thread.h" #include "rtc_base/thread_annotations.h" #include "rtc_base/time_utils.h" -#include "sdk/objc/native/src/audio/helpers.h" -#include "system_wrappers/include/field_trial.h" -#include "system_wrappers/include/metrics.h" #import "base/RTCLogging.h" @@ -58,8 +54,6 @@ const uint32_t N_REC_CHANNELS = 1; // default is mono recording const uint32_t N_PLAY_CHANNELS = 1; // default is stereo playout -using ios::CheckAndLogError; - AudioEngineDevice::AudioEngineDevice(bool bypass_voice_processing) : bypass_voice_processing_(bypass_voice_processing), audio_device_buffer_(nullptr), From 631126f5aada49d064f9d05b1bd26b75f64e7a1c Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Fri, 20 Dec 2024 23:24:42 +0700 Subject: [PATCH 09/15] Fix macOS vp --- modules/audio_device/audio_engine_device.h | 3 +- modules/audio_device/audio_engine_device.mm | 104 +++++++++++--------- 2 files changed, 61 insertions(+), 46 deletions(-) diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index 6f9875c850..96613ccfa3 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -204,7 +204,8 @@ class AudioEngineDevice : public AudioDeviceGeneric, // AVAudioEngine objects AVAudioEngine* audio_engine_; - AVAudioFormat* rtc_internal_format_; // Int16 + AVAudioFormat* rtc_internal_format_; // Int16 + AVAudioFormat* engine_internal_format_; // Float32 // Output related AVAudioSourceNode* source_node_; diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index ef1567febf..d1ea48cb7a 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#import #import +#import #include "audio_engine_device.h" @@ -147,6 +147,11 @@ channels:1 interleaved:YES]; + engine_internal_format_ = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32 + sampleRate:48000.0 + channels:1 + interleaved:NO]; + initialized_ = true; return InitStatus::OK; @@ -776,7 +781,15 @@ LOGI() << "Enabling output for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); - AVAudioFormat* output_format = [audio_engine_.outputNode outputFormatForBus:0]; + if (!audio_engine_.outputNode.voiceProcessingEnabled) { + NSError* error = nil; + BOOL set_vp_result = [audio_engine_.outputNode setVoiceProcessingEnabled:YES error:&error]; + if (!set_vp_result) { + NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); + RTC_DCHECK(set_vp_result); + } + LOGI() << "setVoiceProcessingEnabled (output) result: " << set_vp_result ? "YES" : "NO"; + } AVAudioSourceNodeRenderBlock source_block = ^OSStatus(BOOL* isSilence, const AudioTimeStamp* timestamp, AVAudioFrameCount frameCount, @@ -797,11 +810,13 @@ [audio_engine_ attachNode:source_node_]; - [audio_engine_ connect:source_node_ to:audio_engine_.mainMixerNode format:output_format]; + [audio_engine_ connect:source_node_ + to:audio_engine_.mainMixerNode + format:engine_internal_format_]; [audio_engine_ connect:audio_engine_.mainMixerNode to:audio_engine_.outputNode - format:output_format]; + format:engine_internal_format_]; } else if (old_state.output_enabled && !new_state.output_enabled) { LOGI() << "Disabling output for AVAudioEngine..."; @@ -820,50 +835,17 @@ LOGI() << "Enabling input for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); - AVAudioFormat* input_format = [audio_engine_.inputNode outputFormatForBus:0]; - - input_eq_node_ = [[AVAudioUnitEQ alloc] initWithNumberOfBands:2]; - [audio_engine_ attachNode:input_eq_node_]; - - input_mixer_node_ = [[AVAudioMixerNode alloc] init]; - [audio_engine_ attachNode:input_mixer_node_]; - - AVAudioSinkNodeReceiverBlock 
sink_block = ^OSStatus(const AudioTimeStamp* timestamp, - AVAudioFrameCount frameCount, - const AudioBufferList* inputData) { - RTC_DCHECK(inputData->mNumberBuffers == 1); - - const int64_t capture_time_ns = timestamp->mHostTime * machTickUnitsToNanoseconds_; - const int16_t* rtc_buffer = (int16_t*)inputData->mBuffers[0].mData; - - fine_audio_buffer_->DeliverRecordedData(rtc::ArrayView(rtc_buffer, frameCount), - kFixedRecordDelayEstimate, capture_time_ns); - - return noErr; - }; - - sink_node_ = [[AVAudioSinkNode alloc] initWithReceiverBlock:sink_block]; - [audio_engine_ attachNode:sink_node_]; - - // InputNode -> InputEQNode -> InputMixerNode -> SinkNode -> RTC - [audio_engine_ connect:audio_engine_.inputNode to:input_eq_node_ format:input_format]; - - [audio_engine_ connect:input_eq_node_ to:input_mixer_node_ format:input_format]; - // Convert to RTC's internal format before passing buffers to SinkNode. - [audio_engine_ connect:input_mixer_node_ to:sink_node_ format:rtc_internal_format_]; - -#if defined(WEBRTC_IOS) if (!audio_engine_.inputNode.voiceProcessingEnabled) { - // Voice processing. NSError* error = nil; - BOOL set_input_vp_result = [audio_engine_.inputNode setVoiceProcessingEnabled:YES - error:&error]; - if (!set_input_vp_result) { + BOOL set_vp_result = [audio_engine_.inputNode setVoiceProcessingEnabled:YES error:&error]; + if (!set_vp_result) { NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); - RTC_DCHECK(set_input_vp_result); + RTC_DCHECK(set_vp_result); } - LOGI() << "setVoiceProcessingEnabled (input) result: " << set_input_vp_result ? "YES" : "NO"; + LOGI() << "setVoiceProcessingEnabled (input) result: " << set_vp_result ? "YES" : "NO"; + } + if (audio_engine_.inputNode.voiceProcessingEnabled) { // Muted talker detection. 
if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, tvOS 17.0, visionOS 1.0, *)) { auto listener_block = ^(AVAudioVoiceProcessingSpeechActivityEvent event) { @@ -895,13 +877,45 @@ if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { AVAudioVoiceProcessingOtherAudioDuckingConfiguration ducking_config; ducking_config.enableAdvancedDucking = YES; - ducking_config.duckingLevel = AVAudioVoiceProcessingOtherAudioDuckingLevelMax; + ducking_config.duckingLevel = AVAudioVoiceProcessingOtherAudioDuckingLevelMid; LOGI() << "setVoiceProcessingOtherAudioDuckingConfiguration"; [audio_engine_.inputNode setVoiceProcessingOtherAudioDuckingConfiguration:ducking_config]; } } -#endif + + input_eq_node_ = [[AVAudioUnitEQ alloc] initWithNumberOfBands:2]; + [audio_engine_ attachNode:input_eq_node_]; + + input_mixer_node_ = [[AVAudioMixerNode alloc] init]; + [audio_engine_ attachNode:input_mixer_node_]; + + AVAudioSinkNodeReceiverBlock sink_block = ^OSStatus(const AudioTimeStamp* timestamp, + AVAudioFrameCount frameCount, + const AudioBufferList* inputData) { + RTC_DCHECK(inputData->mNumberBuffers == 1); + + const int64_t capture_time_ns = timestamp->mHostTime * machTickUnitsToNanoseconds_; + const int16_t* rtc_buffer = (int16_t*)inputData->mBuffers[0].mData; + + fine_audio_buffer_->DeliverRecordedData(rtc::ArrayView(rtc_buffer, frameCount), + kFixedRecordDelayEstimate, capture_time_ns); + + return noErr; + }; + + sink_node_ = [[AVAudioSinkNode alloc] initWithReceiverBlock:sink_block]; + [audio_engine_ attachNode:sink_node_]; + + // InputNode -> InputEQNode -> InputMixerNode -> SinkNode -> RTC + // [audio_engine_ connect:audio_engine_.inputNode to:input_eq_node_ format:input_format]; + + [audio_engine_ connect:audio_engine_.inputNode + to:input_mixer_node_ + format:engine_internal_format_]; + // Convert to RTC's internal format before passing buffers to SinkNode. 
+ [audio_engine_ connect:input_mixer_node_ to:sink_node_ format:rtc_internal_format_]; + } else if (old_state.input_enabled && !new_state.input_enabled) { LOGI() << "Disabling input for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); From 5bbeb487cecd3e60cb1669cc8661d80d2b91bee7 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Sat, 21 Dec 2024 14:15:31 +0700 Subject: [PATCH 10/15] Debug print audio graph --- modules/audio_device/audio_engine_device.h | 2 + modules/audio_device/audio_engine_device.mm | 101 ++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index 96613ccfa3..7f774a191e 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -166,6 +166,8 @@ class AudioEngineDevice : public AudioDeviceGeneric, // AudioEngine observer methods. May be called from any thread. void OnEngineConfigurationChange(); + void DebugAudioEngine(); + // Determines whether voice processing should be enabled or disabled. 
const bool bypass_voice_processing_; diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index d1ea48cb7a..7334bb90e4 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -954,6 +954,7 @@ BOOL start_result = [audio_engine_ startAndReturnError:&error]; if (!start_result) { LOGE() << "Failed to start engine: " << error.localizedDescription.UTF8String; + DebugAudioEngine(); } } } @@ -979,6 +980,7 @@ // ---------------------------------------------------------------------------------------------------- // Private - Audio session + #if defined(WEBRTC_IOS) bool AudioEngineDevice::ConfigureAudioSession() { RTC_DCHECK_RUN_ON(thread_); @@ -1035,4 +1037,103 @@ } #endif +// ---------------------------------------------------------------------------------------------------- +// Private - Debug + +void AudioEngineDevice::DebugAudioEngine() { + RTC_DCHECK_RUN_ON(thread_); + + auto padded_string = [](int pad) { return std::string(pad * 2, ' '); }; + + auto audio_format = [](AVAudioFormat* format) { + std::ostringstream result; + + // Get the underlying AudioStreamBasicDescription + const AudioStreamBasicDescription& asbd = *format.streamDescription; + + result << "("; + // Basic properties + result << "sampleRate: " << format.sampleRate; + result << ", channels: " << format.channelCount; + result << ", bitsPerChannel: " << asbd.mBitsPerChannel; + + // Format ID (should be LinearPCM) + result << ", formatID: "; + char formatID[5] = {0}; + *(UInt32*)formatID = CFSwapInt32HostToBig(asbd.mFormatID); + result << formatID; + result << (asbd.mFormatID == kAudioFormatLinearPCM ? 
" (LinearPCM)" : " (Not LinearPCM)"); + + // Format Flags + result << std::hex << std::showbase; + result << ", formatFlags: " << asbd.mFormatFlags; + + // Check specific flags + bool isFloat = (asbd.mFormatFlags & kAudioFormatFlagIsFloat); + bool isPacked = (asbd.mFormatFlags & kAudioFormatFlagIsPacked); + bool isNonInterleaved = (asbd.mFormatFlags & kAudioFormatFlagIsNonInterleaved); + bool isNativeEndian = (asbd.mFormatFlags & kAudioFormatFlagsNativeEndian); + + bool isAudioUnitCanonical = isNativeEndian && isFloat && isPacked && isNonInterleaved; + + result << std::dec; // Switch back to decimal + result << " ["; + result << "float:" << (isFloat ? "true" : "false") << ", "; + result << "packed:" << (isPacked ? "true" : "false") << ", "; + result << "non-interleaved:" << (isNonInterleaved ? "true" : "false") << ", "; + result << "native-endian:" << (isNativeEndian ? "true" : "false") << ", "; + result << "audio-unit-canonical:" << (isAudioUnitCanonical ? "true" : "false"); + result << "]"; + + result << ")"; + return result.str(); + }; + + std::function print_node; + print_node = [this, &padded_string, &audio_format](AVAudioNode* node, int base_depth = 0) { + LOGI() << padded_string(base_depth) << NSStringFromClass([node class]).UTF8String << "." + << node.hash; + + // Inputs + for (NSUInteger i = 0; i < node.numberOfInputs; i++) { + AVAudioFormat* format = [node inputFormatForBus:i]; + LOGI() << padded_string(base_depth) << " <- #" << i << audio_format(format); + + AVAudioConnectionPoint* connection = [this->audio_engine_ inputConnectionPointForNode:node + inputBus:i]; + if (connection != nil) { + LOGI() << padded_string(base_depth + 1) << " <-> " + << NSStringFromClass([connection.node class]).UTF8String << "." 
+ << connection.node.hash << " #" << connection.bus; + } + } + + // Outputs + for (NSUInteger i = 0; i < node.numberOfOutputs; i++) { + AVAudioFormat* format = [node outputFormatForBus:i]; + LOGI() << padded_string(base_depth) << " -> #" << i << audio_format(format); + + for (NSUInteger o = 0; o < node.numberOfOutputs; o++) { + NSArray* points = [this->audio_engine_ outputConnectionPointsForNode:node outputBus:o]; + for (AVAudioConnectionPoint* connection in points) { + LOGI() << padded_string(base_depth + 1) << " <-> " + << NSStringFromClass([connection.node class]).UTF8String << "." + << connection.node.hash << " #" << connection.bus; + } + } + } + }; + + NSArray* attachedNodes = [audio_engine_.attachedNodes allObjects]; + LOGI() << "=================================================="; + LOGI() << "DebugAudioEngine attached nodes: " << attachedNodes.count; + + for (NSUInteger i = 0; i < attachedNodes.count; i++) { + AVAudioNode* node = attachedNodes[i]; + print_node(node, 0); + } + + LOGI() << "=================================================="; +} + } // namespace webrtc From 49ca1ee31fa79a233571f7322b538b98ca8dcfc4 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Sun, 22 Dec 2024 04:32:07 +0700 Subject: [PATCH 11/15] Check AGC --- modules/audio_device/audio_engine_device.mm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index 7334bb90e4..988d15bcd3 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -843,6 +843,8 @@ RTC_DCHECK(set_vp_result); } LOGI() << "setVoiceProcessingEnabled (input) result: " << set_vp_result ? 
"YES" : "NO"; + + RTC_DCHECK(audio_engine_.inputNode.isVoiceProcessingAGCEnabled); } if (audio_engine_.inputNode.voiceProcessingEnabled) { From 6ba820c35fc496741cf8a958920e44fdda0c0487 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 31 Dec 2024 01:57:09 +0900 Subject: [PATCH 12/15] Squashed recent progress Fix adm selection Fixes Revert adm selection in audio_device_impl Rename IsManualRenderingMode Simplify pcm buffer delegate Fixes Fixes Ducking config Strip manual rendering logic Runtime-ducking config Fix compile Fix start recording Connect output Buffer logic Enable output when input is enabled --- .../audio_device_data_observer.cc | 2 +- modules/audio_device/audio_device_impl.cc | 16 +- modules/audio_device/audio_device_impl.h | 2 +- modules/audio_device/audio_engine_device.h | 120 +-- modules/audio_device/audio_engine_device.mm | 801 ++++++++++++------ modules/audio_device/include/audio_device.h | 36 +- sdk/BUILD.gn | 2 + sdk/objc/api/RTCAudioRendererAdapter.mm | 49 +- .../api/peerconnection/RTCAudioDeviceModule.h | 27 +- .../peerconnection/RTCAudioDeviceModule.mm | 266 ++++-- .../RTCPeerConnectionFactory+Native.h | 4 +- .../RTCPeerConnectionFactory.mm | 38 +- .../RTCPeerConnectionFactoryBuilder.mm | 2 +- .../audio/RTCAudioSession+Private.h | 3 - sdk/objc/components/audio/RTCAudioSession.h | 4 - sdk/objc/components/audio/RTCAudioSession.mm | 12 - .../src/audio/audio_device_module_ios.h | 6 +- .../src/audio/audio_device_module_ios.mm | 6 +- 18 files changed, 904 insertions(+), 492 deletions(-) diff --git a/modules/audio_device/audio_device_data_observer.cc b/modules/audio_device/audio_device_data_observer.cc index 313acd5c52..f25b451ffc 100644 --- a/modules/audio_device/audio_device_data_observer.cc +++ b/modules/audio_device/audio_device_data_observer.cc @@ -307,7 +307,7 @@ class ADMWrapper : public AudioDeviceModule, public AudioTransport { } #endif // WEBRTC_IOS - int32_t 
SetObserver(AudioDeviceObserver* observer) const override { + int32_t SetObserver(AudioDeviceObserver* observer) override { return impl_->SetObserver(observer); } diff --git a/modules/audio_device/audio_device_impl.cc b/modules/audio_device/audio_device_impl.cc index 1308d677ef..eb1e20ed9f 100644 --- a/modules/audio_device/audio_device_impl.cc +++ b/modules/audio_device/audio_device_impl.cc @@ -34,12 +34,11 @@ #if defined(WEBRTC_ENABLE_LINUX_PULSE) #include "modules/audio_device/linux/audio_device_pulse_linux.h" #endif +#elif defined(WEBRTC_IOS) +#include "sdk/objc/native/src/audio/audio_device_ios.h" +#elif defined(WEBRTC_MAC) +#include "modules/audio_device/mac/audio_device_mac.h" #endif - -#if defined(WEBRTC_IOS) || defined(WEBRTC_MAC) -#include "modules/audio_device/audio_engine_device.h" -#endif - #if defined(WEBRTC_DUMMY_FILE_DEVICES) #include "modules/audio_device/dummy/file_audio_device.h" #include "modules/audio_device/dummy/file_audio_device_factory.h" @@ -247,7 +246,8 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() { // iOS ADM implementation. #if defined(WEBRTC_IOS) if (audio_layer == kPlatformDefaultAudio) { - audio_device_.reset(new AudioEngineDevice(/*bypass_voice_processing=*/bypass_voice_processing_)); + audio_device_.reset( + new ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/bypass_voice_processing_)); RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized."; } // END #if defined(WEBRTC_IOS) @@ -255,7 +255,7 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() { // Mac OS X ADM implementation. 
#elif defined(WEBRTC_MAC) if (audio_layer == kPlatformDefaultAudio) { - audio_device_.reset(new AudioEngineDevice(/*bypass_voice_processing=*/false)); + audio_device_.reset(new AudioDeviceMac()); RTC_LOG(LS_INFO) << "Mac OS X Audio APIs will be utilized."; } #endif // WEBRTC_MAC @@ -902,7 +902,7 @@ int AudioDeviceModuleImpl::GetRecordAudioParameters( } #endif // WEBRTC_IOS -int32_t AudioDeviceModuleImpl::SetObserver(AudioDeviceObserver* observer) const { +int32_t AudioDeviceModuleImpl::SetObserver(AudioDeviceObserver* observer) { RTC_LOG(LS_INFO) << __FUNCTION__ << "(" << observer << ")"; int32_t ok = audio_device_->SetObserver(observer); RTC_LOG(LS_INFO) << "output: " << ok; diff --git a/modules/audio_device/audio_device_impl.h b/modules/audio_device/audio_device_impl.h index 02c763e82c..d1f253871c 100644 --- a/modules/audio_device/audio_device_impl.h +++ b/modules/audio_device/audio_device_impl.h @@ -156,7 +156,7 @@ class AudioDeviceModuleImpl : public AudioDeviceModuleForTest { int GetRecordAudioParameters(AudioParameters* params) const override; #endif // WEBRTC_IOS - int32_t SetObserver(AudioDeviceObserver* observer) const override; + int32_t SetObserver(AudioDeviceObserver* observer) override; int32_t GetPlayoutDevice() const override; int32_t GetRecordingDevice() const override; diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index 7f774a191e..0b95aafcfe 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -17,6 +17,8 @@ #ifndef SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ #define SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ +#import + #include #include @@ -31,26 +33,18 @@ #include "sdk/objc/native/src/audio/audio_session_observer.h" RTC_FWD_DECL_OBJC_CLASS(RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter)); -RTC_FWD_DECL_OBJC_CLASS(AVAudioEngine); -RTC_FWD_DECL_OBJC_CLASS(AVAudioSourceNode); 
-RTC_FWD_DECL_OBJC_CLASS(AVAudioSinkNode); -RTC_FWD_DECL_OBJC_CLASS(AVAudioFormat); -RTC_FWD_DECL_OBJC_CLASS(AVAudioMixerNode); -RTC_FWD_DECL_OBJC_CLASS(AVAudioUnitEQ); namespace webrtc { class FineAudioBuffer; -class AudioEngineDevice : public AudioDeviceGeneric, +class AudioEngineDevice : public AudioDeviceModule, public AudioSessionObserver { public: explicit AudioEngineDevice(bool bypass_voice_processing); ~AudioEngineDevice() override; - void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override; - - InitStatus Init() override; + int32_t Init() override; int32_t Terminate() override; bool Initialized() const override; @@ -68,16 +62,18 @@ class AudioEngineDevice : public AudioDeviceGeneric, int32_t StopRecording() override; bool Recording() const override; - int32_t PlayoutDelay(uint16_t& delayMS) const override; + int32_t PlayoutDelay(uint16_t* delayMS) const override; int32_t GetPlayoutUnderrunCount() const override { return -1; } - // int GetPlayoutAudioParameters(AudioParameters* params) const override; - // int GetRecordAudioParameters(AudioParameters* params) const override; +#if defined(WEBRTC_IOS) + int GetPlayoutAudioParameters(AudioParameters* params) const override; + int GetRecordAudioParameters(AudioParameters* params) const override; +#endif int32_t ActiveAudioLayer( - AudioDeviceModule::AudioLayer& audioLayer) const override; - int32_t PlayoutIsAvailable(bool& available) override; - int32_t RecordingIsAvailable(bool& available) override; + AudioDeviceModule::AudioLayer* audioLayer) const override; + int32_t PlayoutIsAvailable(bool* available) override; + int32_t RecordingIsAvailable(bool* available) override; int16_t PlayoutDevices() override; int16_t RecordingDevices() override; int32_t PlayoutDeviceName(uint16_t index, char name[kAdmMaxDeviceNameSize], @@ -94,28 +90,40 @@ class AudioEngineDevice : public AudioDeviceGeneric, bool SpeakerIsInitialized() const override; int32_t InitMicrophone() override; bool MicrophoneIsInitialized() const 
override; - int32_t SpeakerVolumeIsAvailable(bool& available) override; + int32_t SpeakerVolumeIsAvailable(bool* available) override; int32_t SetSpeakerVolume(uint32_t volume) override; - int32_t SpeakerVolume(uint32_t& volume) const override; - int32_t MaxSpeakerVolume(uint32_t& maxVolume) const override; - int32_t MinSpeakerVolume(uint32_t& minVolume) const override; - int32_t MicrophoneVolumeIsAvailable(bool& available) override; + int32_t SpeakerVolume(uint32_t* volume) const override; + int32_t MaxSpeakerVolume(uint32_t* maxVolume) const override; + int32_t MinSpeakerVolume(uint32_t* minVolume) const override; + int32_t MicrophoneVolumeIsAvailable(bool* available) override; int32_t SetMicrophoneVolume(uint32_t volume) override; - int32_t MicrophoneVolume(uint32_t& volume) const override; - int32_t MaxMicrophoneVolume(uint32_t& maxVolume) const override; - int32_t MinMicrophoneVolume(uint32_t& minVolume) const override; - int32_t MicrophoneMuteIsAvailable(bool& available) override; + int32_t MicrophoneVolume(uint32_t* volume) const override; + int32_t MaxMicrophoneVolume(uint32_t* maxVolume) const override; + int32_t MinMicrophoneVolume(uint32_t* minVolume) const override; + int32_t MicrophoneMuteIsAvailable(bool* available) override; int32_t SetMicrophoneMute(bool enable) override; - int32_t MicrophoneMute(bool& enabled) const override; - int32_t SpeakerMuteIsAvailable(bool& available) override; + int32_t MicrophoneMute(bool* enabled) const override; + int32_t SpeakerMuteIsAvailable(bool* available) override; int32_t SetSpeakerMute(bool enable) override; - int32_t SpeakerMute(bool& enabled) const override; - int32_t StereoPlayoutIsAvailable(bool& available) override; + int32_t SpeakerMute(bool* enabled) const override; + int32_t StereoPlayoutIsAvailable(bool* available) const override; int32_t SetStereoPlayout(bool enable) override; - int32_t StereoPlayout(bool& enabled) const override; - int32_t StereoRecordingIsAvailable(bool& available) override; + int32_t 
StereoPlayout(bool* enabled) const override; + int32_t StereoRecordingIsAvailable(bool* available) const override; int32_t SetStereoRecording(bool enable) override; - int32_t StereoRecording(bool& enabled) const override; + int32_t StereoRecording(bool* enabled) const override; + + int32_t RegisterAudioCallback(AudioTransport* audioCallback) override; + + // Only supported on Android. + bool BuiltInAECIsAvailable() const override; + bool BuiltInAGCIsAvailable() const override; + bool BuiltInNSIsAvailable() const override; + + // Enables the built-in audio effects. Only supported on Android. + int32_t EnableBuiltInAEC(bool enable) override; + int32_t EnableBuiltInAGC(bool enable) override; + int32_t EnableBuiltInNS(bool enable) override; // AudioSessionObserver methods. May be called from any thread. void OnInterruptionBegin() override; @@ -128,6 +136,17 @@ class AudioEngineDevice : public AudioDeviceGeneric, int32_t SetObserver(AudioDeviceObserver* observer) override; + int32_t SetManualRenderingMode(bool enable); + int32_t ManualRenderingMode(bool* enabled); + + int32_t SetAdvancedDucking(bool enable); + int32_t AdvancedDucking(bool* enabled); + + int32_t SetDuckingLevel(long level); + int32_t DuckingLevel(long* level); + + int32_t InitAndStartRecording(); + private: struct EngineState { bool input_enabled = false; @@ -138,6 +157,11 @@ class AudioEngineDevice : public AudioDeviceGeneric, bool input_muted = false; bool is_interrupted = false; + bool is_manual_mode = false; + bool voice_processing = true; + bool advanced_ducking = true; + long ducking_level = 0; // 0 = Default + bool operator==(const EngineState& rhs) const; bool operator!=(const EngineState& rhs) const; @@ -150,40 +174,36 @@ class AudioEngineDevice : public AudioDeviceGeneric, EngineState engine_state_ RTC_GUARDED_BY(thread_); + AVAudioInputNode* InputNode(); + AVAudioOutputNode* OutputNode(); + bool IsMicrophonePermissionGranted(); void SetEngineState(std::function state_transform); void 
UpdateEngineState(EngineState old_state, EngineState new_state); - // Configures the audio session for WebRTC. - bool ConfigureAudioSession(); - - // Like above, but requires caller to already hold session lock. - bool ConfigureAudioSessionLocked(); - - // Unconfigures the audio session. - void UnconfigureAudioSession(); - // AudioEngine observer methods. May be called from any thread. void OnEngineConfigurationChange(); void DebugAudioEngine(); + void StartRenderLoop(); + AVAudioEngineManualRenderingBlock render_block_; + // Determines whether voice processing should be enabled or disabled. const bool bypass_voice_processing_; - // Native I/O audio thread checker. - SequenceChecker io_thread_checker_; - // Thread that this object is created on. rtc::Thread* thread_; + std::unique_ptr render_thread_; + AVAudioPCMBuffer* render_buffer_; - AudioDeviceBuffer* audio_device_buffer_; + const std::unique_ptr task_queue_factory_; + std::unique_ptr audio_device_buffer_; + std::unique_ptr fine_audio_buffer_; AudioParameters playout_parameters_; AudioParameters record_parameters_; - std::unique_ptr fine_audio_buffer_; - // Set to true after successful call to Init(), false otherwise. bool initialized_ RTC_GUARDED_BY(thread_); @@ -193,9 +213,6 @@ class AudioEngineDevice : public AudioDeviceGeneric, RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter) * audio_session_observer_ RTC_GUARDED_BY(thread_); - // Set to true if we've activated the audio session. - bool has_configured_session_ RTC_GUARDED_BY(thread_); - // Avoids running pending task after `this` is Terminated. 
rtc::scoped_refptr safety_ = PendingTaskSafetyFlag::Create(); @@ -206,8 +223,7 @@ class AudioEngineDevice : public AudioDeviceGeneric, // AVAudioEngine objects AVAudioEngine* audio_engine_; - AVAudioFormat* rtc_internal_format_; // Int16 - AVAudioFormat* engine_internal_format_; // Float32 + AVAudioFormat* manual_render_rtc_format_; // Int16 // Output related AVAudioSourceNode* source_node_; diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index 988d15bcd3..c48976a45f 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -23,6 +23,7 @@ #include #include "api/array_view.h" +#include "api/task_queue/default_task_queue_factory.h" #include "api/task_queue/pending_task_safety_flag.h" #include "modules/audio_device/fine_audio_buffer.h" #include "rtc_base/checks.h" @@ -49,20 +50,17 @@ const UInt16 kFixedPlayoutDelayEstimate = 30; const UInt16 kFixedRecordDelayEstimate = 30; -const uint32_t N_REC_SAMPLES_PER_SEC = 48000; -const uint32_t N_PLAY_SAMPLES_PER_SEC = 48000; -const uint32_t N_REC_CHANNELS = 1; // default is mono recording -const uint32_t N_PLAY_CHANNELS = 1; // default is stereo playout +const size_t kMaximumFramesPerBuffer = 3072; // Maximum slice size for VoiceProcessingIO +const size_t kAudioSampleSize = 2; // Signed 16-bit integer AudioEngineDevice::AudioEngineDevice(bool bypass_voice_processing) : bypass_voice_processing_(bypass_voice_processing), - audio_device_buffer_(nullptr), - initialized_(false), - has_configured_session_(false) { + task_queue_factory_(CreateDefaultTaskQueueFactory()), + initialized_(false) { LOGI() << "bypass_voice_processing " << bypass_voice_processing_; - io_thread_checker_.Detach(); thread_ = rtc::Thread::Current(); + audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer(task_queue_factory_.get())); #if defined(WEBRTC_IOS) audio_session_observer_ = @@ -85,6 +83,12 @@ mach_timebase_info_data_t tinfo; 
mach_timebase_info(&tinfo); machTickUnitsToNanoseconds_ = (double)tinfo.numer / tinfo.denom; + + // Manual rendering formats are fixed to 48k for now. + manual_render_rtc_format_ = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 + sampleRate:48000 + channels:1 + interleaved:YES]; } AudioEngineDevice::~AudioEngineDevice() { @@ -102,21 +106,6 @@ audio_session_observer_ = nil; } -void AudioEngineDevice::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) { - LOGI() << "AttachAudioBuffer"; - RTC_DCHECK(audioBuffer); - RTC_DCHECK_RUN_ON(thread_); - audio_device_buffer_ = audioBuffer; - - // Fixes values for mac. - audio_device_buffer_->SetRecordingSampleRate(N_REC_SAMPLES_PER_SEC); - audio_device_buffer_->SetPlayoutSampleRate(N_PLAY_SAMPLES_PER_SEC); - audio_device_buffer_->SetRecordingChannels(N_REC_CHANNELS); - audio_device_buffer_->SetPlayoutChannels(N_PLAY_CHANNELS); - - fine_audio_buffer_.reset(new FineAudioBuffer(audio_device_buffer_)); -} - // MARK: - Main life cycle bool AudioEngineDevice::Initialized() const { @@ -126,13 +115,13 @@ return initialized_; } -AudioDeviceGeneric::InitStatus AudioEngineDevice::Init() { +int32_t AudioEngineDevice::Init() { LOGI() << "Init"; - io_thread_checker_.Detach(); - RTC_DCHECK_RUN_ON(thread_); + if (initialized_) { - return InitStatus::OK; + LOGW() << "Init: Already initialized"; + return 0; } #if defined(WEBRTC_IOS) @@ -142,19 +131,8 @@ record_parameters_.reset(config.sampleRate, config.inputNumberOfChannels); #endif - rtc_internal_format_ = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 - sampleRate:48000.0 - channels:1 - interleaved:YES]; - - engine_internal_format_ = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32 - sampleRate:48000.0 - channels:1 - interleaved:NO]; - initialized_ = true; - - return InitStatus::OK; + return 0; } int32_t AudioEngineDevice::Terminate() { @@ -192,8 +170,6 @@ LOGI() << "InitPlayout"; RTC_DCHECK_RUN_ON(thread_); RTC_DCHECK(initialized_); - 
RTC_DCHECK(!engine_state_.output_enabled); - RTC_DCHECK(!engine_state_.output_running); if (engine_state_.output_enabled) { LOGW() << "InitPlayout: Already initialized"; @@ -212,7 +188,6 @@ LOGI() << "StartPlayout"; RTC_DCHECK_RUN_ON(thread_); RTC_DCHECK(engine_state_.output_enabled); - RTC_DCHECK(!engine_state_.output_running); if (!engine_state_.output_enabled) { LOGW() << "StartPlayout: Not initialized"; @@ -224,10 +199,6 @@ return 0; } - if (fine_audio_buffer_) { - fine_audio_buffer_->ResetPlayout(); - } - SetEngineState([](EngineState state) -> EngineState { state.output_running = true; return state; @@ -256,6 +227,8 @@ return state; }); + audio_device_buffer_->StopPlayout(); + return 0; } @@ -280,8 +253,6 @@ LOGI() << "InitRecording"; RTC_DCHECK_RUN_ON(thread_); RTC_DCHECK(initialized_); - RTC_DCHECK(!engine_state_.input_enabled); - RTC_DCHECK(!engine_state_.input_running); if (engine_state_.input_enabled) { LOGW() << "InitRecording: Already initialized"; @@ -289,7 +260,9 @@ } SetEngineState([](EngineState state) -> EngineState { + state.output_enabled = true; state.input_enabled = true; + state.input_muted = true; // Muted by default return state; }); @@ -300,7 +273,6 @@ LOGI() << "StartRecording"; RTC_DCHECK_RUN_ON(thread_); RTC_DCHECK(engine_state_.input_enabled); - RTC_DCHECK(!engine_state_.input_running); if (!engine_state_.input_enabled) { LOGW() << "StartRecording: Not initialized"; @@ -312,10 +284,6 @@ return 0; } - if (fine_audio_buffer_) { - fine_audio_buffer_->ResetRecord(); - } - SetEngineState([](EngineState state) -> EngineState { state.input_running = true; state.input_muted = false; // Always unmute @@ -345,6 +313,8 @@ return state; }); + audio_device_buffer_->StopRecording(); + return 0; } @@ -400,9 +370,13 @@ return engine_state_.is_interrupted; } -int32_t AudioEngineDevice::ActiveAudioLayer(AudioDeviceModule::AudioLayer& audioLayer) const { +int32_t AudioEngineDevice::ActiveAudioLayer(AudioDeviceModule::AudioLayer* audioLayer) const { 
LOGI() << "ActiveAudioLayer"; - audioLayer = AudioDeviceModule::kPlatformDefaultAudio; + if (audioLayer == nullptr) { + return -1; + } + + *audioLayer = AudioDeviceModule::kPlatformDefaultAudio; return 0; } @@ -419,9 +393,13 @@ return true; } -int32_t AudioEngineDevice::SpeakerVolumeIsAvailable(bool& available) { +int32_t AudioEngineDevice::SpeakerVolumeIsAvailable(bool* available) { LOGI() << "SpeakerVolumeIsAvailable"; - available = false; + if (available == nullptr) { + return -1; + } + + *available = false; return 0; } @@ -432,27 +410,31 @@ return -1; } -int32_t AudioEngineDevice::SpeakerVolume(uint32_t& volume) const { +int32_t AudioEngineDevice::SpeakerVolume(uint32_t* volume) const { LOGW() << "SpeakerVolume: Not implemented"; return -1; } -int32_t AudioEngineDevice::MaxSpeakerVolume(uint32_t& maxVolume) const { +int32_t AudioEngineDevice::MaxSpeakerVolume(uint32_t* maxVolume) const { LOGW() << "MaxSpeakerVolume: Not implemented"; return -1; } -int32_t AudioEngineDevice::MinSpeakerVolume(uint32_t& minVolume) const { +int32_t AudioEngineDevice::MinSpeakerVolume(uint32_t* minVolume) const { LOGW() << "MinSpeakerVolume: Not implemented"; return -1; } -int32_t AudioEngineDevice::SpeakerMuteIsAvailable(bool& available) { +int32_t AudioEngineDevice::SpeakerMuteIsAvailable(bool* available) { LOGI() << "SpeakerMuteIsAvailable"; - available = false; + if (available == nullptr) { + return -1; + } + + *available = false; return 0; } @@ -463,7 +445,7 @@ return -1; } -int32_t AudioEngineDevice::SpeakerMute(bool& enabled) const { +int32_t AudioEngineDevice::SpeakerMute(bool* enabled) const { LOGW() << "SpeakerMute: Not implemented"; return -1; @@ -486,10 +468,15 @@ // ---------------------------------------------------------------------------------------------------- // Microphone Muting -int32_t AudioEngineDevice::MicrophoneMuteIsAvailable(bool& available) { +int32_t AudioEngineDevice::MicrophoneMuteIsAvailable(bool* available) { RTC_DCHECK_RUN_ON(thread_); LOGI() << 
"MicrophoneMuteIsAvailable"; - available = true; + if (available == nullptr) { + return -1; + } + + *available = true; + return 0; } @@ -505,11 +492,15 @@ return 0; } -int32_t AudioEngineDevice::MicrophoneMute(bool& enabled) const { +int32_t AudioEngineDevice::MicrophoneMute(bool* enabled) const { RTC_DCHECK_RUN_ON(thread_); LOGI() << "MicrophoneMute"; - enabled = engine_state_.input_muted; + if (enabled == nullptr) { + return -1; + } + + *enabled = engine_state_.input_muted; return 0; } @@ -517,9 +508,13 @@ // ---------------------------------------------------------------------------------------------------- // Stereo Playout -int32_t AudioEngineDevice::StereoPlayoutIsAvailable(bool& available) { +int32_t AudioEngineDevice::StereoPlayoutIsAvailable(bool* available) const { LOGI() << "StereoPlayoutIsAvailable"; - available = false; + if (available == nullptr) { + return -1; + } + + *available = false; return 0; } @@ -527,12 +522,18 @@ int32_t AudioEngineDevice::SetStereoPlayout(bool enable) { LOGW() << "SetStereoPlayout: Not implemented, value:" << enable; - return -1; + audio_device_buffer_->SetPlayoutChannels(1); + + return 0; } -int32_t AudioEngineDevice::StereoPlayout(bool& enabled) const { +int32_t AudioEngineDevice::StereoPlayout(bool* enabled) const { LOGI() << "StereoPlayout"; - enabled = false; + if (enabled == nullptr) { + return -1; + } + + *enabled = false; return 0; } @@ -540,9 +541,13 @@ // ---------------------------------------------------------------------------------------------------- // Stereo Recording -int32_t AudioEngineDevice::StereoRecordingIsAvailable(bool& available) { - LOGI() << "StereoPlayoutIsAvailable"; - available = false; +int32_t AudioEngineDevice::StereoRecordingIsAvailable(bool* available) const { + LOGI() << "StereoRecordingIsAvailable"; + if (available == nullptr) { + return -1; + } + + *available = false; return 0; } @@ -550,12 +555,18 @@ int32_t AudioEngineDevice::SetStereoRecording(bool enable) { LOGW() << 
"SetStereoRecording: Not implemented, value: " << enable; - return -1; + audio_device_buffer_->SetRecordingChannels(1); + + return 0; } -int32_t AudioEngineDevice::StereoRecording(bool& enabled) const { +int32_t AudioEngineDevice::StereoRecording(bool* enabled) const { LOGI() << "StereoRecording"; - enabled = false; + if (enabled == nullptr) { + return -1; + } + + *enabled = false; return 0; } @@ -563,9 +574,13 @@ // ---------------------------------------------------------------------------------------------------- // Microphone Volume -int32_t AudioEngineDevice::MicrophoneVolumeIsAvailable(bool& available) { +int32_t AudioEngineDevice::MicrophoneVolumeIsAvailable(bool* available) { LOGI() << "MicrophoneVolumeIsAvailable"; - available = false; + if (available == nullptr) { + return -1; + } + + *available = false; return 0; } @@ -576,19 +591,19 @@ return -1; } -int32_t AudioEngineDevice::MicrophoneVolume(uint32_t& volume) const { +int32_t AudioEngineDevice::MicrophoneVolume(uint32_t* volume) const { LOGW() << "SetMicrophoneVolume: Not implemented"; return -1; } -int32_t AudioEngineDevice::MaxMicrophoneVolume(uint32_t& maxVolume) const { +int32_t AudioEngineDevice::MaxMicrophoneVolume(uint32_t* maxVolume) const { LOGW() << "SetMicrophoneVolume: Not implemented"; return -1; } -int32_t AudioEngineDevice::MinMicrophoneVolume(uint32_t& minVolume) const { +int32_t AudioEngineDevice::MinMicrophoneVolume(uint32_t* minVolume) const { LOGW() << "MinMicrophoneVolume: Not implemented"; return -1; @@ -597,9 +612,13 @@ // ---------------------------------------------------------------------------------------------------- // Playout Device -int32_t AudioEngineDevice::PlayoutIsAvailable(bool& available) { +int32_t AudioEngineDevice::PlayoutIsAvailable(bool* available) { LOGI() << "PlayoutIsAvailable"; - available = true; + if (available == nullptr) { + return -1; + } + + *available = true; return 0; } @@ -618,13 +637,13 @@ int32_t AudioEngineDevice::PlayoutDeviceName(uint16_t 
index, char name[kAdmMaxDeviceNameSize], char guid[kAdmMaxGuidSize]) { - LOGW() << "PlayoutDeviceName: Not implemented"; + // LOGW() << "PlayoutDeviceName: Not implemented"; return -1; } int16_t AudioEngineDevice::PlayoutDevices() { - LOGI() << "PlayoutDevices"; + // LOGI() << "PlayoutDevices"; return (int16_t)1; } @@ -634,7 +653,7 @@ int32_t AudioEngineDevice::RecordingDeviceName(uint16_t index, char name[kAdmMaxDeviceNameSize], char guid[kAdmMaxGuidSize]) { - LOGW() << "RecordingDeviceName"; + // LOGW() << "RecordingDeviceName"; return -1; } @@ -651,24 +670,65 @@ return -1; } -int32_t AudioEngineDevice::RecordingIsAvailable(bool& available) { +int32_t AudioEngineDevice::RecordingIsAvailable(bool* available) { LOGI() << "RecordingIsAvailable"; + if (available == nullptr) { + return -1; + } + + *available = true; - available = true; return 0; } int16_t AudioEngineDevice::RecordingDevices() { - LOGI() << "RecordingDevices"; + // LOGI() << "RecordingDevices"; return (int16_t)1; } +// + +int32_t AudioEngineDevice::RegisterAudioCallback(AudioTransport* audioCallback) { + LOGI() << "RegisterAudioCallback"; + RTC_DCHECK_RUN_ON(thread_); + RTC_DCHECK(audio_device_buffer_ != nullptr); + RTC_DCHECK(audioCallback != nullptr); + + return audio_device_buffer_->RegisterAudioCallback(audioCallback); +} + +// ---------------------------------------------------------------------------------------------------- +// Misc + +bool AudioEngineDevice::BuiltInAECIsAvailable() const { return false; } + +bool AudioEngineDevice::BuiltInAGCIsAvailable() const { return false; } + +bool AudioEngineDevice::BuiltInNSIsAvailable() const { return false; } + +int32_t AudioEngineDevice::EnableBuiltInAEC(bool enable) { return -1; } + +int32_t AudioEngineDevice::EnableBuiltInAGC(bool enable) { return -1; } + +int32_t AudioEngineDevice::EnableBuiltInNS(bool enable) { return -1; } + // ---------------------------------------------------------------------------------------------------- // Misc -int32_t 
AudioEngineDevice::PlayoutDelay(uint16_t& delayMS) const { - delayMS = kFixedPlayoutDelayEstimate; +#if defined(WEBRTC_IOS) +int AudioEngineDevice::GetPlayoutAudioParameters(AudioParameters* params) const { return -1; } +int AudioEngineDevice::GetRecordAudioParameters(AudioParameters* params) const { return -1; } +#endif + +int32_t AudioEngineDevice::PlayoutDelay(uint16_t* delayMS) const { + // LOGI() << "PlayoutDelay"; + if (delayMS == nullptr) { + return -1; + } + + *delayMS = kFixedPlayoutDelayEstimate; + return 0; } @@ -677,6 +737,113 @@ RTC_DCHECK_RUN_ON(thread_); observer_ = observer; + + return 0; +} + +// ---------------------------------------------------------------------------------------------------- +// Unique methods to AudioEngineDevice + +int32_t AudioEngineDevice::ManualRenderingMode(bool* enabled) { + LOGI() << "ManualRenderingMode"; + RTC_DCHECK_RUN_ON(thread_); + + if (enabled == nullptr) { + return -1; + } + + *enabled = engine_state_.is_manual_mode; + + return 0; +} + +int32_t AudioEngineDevice::SetManualRenderingMode(bool enable) { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "SetManualRenderingMode: " << enable; + + SetEngineState([enable](EngineState state) -> EngineState { + state.is_manual_mode = enable; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::InitAndStartRecording() { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "InitAndStartRecording"; + + if (engine_state_.input_running) { + LOGW() << "InitAndStartRecording: Already recording"; + return 0; + } + + audio_device_buffer_->StartRecording(); + + if (fine_audio_buffer_) { + fine_audio_buffer_->ResetRecord(); + } + + SetEngineState([](EngineState state) -> EngineState { + state.output_enabled = true; + state.output_running = true; + state.input_enabled = true; + state.input_running = true; + state.input_muted = false; // Always unmute + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::SetAdvancedDucking(bool enable) { + RTC_DCHECK_RUN_ON(thread_); + 
LOGI() << "SetAdvancedDucking: " << enable; + + SetEngineState([enable](EngineState state) -> EngineState { + state.advanced_ducking = enable; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::AdvancedDucking(bool* enabled) { + RTC_DCHECK_RUN_ON(thread_); + + if (enabled == nullptr) { + return -1; + } + + *enabled = engine_state_.advanced_ducking; + LOGI() << "AdvancedDucking value: " << *enabled; + + return 0; +} + +int32_t AudioEngineDevice::SetDuckingLevel(long level) { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "SetDuckingLevel: " << level; + + SetEngineState([level](EngineState state) -> EngineState { + state.ducking_level = level; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::DuckingLevel(long* level) { + LOGI() << "DuckingLevel"; + RTC_DCHECK_RUN_ON(thread_); + + if (level == nullptr) { + return -1; + } + + *level = engine_state_.ducking_level; + LOGI() << "DuckingLevel value: " << *level; + return 0; } @@ -686,19 +853,19 @@ void AudioEngineDevice::OnEngineConfigurationChange() { LOGI() << "OnEngineConfigurationChange"; - thread_->PostTask(SafeTask(safety_, [this] { - RTC_DCHECK_RUN_ON(thread_); + // thread_->PostTask(SafeTask(safety_, [this] { + // RTC_DCHECK_RUN_ON(thread_); - EngineState previous_state = this->engine_state_; + // EngineState previous_state = this->engine_state_; - this->SetEngineState([](EngineState state) -> EngineState { - return EngineState(); // Return default state to shutdown - }); + // this->SetEngineState([](EngineState state) -> EngineState { + // return EngineState(); // Return default state to shutdown + // }); - this->SetEngineState([previous_state](EngineState state) -> EngineState { - return previous_state; // Recover engine state - }); - })); + // this->SetEngineState([previous_state](EngineState state) -> EngineState { + // return previous_state; // Recover engine state + // }); + // })); } bool AudioEngineDevice::IsMicrophonePermissionGranted() { @@ -712,32 +879,6 @@ EngineState 
old_state = engine_state_; EngineState new_state = state_transform(old_state); -#if defined(WEBRTC_IOS) - if ((!old_state.output_enabled && new_state.output_enabled) || - (!old_state.input_enabled && new_state.input_enabled)) { - RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; - [session lockForConfiguration]; - [session notifyAudioEngineWillUpdateStateWithOutputEnabled:new_state.output_enabled - inputEnabled:new_state.input_enabled]; - ConfigureAudioSessionLocked(); - [session unlockForConfiguration]; - } - - if (!old_state.output_enabled && new_state.output_enabled) { - RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; - bool is_category_record = session.category == AVAudioSessionCategoryPlayAndRecord || - session.category == AVAudioSessionCategoryRecord; - - // Already enable input if mic perms are already granted. - if (!new_state.input_enabled && is_category_record) { - EngineState update_state = new_state; - update_state.input_enabled = true; - update_state.input_muted = true; - new_state = update_state; - } - } -#endif - if (old_state == new_state) { LOGI() << "SetEngineState: Nothing to update"; return; @@ -752,44 +893,93 @@ RTC_DCHECK(new_state.output_enabled); } - UpdateEngineState(old_state, new_state); engine_state_ = new_state; + UpdateEngineState(old_state, new_state); } void AudioEngineDevice::UpdateEngineState(EngineState old_state, EngineState new_state) { RTC_DCHECK_RUN_ON(thread_); - if (!old_state.IsAnyEnabled() && new_state.IsAnyEnabled()) { + // Playout or Recording enabled, create an engine instance. + bool is_new_engine = !old_state.IsAnyEnabled() && new_state.IsAnyEnabled(); + // Playout or Recording not enabled, destroy engine instance. 
+ bool is_release_engine = old_state.IsAnyEnabled() && !new_state.IsAnyEnabled(); + + bool is_restart_required = (old_state.input_enabled != new_state.input_enabled) || + (old_state.output_enabled != new_state.output_enabled); + + if (is_new_engine) { LOGI() << "Creating AVAudioEngine..."; audio_engine_ = [[AVAudioEngine alloc] init]; } - bool did_change_audio_graph = (old_state.input_enabled != new_state.input_enabled) || - (old_state.output_enabled != new_state.output_enabled); - if (old_state.IsAnyRunning()) { - if (!new_state.IsAnyRunning() || did_change_audio_graph) { + if (!new_state.IsAnyRunning() || is_restart_required) { LOGI() << "Stopping AVAudioEngine..."; [audio_engine_ stop]; } else if (!old_state.is_interrupted && new_state.is_interrupted) { LOGI() << "Pausing AVAudioEngine..."; [audio_engine_ pause]; } + + if (!new_state.IsAnyRunning() || is_restart_required || + (!old_state.is_interrupted && new_state.is_interrupted)) { + if (old_state.output_running && !new_state.output_running) { + LOGI() << "Stopping Playout buffer..."; + audio_device_buffer_->StopPlayout(); + } + if (old_state.input_running && !new_state.input_running) { + LOGI() << "Stopping Record buffer..."; + audio_device_buffer_->StopRecording(); + } + } + } + + if ((!old_state.output_enabled && new_state.output_enabled) || + (!old_state.input_enabled && new_state.input_enabled)) { + if (observer_ != nullptr) { + // Invoke here before configuring nodes. In iOS, session configuration is required before + // enabling AGC, muted talker etc. 
+ observer_->OnEngineWillStart(audio_engine_, new_state.output_enabled, + new_state.input_enabled); + } } if (!old_state.output_enabled && new_state.output_enabled) { LOGI() << "Enabling output for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); - if (!audio_engine_.outputNode.voiceProcessingEnabled) { - NSError* error = nil; - BOOL set_vp_result = [audio_engine_.outputNode setVoiceProcessingEnabled:YES error:&error]; - if (!set_vp_result) { - NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); - RTC_DCHECK(set_vp_result); - } - LOGI() << "setVoiceProcessingEnabled (output) result: " << set_vp_result ? "YES" : "NO"; - } + // Turning voice processing on outputNode, will turn on for inputNode also and mic indicator + // goes on. if (!audio_engine_.outputNode.voiceProcessingEnabled) { + // NSError* error = nil; + // BOOL set_vp_result = [audio_engine_.outputNode setVoiceProcessingEnabled:YES error:&error]; + // if (!set_vp_result) { + // NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); + // RTC_DCHECK(set_vp_result); + // } + // LOGI() << "setVoiceProcessingEnabled (output) result: " << set_vp_result ? 
"YES" : "NO"; + // } + AVAudioFormat* output_node_format = [this->OutputNode() outputFormatForBus:0]; + + LOGI() << "Output format sampleRate: " << output_node_format.sampleRate + << " channels: " << output_node_format.channelCount; + + AVAudioFormat* engine_output_format = [[AVAudioFormat alloc] + initWithCommonFormat:output_node_format.commonFormat // Usually float32 + sampleRate:output_node_format.sampleRate + channels:1 + interleaved:output_node_format.interleaved]; + + audio_device_buffer_->SetPlayoutSampleRate(engine_output_format.sampleRate); + audio_device_buffer_->SetPlayoutChannels(engine_output_format.channelCount); + RTC_DCHECK(audio_device_buffer_ != nullptr); + fine_audio_buffer_.reset(new FineAudioBuffer(audio_device_buffer_.get())); + + AVAudioFormat* rtc_output_format = + [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 + sampleRate:engine_output_format.sampleRate + channels:1 + interleaved:YES]; AVAudioSourceNodeRenderBlock source_block = ^OSStatus(BOOL* isSilence, const AudioTimeStamp* timestamp, AVAudioFrameCount frameCount, @@ -805,49 +995,56 @@ return noErr; }; - source_node_ = [[AVAudioSourceNode alloc] initWithFormat:rtc_internal_format_ + source_node_ = [[AVAudioSourceNode alloc] initWithFormat:rtc_output_format renderBlock:source_block]; - [audio_engine_ attachNode:source_node_]; - [audio_engine_ connect:source_node_ - to:audio_engine_.mainMixerNode - format:engine_internal_format_]; + if (!(this->observer_ != nullptr && + this->observer_->OnEngineWillConnectOutput( + audio_engine_, source_node_, audio_engine_.mainMixerNode, engine_output_format))) { + // Default implementation. 
+ [audio_engine_ connect:source_node_ + to:audio_engine_.mainMixerNode + format:engine_output_format]; + } [audio_engine_ connect:audio_engine_.mainMixerNode - to:audio_engine_.outputNode - format:engine_internal_format_]; + to:this->OutputNode() + format:engine_output_format]; } else if (old_state.output_enabled && !new_state.output_enabled) { LOGI() << "Disabling output for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); // Disconnect - [audio_engine_ disconnectNodeInput:source_node_]; - [audio_engine_ disconnectNodeOutput:source_node_]; - // Detach - [audio_engine_ detachNode:source_node_]; - // Release - source_node_ = nil; + if (source_node_ != nil) { + [audio_engine_ disconnectNodeInput:source_node_]; + [audio_engine_ disconnectNodeOutput:source_node_]; + [audio_engine_ detachNode:source_node_]; + source_node_ = nil; + } } if (!old_state.input_enabled && new_state.input_enabled) { LOGI() << "Enabling input for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); - if (!audio_engine_.inputNode.voiceProcessingEnabled) { + if (!this->InputNode().voiceProcessingEnabled) { NSError* error = nil; - BOOL set_vp_result = [audio_engine_.inputNode setVoiceProcessingEnabled:YES error:&error]; + BOOL set_vp_result = [this->InputNode() setVoiceProcessingEnabled:YES error:&error]; if (!set_vp_result) { NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); RTC_DCHECK(set_vp_result); } LOGI() << "setVoiceProcessingEnabled (input) result: " << set_vp_result ? "YES" : "NO"; + } - RTC_DCHECK(audio_engine_.inputNode.isVoiceProcessingAGCEnabled); + if (!this->InputNode().isVoiceProcessingAGCEnabled) { + LOGW() << "voiceProcessingAGCEnabled (input) is false, ensure AVAudioSession.Mode is " + "videoChat or voiceChat."; } - if (audio_engine_.inputNode.voiceProcessingEnabled) { + if (this->InputNode().voiceProcessingEnabled) { // Muted talker detection. 
if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, tvOS 17.0, visionOS 1.0, *)) { auto listener_block = ^(AVAudioVoiceProcessingSpeechActivityEvent event) { @@ -868,21 +1065,13 @@ }; BOOL set_listener_result = - [audio_engine_.inputNode setMutedSpeechActivityEventListener:listener_block]; - LOGI() << "setMutedSpeechActivityEventListener result: " << set_listener_result ? "YES" - : "NO"; - RTC_DCHECK(set_listener_result); - } - - // Other audio ducking. - // iOS 17.0+, iPadOS 17.0+, Mac Catalyst 17.0+, macOS 14.0+, visionOS 1.0+ - if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { - AVAudioVoiceProcessingOtherAudioDuckingConfiguration ducking_config; - ducking_config.enableAdvancedDucking = YES; - ducking_config.duckingLevel = AVAudioVoiceProcessingOtherAudioDuckingLevelMid; - - LOGI() << "setVoiceProcessingOtherAudioDuckingConfiguration"; - [audio_engine_.inputNode setVoiceProcessingOtherAudioDuckingConfiguration:ducking_config]; + [this->InputNode() setMutedSpeechActivityEventListener:listener_block]; + if (set_listener_result) { + LOGI() << "setMutedSpeechActivityEventListener success"; + } else { + LOGW() << "setMutedSpeechActivityEventListener failed, ensure AVAudioSession.Mode is " + "videoChat or voiceChat."; + } } } @@ -892,6 +1081,32 @@ input_mixer_node_ = [[AVAudioMixerNode alloc] init]; [audio_engine_ attachNode:input_mixer_node_]; + AVAudioFormat* input_node_format = [this->InputNode() outputFormatForBus:0]; + // Example formats: + // Airpods: 1 ch, 24000 Hz, Float32 + // Mac: 9 ch, 48000 Hz, Float32 + LOGI() << "Input format, sampleRate: " << input_node_format.sampleRate + << " channels: " << input_node_format.channelCount; + + // When VoiceProcessingIO is enabled, channels must be reduced from Mac's default 9 channels + // to 2 or lower. 
+ AVAudioFormat* engine_input_format = [[AVAudioFormat alloc] + initWithCommonFormat:input_node_format.commonFormat // Usually float32 + sampleRate:input_node_format.sampleRate + channels:1 + interleaved:input_node_format.interleaved]; + + audio_device_buffer_->SetRecordingSampleRate(engine_input_format.sampleRate); + audio_device_buffer_->SetRecordingChannels(engine_input_format.channelCount); + RTC_DCHECK(audio_device_buffer_ != nullptr); + fine_audio_buffer_.reset(new FineAudioBuffer(audio_device_buffer_.get())); + + AVAudioFormat* rtc_input_format = + [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 + sampleRate:engine_input_format.sampleRate + channels:1 + interleaved:YES]; + AVAudioSinkNodeReceiverBlock sink_block = ^OSStatus(const AudioTimeStamp* timestamp, AVAudioFrameCount frameCount, const AudioBufferList* inputData) { @@ -906,51 +1121,90 @@ return noErr; }; + if (!(observer_ != nullptr && + observer_->OnEngineWillConnectInput(audio_engine_, this->InputNode(), input_mixer_node_, + engine_input_format))) { + // Default implementation. + [audio_engine_ connect:this->InputNode() to:input_mixer_node_ format:engine_input_format]; + } + sink_node_ = [[AVAudioSinkNode alloc] initWithReceiverBlock:sink_block]; [audio_engine_ attachNode:sink_node_]; - // InputNode -> InputEQNode -> InputMixerNode -> SinkNode -> RTC - // [audio_engine_ connect:audio_engine_.inputNode to:input_eq_node_ format:input_format]; - - [audio_engine_ connect:audio_engine_.inputNode - to:input_mixer_node_ - format:engine_internal_format_]; // Convert to RTC's internal format before passing buffers to SinkNode. 
- [audio_engine_ connect:input_mixer_node_ to:sink_node_ format:rtc_internal_format_]; + [audio_engine_ connect:input_mixer_node_ to:sink_node_ format:rtc_input_format]; } else if (old_state.input_enabled && !new_state.input_enabled) { LOGI() << "Disabling input for AVAudioEngine..."; RTC_DCHECK(!audio_engine_.running); // Disconnect input eq - [audio_engine_ disconnectNodeInput:input_eq_node_]; - [audio_engine_ disconnectNodeOutput:input_eq_node_]; - [audio_engine_ detachNode:input_eq_node_]; - input_eq_node_ = nil; + if (input_eq_node_ != nil) { + [audio_engine_ disconnectNodeInput:input_eq_node_]; + [audio_engine_ disconnectNodeOutput:input_eq_node_]; + [audio_engine_ detachNode:input_eq_node_]; + input_eq_node_ = nil; + } // InputMixerNode - [audio_engine_ disconnectNodeInput:input_mixer_node_]; - [audio_engine_ disconnectNodeOutput:input_mixer_node_]; - [audio_engine_ detachNode:input_mixer_node_]; - input_mixer_node_ = nil; + if (input_mixer_node_ != nil) { + [audio_engine_ disconnectNodeInput:input_mixer_node_]; + [audio_engine_ disconnectNodeOutput:input_mixer_node_]; + [audio_engine_ detachNode:input_mixer_node_]; + input_mixer_node_ = nil; + } // SinkNode - [audio_engine_ disconnectNodeInput:sink_node_]; - [audio_engine_ disconnectNodeOutput:sink_node_]; - [audio_engine_ detachNode:sink_node_]; - sink_node_ = nil; + if (sink_node_ != nil) { + [audio_engine_ disconnectNodeInput:sink_node_]; + [audio_engine_ disconnectNodeOutput:sink_node_]; + [audio_engine_ detachNode:sink_node_]; + sink_node_ = nil; + } } if (new_state.input_enabled) { - if (audio_engine_.inputNode.voiceProcessingEnabled) { + if (this->InputNode().voiceProcessingEnabled) { // Re-apply muted state. 
- audio_engine_.inputNode.voiceProcessingInputMuted = new_state.input_muted; + this->InputNode().voiceProcessingInputMuted = new_state.input_muted; } } +#if !TARGET_OS_TV + if (new_state.input_enabled && this->InputNode().voiceProcessingEnabled && + (!old_state.input_enabled || (old_state.advanced_ducking != new_state.advanced_ducking || + old_state.ducking_level != new_state.ducking_level))) { + // Other audio ducking. + // iOS 17.0+, iPadOS 17.0+, Mac Catalyst 17.0+, macOS 14.0+, visionOS 1.0+ + if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { + AVAudioVoiceProcessingOtherAudioDuckingConfiguration ducking_config; + ducking_config.enableAdvancedDucking = new_state.advanced_ducking; + ducking_config.duckingLevel = + (AVAudioVoiceProcessingOtherAudioDuckingLevel)new_state.ducking_level; + + LOGI() << "setVoiceProcessingOtherAudioDuckingConfiguration"; + this->InputNode().voiceProcessingOtherAudioDuckingConfiguration = ducking_config; + } + } +#endif + + if ((!old_state.output_running && new_state.output_running && !new_state.input_running) || + (!old_state.output_enabled && new_state.output_enabled && new_state.input_running)) { + LOGI() << "Starting Playout buffer..."; + audio_device_buffer_->StartPlayout(); + fine_audio_buffer_->ResetPlayout(); + } + + if ((!old_state.input_running && new_state.input_running && !new_state.output_running) || + (!old_state.input_enabled && new_state.input_enabled && new_state.output_running)) { + LOGI() << "Starting Record buffer..."; + audio_device_buffer_->StartRecording(); + fine_audio_buffer_->ResetRecord(); + } + if (new_state.IsAnyRunning()) { if (!old_state.IsAnyRunning() || (old_state.is_interrupted && !new_state.is_interrupted) || - did_change_audio_graph) { + is_restart_required) { LOGI() << "Starting AVAudioEngine..."; NSError* error = nil; BOOL start_result = [audio_engine_ startAndReturnError:&error]; @@ -961,7 +1215,7 @@ } } - if (old_state.IsAnyEnabled() && !new_state.IsAnyEnabled()) { + if 
(is_release_engine) { LOGI() << "Releasing AVAudioEngine..."; audio_engine_ = nil; } @@ -970,10 +1224,96 @@ // ---------------------------------------------------------------------------------------------------- // Private - EngineState +void AudioEngineDevice::StartRenderLoop() { + RTC_DCHECK_RUN_ON(render_thread_.get()); + + // Constants for timing and frame management + const double sample_rate = manual_render_rtc_format_.sampleRate; + // Fixed number of frames to render per cycle + const double target_frame_count = sample_rate / 100; + const double nanoseconds_per_frame = 1e9 / sample_rate; + const double target_cycle_time_ns = target_frame_count * nanoseconds_per_frame; + + // Variables for timing management + uint64_t last_cycle_time = mach_absolute_time(); + double sleep_time_ms = 5.0; // Initial sleep time + const double min_sleep_time_ms = 1.0; + const double max_sleep_time_ms = 20.0; + + // Simple moving average for sleep time adjustment + constexpr size_t avg_window_size = 5; + std::array cycle_times{}; + size_t cycle_index = 0; + + while (!render_thread_->IsQuitting()) { + RTC_DCHECK(render_buffer_ != nullptr); + AudioBufferList* abl = const_cast(render_buffer_.audioBufferList); + + // Calculate timing + uint64_t current_time = mach_absolute_time(); + double elapsed_time_ns = (current_time - last_cycle_time) * machTickUnitsToNanoseconds_; + + // Update moving average of cycle times + cycle_times[cycle_index] = elapsed_time_ns; + cycle_index = (cycle_index + 1) % avg_window_size; + + // Calculate average cycle time + double avg_cycle_time = 0; + for (double time : cycle_times) { + avg_cycle_time += time; + } + avg_cycle_time /= avg_window_size; + + // Adjust sleep time based on average cycle time + if (avg_cycle_time > 0) { // Only adjust if we have valid timing data + double time_diff = target_cycle_time_ns - avg_cycle_time; + double adjustment = + (time_diff / target_cycle_time_ns) * sleep_time_ms * 0.1; // Gradual adjustment + sleep_time_ms = 
std::clamp(sleep_time_ms + adjustment, min_sleep_time_ms, max_sleep_time_ms); + } + + // Set fixed frame count + unsigned int frames_to_render = target_frame_count; + + // Adjust buffer size for the fixed frame count + abl->mBuffers[0].mDataByteSize = frames_to_render * kAudioSampleSize; + + // Render audio + OSStatus err = noErr; + AVAudioEngineManualRenderingStatus result = render_block_(frames_to_render, abl, &err); + + if (result == AVAudioEngineManualRenderingStatusSuccess) { + LOGI() << "Render success, frames: " << frames_to_render + << " frameLength: " << render_buffer_.frameLength + << " sleep_time_ms: " << sleep_time_ms; + } else { + LOGI() << "Render error: " << err << " frames: " << frames_to_render; + // On error, reset sleep time to default + sleep_time_ms = 5.0; + } + + RTC_DCHECK(abl->mNumberBuffers == 1); + const int16_t* rtc_buffer = + static_cast(static_cast(abl->mBuffers[0].mData)); + + last_cycle_time = mach_absolute_time(); + + fine_audio_buffer_->DeliverRecordedData( + rtc::ArrayView(rtc_buffer, frames_to_render), kFixedRecordDelayEstimate, + absl::nullopt); + + if (!render_thread_->IsQuitting()) { + render_thread_->SleepMs(static_cast(sleep_time_ms)); + } + } +} + bool AudioEngineDevice::EngineState::operator==(const EngineState& rhs) const { return input_enabled == rhs.input_enabled && output_enabled == rhs.output_enabled && input_running == rhs.input_running && output_running == rhs.output_running && - input_muted == rhs.input_muted && is_interrupted == rhs.is_interrupted; + input_muted == rhs.input_muted && is_interrupted == rhs.is_interrupted && + is_manual_mode == rhs.is_manual_mode && voice_processing == rhs.voice_processing && + advanced_ducking == rhs.advanced_ducking && ducking_level == rhs.ducking_level; } bool AudioEngineDevice::EngineState::operator!=(const EngineState& rhs) const { @@ -981,63 +1321,24 @@ } // ---------------------------------------------------------------------------------------------------- -// Private - Audio 
session +// Private - Misc -#if defined(WEBRTC_IOS) -bool AudioEngineDevice::ConfigureAudioSession() { +AVAudioInputNode* AudioEngineDevice::InputNode() { RTC_DCHECK_RUN_ON(thread_); - RTCLog(@"Configuring audio session."); - if (has_configured_session_) { - RTCLogWarning(@"Audio session already configured."); - return false; - } - RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; - [session lockForConfiguration]; - bool success = [session configureWebRTCSession:nil]; - [session unlockForConfiguration]; - if (success) { - has_configured_session_ = true; - RTCLog(@"Configured audio session."); - } else { - RTCLog(@"Failed to configure audio session."); - } - return success; -} + RTC_DCHECK(audio_engine_ != nil); + RTC_DCHECK(engine_state_.input_enabled); + RTC_DCHECK(!engine_state_.is_manual_mode); -bool AudioEngineDevice::ConfigureAudioSessionLocked() { - RTC_DCHECK_RUN_ON(thread_); - RTCLog(@"Configuring audio session."); - if (has_configured_session_) { - RTCLogWarning(@"Audio session already configured."); - return false; - } - RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; - bool success = [session configureWebRTCSession:nil]; - if (success) { - has_configured_session_ = true; - RTCLog(@"Configured audio session."); - } else { - RTCLog(@"Failed to configure audio session."); - } - return success; + return audio_engine_.inputNode; } -void AudioEngineDevice::UnconfigureAudioSession() { +AVAudioOutputNode* AudioEngineDevice::OutputNode() { RTC_DCHECK_RUN_ON(thread_); - RTCLog(@"Unconfiguring audio session."); - if (!has_configured_session_) { - RTCLogWarning(@"Audio session already unconfigured."); - return; - } - RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; - [session lockForConfiguration]; - [session unconfigureWebRTCSession:nil]; - [session endWebRTCSession:nil]; - [session unlockForConfiguration]; - has_configured_session_ = false; 
- RTCLog(@"Unconfigured audio session."); + RTC_DCHECK(audio_engine_ != nil); + RTC_DCHECK(engine_state_.output_enabled || engine_state_.is_manual_mode); + + return audio_engine_.outputNode; } -#endif // ---------------------------------------------------------------------------------------------------- // Private - Debug diff --git a/modules/audio_device/include/audio_device.h b/modules/audio_device/include/audio_device.h index 592108a607..b38f5f8d61 100644 --- a/modules/audio_device/include/audio_device.h +++ b/modules/audio_device/include/audio_device.h @@ -16,6 +16,13 @@ #include "api/task_queue/task_queue_factory.h" #include "modules/audio_device/include/audio_device_defines.h" #include "rtc_base/ref_count.h" +#include "sdk/objc/base/RTCMacros.h" + +RTC_FWD_DECL_OBJC_CLASS(AVAudioEngine); +RTC_FWD_DECL_OBJC_CLASS(AVAudioFormat); +RTC_FWD_DECL_OBJC_CLASS(AVAudioNode); +RTC_FWD_DECL_OBJC_CLASS(AVAudioSourceNode); +RTC_FWD_DECL_OBJC_CLASS(AVAudioMixerNode); namespace webrtc { @@ -61,14 +68,12 @@ class AudioDeviceModule : public rtc::RefCountInterface { public: // Creates a default ADM for usage in production code. static rtc::scoped_refptr Create( - AudioLayer audio_layer, - TaskQueueFactory* task_queue_factory, + AudioLayer audio_layer, TaskQueueFactory* task_queue_factory, bool bypass_voice_processing = false); // Creates an ADM with support for extra test methods. Don't use this factory // in production code. 
static rtc::scoped_refptr CreateForTest( - AudioLayer audio_layer, - TaskQueueFactory* task_queue_factory, + AudioLayer audio_layer, TaskQueueFactory* task_queue_factory, bool bypass_voice_processing = false); // Retrieve the currently utilized audio layer @@ -179,7 +184,7 @@ class AudioDeviceModule : public rtc::RefCountInterface { virtual int GetRecordAudioParameters(AudioParameters* params) const = 0; #endif // WEBRTC_IOS - virtual int32_t SetObserver(AudioDeviceObserver* observer) const { return -1; } + virtual int32_t SetObserver(AudioDeviceObserver* observer) { return -1; } virtual int32_t GetPlayoutDevice() const { return -1; } virtual int32_t GetRecordingDevice() const { return -1; } @@ -209,6 +214,27 @@ class AudioDeviceObserver { virtual void OnDevicesUpdated() {} virtual void OnSpeechActivityEvent( AudioDeviceModule::SpeechActivityEvent event) {} + + virtual void OnEngineWillStart(AVAudioEngine* engine, bool playout_enabled, + bool recording_enabled) {} + + // Override the input node configuration with a custom implementation. + // Return true if the observer handled the connection; return false to use + // the default implementation. + virtual bool OnEngineWillConnectInput(AVAudioEngine* engine, + AVAudioNode* src, + AVAudioNode* dst, + AVAudioFormat* format) { + return false; + } + + // Override the output node configuration with a custom implementation. + // Return true if the observer handled the connection; return false to use + // the default implementation. 
+ virtual bool OnEngineWillConnectOutput(AVAudioEngine* engine, + AVAudioNode* src, + AVAudioNode* dst, + AVAudioFormat* format) { + return false; + } }; } // namespace webrtc diff --git a/sdk/BUILD.gn b/sdk/BUILD.gn index 50ce76bd27..8a0acbcd79 100644 --- a/sdk/BUILD.gn +++ b/sdk/BUILD.gn @@ -292,6 +292,8 @@ if (is_ios || is_mac) { visibility = [ "*" ] sources = [ + "objc/native/src/audio/audio_device_ios.h", + "objc/native/src/audio/audio_device_ios.mm", "objc/native/src/audio/audio_device_module_ios.h", "objc/native/src/audio/audio_device_module_ios.mm", "objc/native/src/audio/helpers.h", diff --git a/sdk/objc/api/RTCAudioRendererAdapter.mm b/sdk/objc/api/RTCAudioRendererAdapter.mm index 20b4a651b9..5d7a0e8d6e 100644 --- a/sdk/objc/api/RTCAudioRendererAdapter.mm +++ b/sdk/objc/api/RTCAudioRendererAdapter.mm @@ -31,34 +31,16 @@ void OnData(const void *audio_data, int bits_per_sample, int sample_rate, size_t number_of_channels, size_t number_of_frames, absl::optional absolute_capture_timestamp_ms) override { - OSStatus status; - AudioChannelLayout acl = {}; - acl.mChannelLayoutTag = - (number_of_channels == 2) ? kAudioChannelLayoutTag_Stereo : kAudioChannelLayoutTag_Mono; - - AudioStreamBasicDescription sd = { - .mSampleRate = static_cast(sample_rate), - .mFormatID = kAudioFormatLinearPCM, - .mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked, - .mBytesPerPacket = static_cast(number_of_channels * 2), - .mFramesPerPacket = 1, - .mBytesPerFrame = static_cast(number_of_channels * 2), - .mChannelsPerFrame = static_cast(number_of_channels), - .mBitsPerChannel = 16, - .mReserved = 0}; - - CMFormatDescriptionRef formatDescription = nullptr; - status = CMAudioFormatDescriptionCreate(kCFAllocatorDefault, &sd, sizeof(acl), &acl, 0, NULL, - NULL, &formatDescription); - if (status != noErr) { - NSLog(@"RTCAudioTrack: Failed to create audio formatDescription description. 
Error: %d", - (int)status); + if (sample_rate <= 0 || number_of_channels == 0 || number_of_channels > 2) { + NSLog(@"Invalid sample rate or channel count"); return; } AVAudioFormat *format = - [[AVAudioFormat alloc] initWithCMAudioFormatDescription:formatDescription]; - CFRelease(formatDescription); + [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 + sampleRate:sample_rate + channels:(AVAudioChannelCount)number_of_channels + interleaved:YES]; AVAudioFrameCount frameCount = static_cast(number_of_frames); AVAudioPCMBuffer *pcmBuffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:format @@ -70,24 +52,9 @@ void OnData(const void *audio_data, int bits_per_sample, int sample_rate, pcmBuffer.frameLength = frameCount; - // Handle both mono and stereo const int16_t *inputData = static_cast(audio_data); - if (number_of_channels == 1) { - // Mono: straight copy - memcpy(pcmBuffer.int16ChannelData[0], inputData, number_of_frames * sizeof(int16_t)); - } else if (number_of_channels == 2) { - // Stereo: manual deinterleave - int16_t *leftChannel = pcmBuffer.int16ChannelData[0]; - int16_t *rightChannel = pcmBuffer.int16ChannelData[1]; - - for (size_t i = 0; i < number_of_frames; i++) { - leftChannel[i] = inputData[i * 2]; - rightChannel[i] = inputData[i * 2 + 1]; - } - } else { - NSLog(@"Unsupported number of channels: %zu", number_of_channels); - return; - } + const size_t copy_size = number_of_frames * number_of_channels * sizeof(int16_t); + memcpy(pcmBuffer.int16ChannelData[0], inputData, copy_size); [adapter_.audioRenderer renderPCMBuffer:pcmBuffer]; } diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h index 1efa6411b7..2409d6ec70 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h @@ -14,11 +14,12 @@ * limitations under the License. 
*/ +#import #import #import -#import "RTCMacros.h" #import "RTCIODevice.h" +#import "RTCMacros.h" NS_ASSUME_NONNULL_BEGIN @@ -29,6 +30,11 @@ typedef NS_ENUM(NSInteger, RTCSpeechActivityEvent) { typedef void (^RTCDevicesDidUpdateCallback)(); typedef void (^RTCSpeechActivityCallback)(RTCSpeechActivityEvent); +typedef void (^RTCOnEngineWillStart)(AVAudioEngine *, BOOL, BOOL); +typedef bool (^RTCOnEngineWillConnectInput)(AVAudioEngine *, AVAudioNode *, AVAudioNode *, + AVAudioFormat *); +typedef bool (^RTCOnEngineWillConnectOutput)(AVAudioEngine *, AVAudioNode *, AVAudioNode *, + AVAudioFormat *); RTC_OBJC_EXPORT @interface RTC_OBJC_TYPE (RTCAudioDeviceModule) : NSObject @@ -39,8 +45,8 @@ RTC_OBJC_EXPORT @property(nonatomic, readonly) BOOL playing; @property(nonatomic, readonly) BOOL recording; -@property(nonatomic, assign) RTC_OBJC_TYPE(RTCIODevice) *outputDevice; -@property(nonatomic, assign) RTC_OBJC_TYPE(RTCIODevice) *inputDevice; +@property(nonatomic, assign) RTC_OBJC_TYPE(RTCIODevice) * outputDevice; +@property(nonatomic, assign) RTC_OBJC_TYPE(RTCIODevice) * inputDevice; // Executes low-level API's in sequence to switch the device // Use outputDevice / inputDevice property unless you need to know if setting the device is @@ -50,6 +56,9 @@ RTC_OBJC_EXPORT - (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback; - (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback; +- (BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback; +- (BOOL)setOnEngineWillConnectInputCallback:(nullable RTCOnEngineWillConnectInput)callback; +- (BOOL)setOnEngineWillConnectOutputCallback:(nullable RTCOnEngineWillConnectOutput)callback; - (BOOL)startPlayout; - (BOOL)stopPlayout; @@ -58,6 +67,18 @@ RTC_OBJC_EXPORT - (BOOL)stopRecording; - (BOOL)initRecording; +- (BOOL)initAndStartRecording; + +// Manual rendering. 
+@property(nonatomic, readonly, getter=isManualRenderingMode) BOOL manualRenderingMode; +- (BOOL)setManualRenderingMode:(BOOL)enabled; + +// Ducking. +@property(nonatomic, assign, getter=isAdvancedDuckingEnabled) BOOL advancedDuckingEnabled; +@property(nonatomic, assign) + AVAudioVoiceProcessingOtherAudioDuckingLevel duckingLevel API_AVAILABLE( + ios(17.0), macos(14.0), visionos(1.0)) API_UNAVAILABLE(tvos); + @end NS_ASSUME_NONNULL_END diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm index e99217a4dc..fe475b075a 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm @@ -16,11 +16,12 @@ #include -#import "RTCAudioDeviceModule.h" #import "RTCAudioDeviceModule+Private.h" +#import "RTCAudioDeviceModule.h" #import "RTCIODevice+Private.h" #import "base/RTCLogging.h" +#import "modules/audio_device/audio_engine_device.h" #import "sdk/objc/native/api/audio_device_module.h" class AudioDeviceObserver : public webrtc::AudioDeviceObserver { @@ -43,7 +44,38 @@ void OnSpeechActivityEvent(webrtc::AudioDeviceModule::SpeechActivityEvent event) os_unfair_lock_unlock(&lock_); } - void SetDevicesUpdatedCallBack(RTCDevicesDidUpdateCallback cb) { + void OnEngineWillStart(AVAudioEngine *engine, bool playout_enabled, + bool recording_enabled) override { + os_unfair_lock_lock(&lock_); + if (on_engine_will_start_) { + on_engine_will_start_(engine, playout_enabled, recording_enabled); + } + os_unfair_lock_unlock(&lock_); + } + + bool OnEngineWillConnectInput(AVAudioEngine *engine, AVAudioNode *src, AVAudioNode *dst, + AVAudioFormat *format) override { + bool result = false; + os_unfair_lock_lock(&lock_); + if (on_engine_will_connect_input_) { + result = on_engine_will_connect_input_(engine, src, dst, format); + } + os_unfair_lock_unlock(&lock_); + return result; + } + + bool OnEngineWillConnectOutput(AVAudioEngine *engine, AVAudioNode *src, AVAudioNode 
*dst, + AVAudioFormat *format) override { + bool result = false; + os_unfair_lock_lock(&lock_); + if (on_engine_will_connect_output_) { + result = on_engine_will_connect_output_(engine, src, dst, format); + } + os_unfair_lock_unlock(&lock_); + return result; + } + + void SetDevicesUpdatedCallBack(RTCDevicesDidUpdateCallback cb) { os_unfair_lock_lock(&lock_); on_devices_did_update_callback_ = cb; os_unfair_lock_unlock(&lock_); @@ -55,10 +87,30 @@ void SetOnSpeechActivityCallBack(RTCSpeechActivityCallback cb) { os_unfair_lock_unlock(&lock_); } + void SetOnEngineWillStartCallback(RTCOnEngineWillStart cb) { + os_unfair_lock_lock(&lock_); + on_engine_will_start_ = cb; + os_unfair_lock_unlock(&lock_); + } + + void SetOnEngineWillConnectInputCallback(RTCOnEngineWillConnectInput cb) { + os_unfair_lock_lock(&lock_); + on_engine_will_connect_input_ = cb; + os_unfair_lock_unlock(&lock_); + } + + void SetOnEngineWillConnectOutputCallback(RTCOnEngineWillConnectOutput cb) { + os_unfair_lock_lock(&lock_); + on_engine_will_connect_output_ = cb; + os_unfair_lock_unlock(&lock_); + } + bool IsAnyCallbackAttached() { os_unfair_lock_lock(&lock_); - bool result = - on_devices_did_update_callback_ != nullptr || on_speech_activity_callback_ != nullptr; + bool result = on_devices_did_update_callback_ != nullptr || + on_speech_activity_callback_ != nullptr || on_engine_will_start_ != nullptr || + on_engine_will_connect_input_ != nullptr || + on_engine_will_connect_output_ != nullptr; os_unfair_lock_unlock(&lock_); return result; } @@ -67,6 +119,9 @@ bool IsAnyCallbackAttached() { os_unfair_lock lock_; RTCDevicesDidUpdateCallback on_devices_did_update_callback_; RTCSpeechActivityCallback on_speech_activity_callback_; + RTCOnEngineWillStart on_engine_will_start_; + RTCOnEngineWillConnectInput on_engine_will_connect_input_; + RTCOnEngineWillConnectOutput on_engine_will_connect_output_; RTCSpeechActivityEvent ConvertSpeechActivityEvent( webrtc::AudioDeviceModule::SpeechActivityEvent event) { @@ 
-87,9 +142,8 @@ @implementation RTC_OBJC_TYPE (RTCAudioDeviceModule) { AudioDeviceObserver *_observer; } -- (instancetype)initWithNativeModule:(rtc::scoped_refptr )module - workerThread:(rtc::Thread * )workerThread { - +- (instancetype)initWithNativeModule:(rtc::scoped_refptr)module + workerThread:(rtc::Thread *)workerThread { RTCLogInfo(@"RTCAudioDeviceModule initWithNativeModule:workerThread:"); self = [super init]; @@ -102,21 +156,15 @@ - (instancetype)initWithNativeModule:(rtc::scoped_refptr *)outputDevices { - - return _workerThread->BlockingCall([self] { - return [self _outputDevices]; - }); + return _workerThread->BlockingCall([self] { return [self _outputDevices]; }); } - (NSArray *)inputDevices { - return _workerThread->BlockingCall([self] { - return [self _inputDevices]; - }); + return _workerThread->BlockingCall([self] { return [self _inputDevices]; }); } - (RTC_OBJC_TYPE(RTCIODevice) *)outputDevice { return _workerThread->BlockingCall([self] { - NSArray *devices = [self _outputDevices]; int16_t devicesCount = (int16_t)([devices count]); int16_t index = _native->GetPlayoutDevice(); @@ -129,14 +177,12 @@ - (instancetype)initWithNativeModule:(rtc::scoped_refptrBlockingCall([self, device] { - NSUInteger index = 0; NSArray *devices = [self _outputDevices]; @@ -145,7 +191,8 @@ - (BOOL)trySetOutputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { } if (device != nil) { - index = [devices indexOfObjectPassingTest:^BOOL(RTC_OBJC_TYPE(RTCIODevice) *e, NSUInteger i, BOOL *stop) { + index = [devices indexOfObjectPassingTest:^BOOL(RTC_OBJC_TYPE(RTCIODevice) * e, NSUInteger i, + BOOL * stop) { return (*stop = [e.deviceId isEqualToString:device.deviceId]); }]; if (index == NSNotFound) { @@ -155,11 +202,9 @@ - (BOOL)trySetOutputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { _native->StopPlayout(); - if (_native->SetPlayoutDevice(index) == 0 - && _native->InitPlayout() == 0 - && _native->StartPlayout() == 0) { - - return YES; + if (_native->SetPlayoutDevice(index) == 0 && 
_native->InitPlayout() == 0 && + _native->StartPlayout() == 0) { + return YES; } return NO; @@ -167,9 +212,7 @@ - (BOOL)trySetOutputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { } - (RTC_OBJC_TYPE(RTCIODevice) *)inputDevice { - return _workerThread->BlockingCall([self] { - NSArray *devices = [self _inputDevices]; int16_t devicesCount = (int16_t)([devices count]); int16_t index = _native->GetRecordingDevice(); @@ -182,14 +225,12 @@ - (BOOL)trySetOutputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { }); } -- (void)setInputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { - [self trySetInputDevice: device]; +- (void)setInputDevice:(RTC_OBJC_TYPE(RTCIODevice) *)device { + [self trySetInputDevice:device]; } -- (BOOL)trySetInputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { - +- (BOOL)trySetInputDevice:(RTC_OBJC_TYPE(RTCIODevice) *)device { return _workerThread->BlockingCall([self, device] { - NSUInteger index = 0; NSArray *devices = [self _inputDevices]; @@ -198,7 +239,8 @@ - (BOOL)trySetInputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { } if (device != nil) { - index = [devices indexOfObjectPassingTest:^BOOL(RTC_OBJC_TYPE(RTCIODevice) *e, NSUInteger i, BOOL *stop) { + index = [devices indexOfObjectPassingTest:^BOOL(RTC_OBJC_TYPE(RTCIODevice) * e, NSUInteger i, + BOOL * stop) { return (*stop = [e.deviceId isEqualToString:device.deviceId]); }]; if (index == NSNotFound) { @@ -208,11 +250,9 @@ - (BOOL)trySetInputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { _native->StopRecording(); - if (_native->SetRecordingDevice(index) == 0 - && _native->InitRecording() == 0 - && _native->StartRecording() == 0) { - - return YES; + if (_native->SetRecordingDevice(index) == 0 && _native->InitRecording() == 0 && + _native->StartRecording() == 0) { + return YES; } return NO; @@ -220,61 +260,44 @@ - (BOOL)trySetInputDevice: (RTC_OBJC_TYPE(RTCIODevice) *)device { } - (BOOL)playing { - - return _workerThread->BlockingCall([self] { - return _native->Playing(); - }); + return 
_workerThread->BlockingCall([self] { return _native->Playing(); }); } - (BOOL)recording { - - return _workerThread->BlockingCall([self] { - return _native->Recording(); - }); + return _workerThread->BlockingCall([self] { return _native->Recording(); }); } #pragma mark - Low-level access - (BOOL)startPlayout { - - return _workerThread->BlockingCall([self] { - return _native->StartPlayout() == 0; - }); + return _workerThread->BlockingCall([self] { return _native->StartPlayout() == 0; }); } - (BOOL)stopPlayout { - - return _workerThread->BlockingCall([self] { - return _native->StopPlayout() == 0; - }); + return _workerThread->BlockingCall([self] { return _native->StopPlayout() == 0; }); } - (BOOL)initPlayout { - - return _workerThread->BlockingCall([self] { - return _native->InitPlayout() == 0; - }); + return _workerThread->BlockingCall([self] { return _native->InitPlayout() == 0; }); } - (BOOL)startRecording { - - return _workerThread->BlockingCall([self] { - return _native->StartRecording() == 0; - }); + return _workerThread->BlockingCall([self] { return _native->StartRecording() == 0; }); } - (BOOL)stopRecording { - - return _workerThread->BlockingCall([self] { - return _native->StopRecording() == 0; - }); + return _workerThread->BlockingCall([self] { return _native->StopRecording() == 0; }); } - (BOOL)initRecording { + return _workerThread->BlockingCall([self] { return _native->InitRecording() == 0; }); +} - return _workerThread->BlockingCall([self] { - return _native->InitRecording() == 0; - }); +- (BOOL)initAndStartRecording { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return NO; + + return _workerThread->BlockingCall([module] { return module->InitAndStartRecording() == 0; }); } - (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback { @@ -293,13 +316,95 @@ - (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback { return YES; } +- 
(BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback { + _observer->SetOnEngineWillStartCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +- (BOOL)setOnEngineWillConnectInputCallback:(nullable RTCOnEngineWillConnectInput)callback { + _observer->SetOnEngineWillConnectInputCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +- (BOOL)setOnEngineWillConnectOutputCallback:(nullable RTCOnEngineWillConnectOutput)callback { + _observer->SetOnEngineWillConnectOutputCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +#pragma mark - Unique to AudioEngineDevice + +- (BOOL)isManualRenderingMode { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return NO; + + return _workerThread->BlockingCall([module] { + bool value = false; + return module->ManualRenderingMode(&value) == 0 ? value : NO; + }); +} + +- (BOOL)setManualRenderingMode:(BOOL)enabled { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return NO; + + return _workerThread->BlockingCall( + [module, enabled] { return module->SetManualRenderingMode(enabled) == 0; }); +} + +- (BOOL)isAdvancedDuckingEnabled { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return NO; + + return _workerThread->BlockingCall([module] { + bool value = false; + return module->AdvancedDucking(&value) == 0 ? 
value : NO; + }); +} + +- (void)setAdvancedDuckingEnabled:(BOOL)enabled { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return; + + _workerThread->BlockingCall( + [module, enabled] { return module->SetAdvancedDucking(enabled) == 0; }); +} + +- (AVAudioVoiceProcessingOtherAudioDuckingLevel)duckingLevel API_AVAILABLE(ios(17.0), macos(14.0), + visionos(1.0)) + API_UNAVAILABLE(tvos) { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return AVAudioVoiceProcessingOtherAudioDuckingLevelDefault; + + return _workerThread->BlockingCall([module] { + long value = false; + return module->DuckingLevel(&value) == 0 ? (AVAudioVoiceProcessingOtherAudioDuckingLevel)value + : AVAudioVoiceProcessingOtherAudioDuckingLevelDefault; + }); +} + +- (void)setDuckingLevel:(AVAudioVoiceProcessingOtherAudioDuckingLevel)value + API_AVAILABLE(ios(17.0), macos(14.0), visionos(1.0)) { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return; + + _workerThread->BlockingCall([module, value] { return module->SetDuckingLevel(value) == 0; }); +} + #pragma mark - Private - (NSArray *)_outputDevices { - char guid[webrtc::kAdmMaxGuidSize + 1] = {0}; char name[webrtc::kAdmMaxDeviceNameSize + 1] = {0}; - + NSMutableArray *result = [NSMutableArray array]; int16_t count = _native->PlayoutDevices(); @@ -309,8 +414,11 @@ - (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback { _native->PlayoutDeviceName(i, name, guid); NSString *strGUID = [[NSString alloc] initWithCString:guid encoding:NSUTF8StringEncoding]; NSString *strName = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding]; - RTC_OBJC_TYPE(RTCIODevice) *device = [[RTC_OBJC_TYPE(RTCIODevice) alloc] initWithType:RTCIODeviceTypeOutput deviceId:strGUID name:strName]; - [result addObject: device]; + RTC_OBJC_TYPE(RTCIODevice) *device = + [[RTC_OBJC_TYPE(RTCIODevice) alloc] 
initWithType:RTCIODeviceTypeOutput + deviceId:strGUID + name:strName]; + [result addObject:device]; } } @@ -318,10 +426,9 @@ - (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback { } - (NSArray *)_inputDevices { - char guid[webrtc::kAdmMaxGuidSize + 1] = {0}; char name[webrtc::kAdmMaxDeviceNameSize + 1] = {0}; - + NSMutableArray *result = [NSMutableArray array]; int16_t count = _native->RecordingDevices(); @@ -331,8 +438,11 @@ - (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback { _native->RecordingDeviceName(i, name, guid); NSString *strGUID = [[NSString alloc] initWithCString:guid encoding:NSUTF8StringEncoding]; NSString *strName = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding]; - RTC_OBJC_TYPE(RTCIODevice) *device = [[RTC_OBJC_TYPE(RTCIODevice) alloc] initWithType:RTCIODeviceTypeInput deviceId:strGUID name:strName]; - [result addObject: device]; + RTC_OBJC_TYPE(RTCIODevice) *device = + [[RTC_OBJC_TYPE(RTCIODevice) alloc] initWithType:RTCIODeviceTypeInput + deviceId:strGUID + name:strName]; + [result addObject:device]; } } diff --git a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h index cc45aba1ec..7c0484c7b9 100644 --- a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h +++ b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory+Native.h @@ -48,7 +48,7 @@ NS_ASSUME_NONNULL_BEGIN nativeVideoDecoderFactory: (std::unique_ptr)videoDecoderFactory audioDeviceModule: - (nullable webrtc::AudioDeviceModule *)audioDeviceModule + (rtc::scoped_refptr)audioDeviceModule audioProcessingModule: (rtc::scoped_refptr)audioProcessingModule bypassVoiceProcessing:(BOOL)bypassVoiceProcessing; @@ -62,7 +62,7 @@ NS_ASSUME_NONNULL_BEGIN (std::unique_ptr)videoEncoderFactory nativeVideoDecoderFactory: (std::unique_ptr)videoDecoderFactory - audioDeviceModule:(nullable webrtc::AudioDeviceModule *)audioDeviceModule + 
audioDeviceModule:(rtc::scoped_refptr)audioDeviceModule audioProcessingModule: (rtc::scoped_refptr)audioProcessingModule networkControllerFactory:(std::unique_ptr) diff --git a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm index 72ac70a781..18a2467969 100644 --- a/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm +++ b/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm @@ -47,6 +47,7 @@ #import "components/video_codec/RTCVideoEncoderFactoryH264.h" #include "media/base/media_constants.h" #include "modules/audio_device/include/audio_device.h" +#include "modules/audio_device/audio_engine_device.h" #include "modules/audio_processing/include/audio_processing.h" #include "sdk/objc/native/api/objc_audio_device_module.h" @@ -75,14 +76,6 @@ @implementation RTC_OBJC_TYPE (RTCPeerConnectionFactory) { @synthesize nativeFactory = _nativeFactory; @synthesize audioDeviceModule = _audioDeviceModule; -- (rtc::scoped_refptr)createAudioDeviceModule:(BOOL)bypassVoiceProcessing { -#if defined(WEBRTC_IOS) - return webrtc::CreateAudioDeviceModule(bypassVoiceProcessing); -#else - return nullptr; -#endif -} - - (instancetype)init { return [self initWithNativeAudioEncoderFactory:webrtc::CreateBuiltinAudioEncoderFactory() @@ -91,7 +84,7 @@ - (instancetype)init { RTCVideoEncoderFactoryH264) alloc] init]) nativeVideoDecoderFactory:webrtc::ObjCToNativeVideoDecoderFactory([[RTC_OBJC_TYPE( RTCVideoDecoderFactoryH264) alloc] init]) - audioDeviceModule:[self createAudioDeviceModule:NO].get() + audioDeviceModule:nullptr audioProcessingModule:nullptr bypassVoiceProcessing:NO]; } @@ -119,15 +112,15 @@ - (instancetype)init { } rtc::scoped_refptr audio_device_module; if (audioDevice) { + // TODO: Should be created on worker thread ? 
audio_device_module = webrtc::CreateAudioDeviceModule(audioDevice); - } else { - audio_device_module = [self createAudioDeviceModule:NO]; } + return [self initWithNativeAudioEncoderFactory:webrtc::CreateBuiltinAudioEncoderFactory() nativeAudioDecoderFactory:webrtc::CreateBuiltinAudioDecoderFactory() nativeVideoEncoderFactory:std::move(native_encoder_factory) nativeVideoDecoderFactory:std::move(native_decoder_factory) - audioDeviceModule:audio_device_module.get() + audioDeviceModule:audio_device_module audioProcessingModule:nullptr bypassVoiceProcessing:NO]; #endif @@ -164,7 +157,6 @@ - (instancetype)init { if (decoderFactory) { native_decoder_factory = webrtc::ObjCToNativeVideoDecoderFactory(decoderFactory); } - rtc::scoped_refptr audio_device_module = [self createAudioDeviceModule:bypassVoiceProcessing]; if ([audioProcessingModule isKindOfClass:[RTC_OBJC_TYPE(RTCDefaultAudioProcessingModule) class]]) { _defaultAudioProcessingModule = (RTC_OBJC_TYPE(RTCDefaultAudioProcessingModule) *)audioProcessingModule; @@ -172,13 +164,11 @@ - (instancetype)init { _defaultAudioProcessingModule = [[RTC_OBJC_TYPE(RTCDefaultAudioProcessingModule) alloc] init]; } - NSLog(@"AudioProcessingModule: %@", _defaultAudioProcessingModule); - return [self initWithNativeAudioEncoderFactory:webrtc::CreateBuiltinAudioEncoderFactory() nativeAudioDecoderFactory:webrtc::CreateBuiltinAudioDecoderFactory() nativeVideoEncoderFactory:std::move(native_encoder_factory) nativeVideoDecoderFactory:std::move(native_decoder_factory) - audioDeviceModule:audio_device_module.get() + audioDeviceModule:nullptr audioProcessingModule:_defaultAudioProcessingModule.nativeAudioProcessingModule bypassVoiceProcessing:bypassVoiceProcessing]; #endif @@ -227,7 +217,7 @@ - (instancetype)initWithNativeAudioEncoderFactory: (std::unique_ptr)videoEncoderFactory nativeVideoDecoderFactory: (std::unique_ptr)videoDecoderFactory - audioDeviceModule:(webrtc::AudioDeviceModule *)audioDeviceModule + 
audioDeviceModule:(rtc::scoped_refptr)audioDeviceModule audioProcessingModule: (rtc::scoped_refptr)audioProcessingModule bypassVoiceProcessing:(BOOL)bypassVoiceProcessing { @@ -248,7 +238,7 @@ - (instancetype)initWithNativeAudioEncoderFactory: (std::unique_ptr)videoEncoderFactory nativeVideoDecoderFactory: (std::unique_ptr)videoDecoderFactory - audioDeviceModule:(webrtc::AudioDeviceModule *)audioDeviceModule + audioDeviceModule:(rtc::scoped_refptr)audioDeviceModule audioProcessingModule: (rtc::scoped_refptr)audioProcessingModule networkControllerFactory: @@ -266,15 +256,13 @@ - (instancetype)initWithNativeAudioEncoderFactory: dependencies.trials = std::make_unique(); dependencies.task_queue_factory = webrtc::CreateDefaultTaskQueueFactory(dependencies.trials.get()); - - if(audioDeviceModule) { - _nativeAudioDeviceModule = std::move(audioDeviceModule); + + if (audioDeviceModule != nullptr) { + _nativeAudioDeviceModule = audioDeviceModule; } else { // always create ADM on worker thread - _nativeAudioDeviceModule = _workerThread->BlockingCall([&dependencies, &bypassVoiceProcessing]() { - return webrtc::AudioDeviceModule::Create(webrtc::AudioDeviceModule::AudioLayer::kPlatformDefaultAudio, - dependencies.task_queue_factory.get(), - bypassVoiceProcessing == YES); + _nativeAudioDeviceModule = _workerThread->BlockingCall([&bypassVoiceProcessing]() { + return rtc::make_ref_counted(bypassVoiceProcessing == YES); }); } diff --git a/sdk/objc/api/peerconnection/RTCPeerConnectionFactoryBuilder.mm b/sdk/objc/api/peerconnection/RTCPeerConnectionFactoryBuilder.mm index 4cb12b0a59..59a72823f8 100644 --- a/sdk/objc/api/peerconnection/RTCPeerConnectionFactoryBuilder.mm +++ b/sdk/objc/api/peerconnection/RTCPeerConnectionFactoryBuilder.mm @@ -38,7 +38,7 @@ @implementation RTC_OBJC_TYPE(RTCPeerConnectionFactoryBuilder) { nativeAudioDecoderFactory:_audioDecoderFactory nativeVideoEncoderFactory:std::move(_videoEncoderFactory) nativeVideoDecoderFactory:std::move(_videoDecoderFactory) - 
audioDeviceModule:_audioDeviceModule.get() + audioDeviceModule:_audioDeviceModule audioProcessingModule:_audioProcessingModule bypassVoiceProcessing:NO]; } diff --git a/sdk/objc/components/audio/RTCAudioSession+Private.h b/sdk/objc/components/audio/RTCAudioSession+Private.h index 8a3c52e5b7..2be1b9fb3d 100644 --- a/sdk/objc/components/audio/RTCAudioSession+Private.h +++ b/sdk/objc/components/audio/RTCAudioSession+Private.h @@ -90,9 +90,6 @@ NS_ASSUME_NONNULL_BEGIN - (void)notifyDidStartPlayOrRecord; - (void)notifyDidStopPlayOrRecord; -- (void)notifyAudioEngineWillUpdateStateWithOutputEnabled:(BOOL)isOutputEnabled - inputEnabled:(BOOL)isInputEnabled; - @end NS_ASSUME_NONNULL_END diff --git a/sdk/objc/components/audio/RTCAudioSession.h b/sdk/objc/components/audio/RTCAudioSession.h index 8b34f9299c..2730664858 100644 --- a/sdk/objc/components/audio/RTCAudioSession.h +++ b/sdk/objc/components/audio/RTCAudioSession.h @@ -102,10 +102,6 @@ RTC_OBJC_EXPORT - (void)audioSession:(RTC_OBJC_TYPE(RTCAudioSession) *)audioSession audioUnitStartFailedWithError:(NSError *)error; -- (void)audioSession:(RTC_OBJC_TYPE(RTCAudioSession) *)audioSession - audioEngineWillUpdateStateWithOutputEnabled:(BOOL)isOutputEnabled - inputEnabled:(BOOL)isInputEnabled; - @end /** This is a protocol used to inform RTCAudioSession when the audio session diff --git a/sdk/objc/components/audio/RTCAudioSession.mm b/sdk/objc/components/audio/RTCAudioSession.mm index a914ce2f18..11d1a1c337 100644 --- a/sdk/objc/components/audio/RTCAudioSession.mm +++ b/sdk/objc/components/audio/RTCAudioSession.mm @@ -1010,16 +1010,4 @@ - (void)notifyFailedToSetActive:(BOOL)active error:(NSError *)error { } } -- (void)notifyAudioEngineWillUpdateStateWithOutputEnabled:(BOOL)isOutputEnabled - inputEnabled:(BOOL)isInputEnabled { - for (auto delegate : self.delegates) { - SEL sel = @selector(audioSession:audioEngineWillUpdateStateWithOutputEnabled:inputEnabled:); - if ([delegate respondsToSelector:sel]) { - [delegate 
audioSession:self - audioEngineWillUpdateStateWithOutputEnabled:isOutputEnabled - inputEnabled:isInputEnabled]; - } - } -} - @end diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.h b/sdk/objc/native/src/audio/audio_device_module_ios.h index 349e4e840d..2f175d0e10 100644 --- a/sdk/objc/native/src/audio/audio_device_module_ios.h +++ b/sdk/objc/native/src/audio/audio_device_module_ios.h @@ -14,8 +14,8 @@ #include #include "api/task_queue/task_queue_factory.h" +#include "audio_device_ios.h" #include "modules/audio_device/audio_device_buffer.h" -#include "modules/audio_device/audio_engine_device.h" #include "modules/audio_device/include/audio_device.h" #include "rtc_base/checks.h" @@ -130,13 +130,13 @@ class AudioDeviceModuleIOS : public AudioDeviceModule { int GetRecordAudioParameters(AudioParameters* params) const override; #endif // WEBRTC_IOS - int32_t SetObserver(AudioDeviceObserver* observer) const override; + int32_t SetObserver(AudioDeviceObserver* observer) override; private: const bool bypass_voice_processing_; bool initialized_ = false; const std::unique_ptr task_queue_factory_; - std::unique_ptr audio_device_; + std::unique_ptr audio_device_; std::unique_ptr audio_device_buffer_; }; } // namespace ios_adm diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.mm b/sdk/objc/native/src/audio/audio_device_module_ios.mm index d178508b2b..9b8598632b 100644 --- a/sdk/objc/native/src/audio/audio_device_module_ios.mm +++ b/sdk/objc/native/src/audio/audio_device_module_ios.mm @@ -19,7 +19,7 @@ #include "system_wrappers/include/metrics.h" #if defined(WEBRTC_IOS) -#include "modules/audio_device/audio_engine_device.h" +#include "audio_device_ios.h" #endif #define CHECKinitialized_() \ @@ -72,7 +72,7 @@ return 0; audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer(task_queue_factory_.get())); - audio_device_.reset(new AudioEngineDevice(bypass_voice_processing_)); + audio_device_.reset(new ios_adm::AudioDeviceIOS(bypass_voice_processing_)); 
RTC_CHECK(audio_device_); this->AttachAudioBuffer(); @@ -666,7 +666,7 @@ } #endif // WEBRTC_IOS - int32_t AudioDeviceModuleIOS::SetObserver(AudioDeviceObserver* observer) const { + int32_t AudioDeviceModuleIOS::SetObserver(AudioDeviceObserver* observer) { RTC_DLOG(LS_INFO) << __FUNCTION__; int r = audio_device_->SetObserver(observer); RTC_DLOG(LS_INFO) << "output: " << r; From 235da976a8889c991895dbfb7630eca43d12fc6b Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:52:32 +0900 Subject: [PATCH 13/15] Squashed recent progress --- modules/audio_device/audio_engine_device.h | 94 +++- modules/audio_device/audio_engine_device.mm | 409 ++++++++---------- modules/audio_device/include/audio_device.h | 20 +- .../api/peerconnection/RTCAudioDeviceModule.h | 21 +- .../peerconnection/RTCAudioDeviceModule.mm | 140 +++++- 5 files changed, 418 insertions(+), 266 deletions(-) diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index 0b95aafcfe..48e4a87b64 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -17,9 +17,36 @@ #ifndef SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ #define SDK_OBJC_NATIVE_SRC_AUDIO_AUDIO_DEVICE_AUDIOENGINE_H_ +#include + +#if defined(__OBJC__) #import +#else +// Forward declarations for C++ code +#ifdef __OBJC__ +@class AVAudioEngine; +@class AVAudioInputNode; +@class AVAudioOutputNode; +@class AVAudioSourceNode; +@class AVAudioSinkNode; +@class AVAudioMixerNode; +@class AVAudioPCMBuffer; +@class AVAudioFormat; +typedef void (^AVAudioEngineManualRenderingBlock)(AVAudioFrameCount, + AudioBufferList*, OSStatus*); +#else +typedef void AVAudioEngine; +typedef void AVAudioInputNode; +typedef void AVAudioOutputNode; +typedef void AVAudioSourceNode; +typedef void AVAudioSinkNode; +typedef void AVAudioMixerNode; +typedef void AVAudioPCMBuffer; +typedef void AVAudioFormat; +typedef 
void* AVAudioEngineManualRenderingBlock; +#endif +#endif -#include #include #include "api/scoped_refptr.h" @@ -147,29 +174,73 @@ class AudioEngineDevice : public AudioDeviceModule, int32_t InitAndStartRecording(); + enum RenderMode { Device, Manual }; + private: + // Represents the state of the audio engine, including input/output status, + // rendering mode, and various configuration flags. struct EngineState { bool input_enabled = false; bool input_running = false; bool output_enabled = false; bool output_running = false; + // Output will be enabled when input is enabled + bool input_follow_mode = true; + bool input_muted = false; bool is_interrupted = false; - bool is_manual_mode = false; + RenderMode render_mode = RenderMode::Device; bool voice_processing = true; bool advanced_ducking = true; - long ducking_level = 0; // 0 = Default - - bool operator==(const EngineState& rhs) const; - bool operator!=(const EngineState& rhs) const; + long ducking_level = 0; // 0 = Default + + bool operator==(const EngineState& rhs) const { + return input_enabled == rhs.input_enabled && + input_running == rhs.input_running && + output_enabled == rhs.output_enabled && + output_running == rhs.output_running && + input_follow_mode == rhs.input_follow_mode && + input_muted == rhs.input_muted && + is_interrupted == rhs.is_interrupted && + render_mode == rhs.render_mode && + voice_processing == rhs.voice_processing && + advanced_ducking == rhs.advanced_ducking && + ducking_level == rhs.ducking_level; + } + + bool operator!=(const EngineState& rhs) const { return !(*this == rhs); } + + bool IsOutputInputLinked() const { + return input_follow_mode && voice_processing; + } + + bool IsOutputEnabled() const { + return IsOutputInputLinked() ? input_enabled || output_enabled + : output_enabled; + } + + bool IsOutputRunning() const { + return IsOutputInputLinked() ? 
input_running || output_running + : output_running; + } + + bool IsInputEnabled() const { return input_enabled; } + bool IsInputRunning() const { return input_running; } bool IsAnyEnabled() const { return input_enabled || output_enabled; } bool IsAnyRunning() const { return input_running || output_running; } - bool IsAllEnabled() const { return input_enabled && output_enabled; } - bool IsAllRunning() const { return input_running && output_running; } + bool IsAllEnabled() const { + return IsOutputInputLinked() ? input_enabled + : input_enabled && output_enabled; + } + + bool IsAllRunning() const { + return IsOutputInputLinked() ? input_running + : input_running && output_running; + } }; EngineState engine_state_ RTC_GUARDED_BY(thread_); @@ -222,15 +293,16 @@ class AudioEngineDevice : public AudioDeviceModule, double machTickUnitsToNanoseconds_; // AVAudioEngine objects - AVAudioEngine* audio_engine_; - AVAudioFormat* manual_render_rtc_format_; // Int16 + AVAudioEngine* engine_device_; + + // Used for manual rendering mode + AVAudioFormat* manual_render_rtc_format_; // Int16 // Output related AVAudioSourceNode* source_node_; // Input related nodes AVAudioSinkNode* sink_node_; - AVAudioUnitEQ* input_eq_node_; AVAudioMixerNode* input_mixer_node_; void* configuration_observer_; diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index c48976a45f..f48a155721 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -74,7 +74,7 @@ NSNotificationCenter* center = [NSNotificationCenter defaultCenter]; configuration_observer_ = (__bridge_retained void*)[center addObserverForName:AVAudioEngineConfigurationChangeNotification - object:audio_engine_ + object:engine_device_ queue:nil usingBlock:^(NSNotification* notification) { OnEngineConfigurationChange(); @@ -156,14 +156,14 @@ LOGI() << "PlayoutIsInitialized"; RTC_DCHECK_RUN_ON(thread_); - return engine_state_.output_enabled; 
+ return engine_state_.IsOutputEnabled(); } bool AudioEngineDevice::Playing() const { LOGI() << "Playing"; RTC_DCHECK_RUN_ON(thread_); - return engine_state_.output_running; + return engine_state_.IsOutputRunning(); } int32_t AudioEngineDevice::InitPlayout() { @@ -171,11 +171,6 @@ RTC_DCHECK_RUN_ON(thread_); RTC_DCHECK(initialized_); - if (engine_state_.output_enabled) { - LOGW() << "InitPlayout: Already initialized"; - return 0; - } - SetEngineState([](EngineState state) -> EngineState { state.output_enabled = true; return state; @@ -187,17 +182,6 @@ int32_t AudioEngineDevice::StartPlayout() { LOGI() << "StartPlayout"; RTC_DCHECK_RUN_ON(thread_); - RTC_DCHECK(engine_state_.output_enabled); - - if (!engine_state_.output_enabled) { - LOGW() << "StartPlayout: Not initialized"; - return -1; - } - - if (engine_state_.output_running) { - LOGW() << "StartPlayout: Already playing"; - return 0; - } SetEngineState([](EngineState state) -> EngineState { state.output_running = true; @@ -211,24 +195,12 @@ LOGI() << "StopPlayout"; RTC_DCHECK_RUN_ON(thread_); - if (!engine_state_.output_enabled) { - LOGW() << "StopPlayout: Not initialized"; - return -1; - } - - if (!engine_state_.output_running) { - LOGW() << "StopPlayout: Already stopped"; - return 0; - } - SetEngineState([](EngineState state) -> EngineState { state.output_enabled = false; state.output_running = false; return state; }); - audio_device_buffer_->StopPlayout(); - return 0; } @@ -239,7 +211,7 @@ LOGI() << "RecordingIsInitialized"; RTC_DCHECK_RUN_ON(thread_); - return engine_state_.input_enabled; + return engine_state_.IsInputEnabled(); } bool AudioEngineDevice::Recording() const { @@ -254,13 +226,7 @@ RTC_DCHECK_RUN_ON(thread_); RTC_DCHECK(initialized_); - if (engine_state_.input_enabled) { - LOGW() << "InitRecording: Already initialized"; - return 0; - } - SetEngineState([](EngineState state) -> EngineState { - state.output_enabled = true; state.input_enabled = true; state.input_muted = true; // Muted by default 
return state; @@ -272,17 +238,6 @@ int32_t AudioEngineDevice::StartRecording() { LOGI() << "StartRecording"; RTC_DCHECK_RUN_ON(thread_); - RTC_DCHECK(engine_state_.input_enabled); - - if (!engine_state_.input_enabled) { - LOGW() << "StartRecording: Not initialized"; - return -1; - } - - if (engine_state_.input_running) { - LOGW() << "StartRecording: Already recording"; - return 0; - } SetEngineState([](EngineState state) -> EngineState { state.input_running = true; @@ -297,24 +252,12 @@ LOGI() << "StopRecording"; RTC_DCHECK_RUN_ON(thread_); - if (!engine_state_.input_enabled) { - LOGW() << "StopRecording: Not initialized"; - return -1; - } - - if (!engine_state_.input_running) { - LOGW() << "StopRecording: Already stopped"; - return 0; - } - SetEngineState([](EngineState state) -> EngineState { state.input_enabled = false; state.input_running = false; return state; }); - audio_device_buffer_->StopRecording(); - return 0; } @@ -752,7 +695,7 @@ return -1; } - *enabled = engine_state_.is_manual_mode; + *enabled = engine_state_.render_mode == RenderMode::Manual; return 0; } @@ -762,7 +705,7 @@ LOGI() << "SetManualRenderingMode: " << enable; SetEngineState([enable](EngineState state) -> EngineState { - state.is_manual_mode = enable; + state.render_mode = enable ? 
RenderMode::Manual : RenderMode::Device; return state; }); @@ -785,8 +728,6 @@ } SetEngineState([](EngineState state) -> EngineState { - state.output_enabled = true; - state.output_running = true; state.input_enabled = true; state.input_running = true; state.input_muted = false; // Always unmute @@ -885,12 +826,12 @@ } // Checks - if (new_state.input_running) { - RTC_DCHECK(new_state.input_enabled); + if (new_state.IsInputRunning()) { + RTC_DCHECK(new_state.IsInputEnabled()); } - if (new_state.output_running) { - RTC_DCHECK(new_state.output_enabled); + if (new_state.IsOutputRunning()) { + RTC_DCHECK(new_state.IsOutputEnabled()); } engine_state_ = new_state; @@ -900,65 +841,60 @@ void AudioEngineDevice::UpdateEngineState(EngineState old_state, EngineState new_state) { RTC_DCHECK_RUN_ON(thread_); - // Playout or Recording enabled, create an engine instance. - bool is_new_engine = !old_state.IsAnyEnabled() && new_state.IsAnyEnabled(); - // Playout or Recording not enabled, destroy engine instance. 
- bool is_release_engine = old_state.IsAnyEnabled() && !new_state.IsAnyEnabled(); + bool is_restart_required = (old_state.IsInputEnabled() != new_state.IsInputEnabled()) || + (old_state.IsOutputEnabled() != new_state.IsOutputEnabled()); - bool is_restart_required = (old_state.input_enabled != new_state.input_enabled) || - (old_state.output_enabled != new_state.output_enabled); - - if (is_new_engine) { + if (!old_state.IsAnyEnabled() && new_state.IsAnyEnabled()) { LOGI() << "Creating AVAudioEngine..."; - audio_engine_ = [[AVAudioEngine alloc] init]; + engine_device_ = [[AVAudioEngine alloc] init]; + + if (observer_ != nullptr) { + observer_->OnEngineDidCreate(engine_device_); + } } - if (old_state.IsAnyRunning()) { - if (!new_state.IsAnyRunning() || is_restart_required) { - LOGI() << "Stopping AVAudioEngine..."; - [audio_engine_ stop]; - } else if (!old_state.is_interrupted && new_state.is_interrupted) { - LOGI() << "Pausing AVAudioEngine..."; - [audio_engine_ pause]; + if (old_state.IsAnyRunning() && (!new_state.IsAnyRunning() || is_restart_required)) { + LOGI() << "Stopping AVAudioEngine..."; + [engine_device_ stop]; + } else if (old_state.IsAnyRunning() && !old_state.is_interrupted && new_state.is_interrupted) { + LOGI() << "Pausing AVAudioEngine..."; + [engine_device_ pause]; + } + + if (old_state.IsAnyRunning() && (!new_state.IsAnyRunning() || is_restart_required || + (!old_state.is_interrupted && new_state.is_interrupted))) { + if (observer_ != nullptr) { + observer_->OnEngineDidStop(engine_device_, new_state.IsOutputEnabled(), + new_state.IsInputEnabled()); } + } - if (!new_state.IsAnyRunning() || is_restart_required || - (!old_state.is_interrupted && new_state.is_interrupted)) { - if (old_state.output_running && !new_state.output_running) { - LOGI() << "Stopping Playout buffer..."; - audio_device_buffer_->StopPlayout(); - } - if (old_state.input_running && !new_state.input_running) { - LOGI() << "Stopping Record buffer..."; - 
audio_device_buffer_->StopRecording(); - } + if (old_state.IsAnyRunning() && (!new_state.IsAnyRunning() || is_restart_required || + (!old_state.is_interrupted && new_state.is_interrupted))) { + if (old_state.IsOutputRunning() && !new_state.IsOutputRunning()) { + LOGI() << "Stopping Playout buffer..."; + audio_device_buffer_->StopPlayout(); + } + if (old_state.IsInputRunning() && !new_state.IsInputRunning()) { + LOGI() << "Stopping Record buffer..."; + audio_device_buffer_->StopRecording(); } } - if ((!old_state.output_enabled && new_state.output_enabled) || - (!old_state.input_enabled && new_state.input_enabled)) { + if ((!old_state.IsOutputEnabled() && new_state.IsOutputEnabled()) || + (!old_state.IsInputEnabled() && new_state.IsInputEnabled())) { if (observer_ != nullptr) { // Invoke here before configuring nodes. In iOS, session configuration is required before // enabling AGC, muted talker etc. - observer_->OnEngineWillStart(audio_engine_, new_state.output_enabled, - new_state.input_enabled); + observer_->OnEngineWillEnable(engine_device_, new_state.IsOutputEnabled(), + new_state.IsInputEnabled()); } } - if (!old_state.output_enabled && new_state.output_enabled) { + if (!old_state.IsOutputEnabled() && new_state.IsOutputEnabled()) { LOGI() << "Enabling output for AVAudioEngine..."; - RTC_DCHECK(!audio_engine_.running); - - // Turning voice processing on outputNode, will turn on for inputNode also and mic indicator - // goes on. if (!audio_engine_.outputNode.voiceProcessingEnabled) { - // NSError* error = nil; - // BOOL set_vp_result = [audio_engine_.outputNode setVoiceProcessingEnabled:YES error:&error]; - // if (!set_vp_result) { - // NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); - // RTC_DCHECK(set_vp_result); - // } - // LOGI() << "setVoiceProcessingEnabled (output) result: " << set_vp_result ? 
"YES" : "NO"; - // } + RTC_DCHECK(!engine_device_.running); + AVAudioFormat* output_node_format = [this->OutputNode() outputFormatForBus:0]; LOGI() << "Output format sampleRate: " << output_node_format.sampleRate @@ -997,37 +933,37 @@ source_node_ = [[AVAudioSourceNode alloc] initWithFormat:rtc_output_format renderBlock:source_block]; - [audio_engine_ attachNode:source_node_]; + [engine_device_ attachNode:source_node_]; + + [engine_device_ connect:source_node_ + to:engine_device_.mainMixerNode + format:engine_output_format]; if (!(this->observer_ != nullptr && - this->observer_->OnEngineWillConnectOutput( - audio_engine_, source_node_, audio_engine_.mainMixerNode, engine_output_format))) { + this->observer_->OnEngineWillConnectOutput(engine_device_, engine_device_.mainMixerNode, + this->OutputNode(), engine_output_format))) { // Default implementation. - [audio_engine_ connect:source_node_ - to:audio_engine_.mainMixerNode - format:engine_output_format]; + [engine_device_ connect:engine_device_.mainMixerNode + to:this->OutputNode() + format:engine_output_format]; } - [audio_engine_ connect:audio_engine_.mainMixerNode - to:this->OutputNode() - format:engine_output_format]; - - } else if (old_state.output_enabled && !new_state.output_enabled) { + } else if (old_state.IsOutputEnabled() && !new_state.IsOutputEnabled()) { LOGI() << "Disabling output for AVAudioEngine..."; - RTC_DCHECK(!audio_engine_.running); + RTC_DCHECK(!engine_device_.running); // Disconnect if (source_node_ != nil) { - [audio_engine_ disconnectNodeInput:source_node_]; - [audio_engine_ disconnectNodeOutput:source_node_]; - [audio_engine_ detachNode:source_node_]; + [engine_device_ disconnectNodeInput:source_node_]; + [engine_device_ disconnectNodeOutput:source_node_]; + [engine_device_ detachNode:source_node_]; source_node_ = nil; } } - if (!old_state.input_enabled && new_state.input_enabled) { + if (!old_state.IsInputEnabled() && new_state.IsInputEnabled()) { LOGI() << "Enabling input for 
AVAudioEngine..."; - RTC_DCHECK(!audio_engine_.running); + RTC_DCHECK(!engine_device_.running); if (!this->InputNode().voiceProcessingEnabled) { NSError* error = nil; @@ -1075,11 +1011,8 @@ } } - input_eq_node_ = [[AVAudioUnitEQ alloc] initWithNumberOfBands:2]; - [audio_engine_ attachNode:input_eq_node_]; - input_mixer_node_ = [[AVAudioMixerNode alloc] init]; - [audio_engine_ attachNode:input_mixer_node_]; + [engine_device_ attachNode:input_mixer_node_]; AVAudioFormat* input_node_format = [this->InputNode() outputFormatForBus:0]; // Example formats: @@ -1122,48 +1055,48 @@ }; if (!(observer_ != nullptr && - observer_->OnEngineWillConnectInput(audio_engine_, this->InputNode(), input_mixer_node_, + observer_->OnEngineWillConnectInput(engine_device_, this->InputNode(), input_mixer_node_, engine_input_format))) { // Default implementation. - [audio_engine_ connect:this->InputNode() to:input_mixer_node_ format:engine_input_format]; + [engine_device_ connect:this->InputNode() to:input_mixer_node_ format:engine_input_format]; } sink_node_ = [[AVAudioSinkNode alloc] initWithReceiverBlock:sink_block]; - [audio_engine_ attachNode:sink_node_]; + [engine_device_ attachNode:sink_node_]; // Convert to RTC's internal format before passing buffers to SinkNode. 
- [audio_engine_ connect:input_mixer_node_ to:sink_node_ format:rtc_input_format]; + [engine_device_ connect:input_mixer_node_ to:sink_node_ format:rtc_input_format]; - } else if (old_state.input_enabled && !new_state.input_enabled) { + } else if (old_state.IsInputEnabled() && !new_state.IsInputEnabled()) { LOGI() << "Disabling input for AVAudioEngine..."; - RTC_DCHECK(!audio_engine_.running); - - // Disconnect input eq - if (input_eq_node_ != nil) { - [audio_engine_ disconnectNodeInput:input_eq_node_]; - [audio_engine_ disconnectNodeOutput:input_eq_node_]; - [audio_engine_ detachNode:input_eq_node_]; - input_eq_node_ = nil; - } + RTC_DCHECK(!engine_device_.running); // InputMixerNode if (input_mixer_node_ != nil) { - [audio_engine_ disconnectNodeInput:input_mixer_node_]; - [audio_engine_ disconnectNodeOutput:input_mixer_node_]; - [audio_engine_ detachNode:input_mixer_node_]; + [engine_device_ disconnectNodeInput:input_mixer_node_]; + [engine_device_ disconnectNodeOutput:input_mixer_node_]; + [engine_device_ detachNode:input_mixer_node_]; input_mixer_node_ = nil; } // SinkNode if (sink_node_ != nil) { - [audio_engine_ disconnectNodeInput:sink_node_]; - [audio_engine_ disconnectNodeOutput:sink_node_]; - [audio_engine_ detachNode:sink_node_]; + [engine_device_ disconnectNodeInput:sink_node_]; + [engine_device_ disconnectNodeOutput:sink_node_]; + [engine_device_ detachNode:sink_node_]; sink_node_ = nil; } } - if (new_state.input_enabled) { + if ((old_state.IsOutputEnabled() && !new_state.IsOutputEnabled()) || + (old_state.IsInputEnabled() && !new_state.IsInputEnabled())) { + if (observer_ != nullptr) { + observer_->OnEngineDidDisable(engine_device_, new_state.IsOutputEnabled(), + new_state.IsInputEnabled()); + } + } + + if (new_state.IsInputEnabled()) { if (this->InputNode().voiceProcessingEnabled) { // Re-apply muted state. 
this->InputNode().voiceProcessingInputMuted = new_state.input_muted; @@ -1171,9 +1104,9 @@ } #if !TARGET_OS_TV - if (new_state.input_enabled && this->InputNode().voiceProcessingEnabled && - (!old_state.input_enabled || (old_state.advanced_ducking != new_state.advanced_ducking || - old_state.ducking_level != new_state.ducking_level))) { + if (new_state.IsInputEnabled() && this->InputNode().voiceProcessingEnabled && + (!old_state.IsInputEnabled() || (old_state.advanced_ducking != new_state.advanced_ducking || + old_state.ducking_level != new_state.ducking_level))) { // Other audio ducking. // iOS 17.0+, iPadOS 17.0+, Mac Catalyst 17.0+, macOS 14.0+, visionOS 1.0+ if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { @@ -1188,15 +1121,16 @@ } #endif - if ((!old_state.output_running && new_state.output_running && !new_state.input_running) || - (!old_state.output_enabled && new_state.output_enabled && new_state.input_running)) { + if ((!old_state.IsOutputRunning() && new_state.IsOutputRunning() && + !new_state.IsInputRunning()) || + (!old_state.IsOutputEnabled() && new_state.IsOutputEnabled() && new_state.IsInputRunning())) { LOGI() << "Starting Playout buffer..."; audio_device_buffer_->StartPlayout(); fine_audio_buffer_->ResetPlayout(); } - if ((!old_state.input_running && new_state.input_running && !new_state.output_running) || - (!old_state.input_enabled && new_state.input_enabled && new_state.output_running)) { + if ((!old_state.IsInputRunning() && new_state.IsInputRunning() && !new_state.IsOutputRunning()) || + (!old_state.IsInputEnabled() && new_state.IsInputEnabled() && new_state.IsOutputRunning())) { LOGI() << "Starting Record buffer..."; audio_device_buffer_->StartRecording(); fine_audio_buffer_->ResetRecord(); @@ -1205,9 +1139,14 @@ if (new_state.IsAnyRunning()) { if (!old_state.IsAnyRunning() || (old_state.is_interrupted && !new_state.is_interrupted) || is_restart_required) { + if (observer_ != nullptr) { + 
observer_->OnEngineWillStart(engine_device_, new_state.IsOutputEnabled(), + new_state.IsInputEnabled()); + } + LOGI() << "Starting AVAudioEngine..."; NSError* error = nil; - BOOL start_result = [audio_engine_ startAndReturnError:&error]; + BOOL start_result = [engine_device_ startAndReturnError:&error]; if (!start_result) { LOGE() << "Failed to start engine: " << error.localizedDescription.UTF8String; DebugAudioEngine(); @@ -1215,9 +1154,12 @@ } } - if (is_release_engine) { + if (old_state.IsAnyEnabled() && !new_state.IsAnyEnabled()) { + if (observer_ != nullptr) { + observer_->OnEngineWillRelease(engine_device_); + } LOGI() << "Releasing AVAudioEngine..."; - audio_engine_ = nil; + engine_device_ = nil; } } @@ -1229,115 +1171,108 @@ // Constants for timing and frame management const double sample_rate = manual_render_rtc_format_.sampleRate; - // Fixed number of frames to render per cycle - const double target_frame_count = sample_rate / 100; + const double target_frame_count = sample_rate / 100; // 10ms chunks const double nanoseconds_per_frame = 1e9 / sample_rate; const double target_cycle_time_ns = target_frame_count * nanoseconds_per_frame; - // Variables for timing management + // Timing management with exponential moving average uint64_t last_cycle_time = mach_absolute_time(); - double sleep_time_ms = 5.0; // Initial sleep time - const double min_sleep_time_ms = 1.0; - const double max_sleep_time_ms = 20.0; + double sleep_time_ms = 5.0; + const double min_sleep_time_ms = 0.5; + const double max_sleep_time_ms = 10.0; + + // EMA coefficient (α) - higher value means more weight on recent samples + const double alpha = 0.2; + double ema_cycle_time = target_cycle_time_ns; + + // Pre-allocate buffer for performance + const size_t buffer_size = static_cast(target_frame_count) * kAudioSampleSize; - // Simple moving average for sleep time adjustment - constexpr size_t avg_window_size = 5; - std::array cycle_times{}; - size_t cycle_index = 0; + // Error recovery + int 
consecutive_errors = 0; + const int max_consecutive_errors = 3; while (!render_thread_->IsQuitting()) { RTC_DCHECK(render_buffer_ != nullptr); AudioBufferList* abl = const_cast(render_buffer_.audioBufferList); - // Calculate timing + // Precise timing calculation uint64_t current_time = mach_absolute_time(); double elapsed_time_ns = (current_time - last_cycle_time) * machTickUnitsToNanoseconds_; - // Update moving average of cycle times - cycle_times[cycle_index] = elapsed_time_ns; - cycle_index = (cycle_index + 1) % avg_window_size; + // Update EMA of cycle time + ema_cycle_time = (alpha * elapsed_time_ns) + ((1.0 - alpha) * ema_cycle_time); - // Calculate average cycle time - double avg_cycle_time = 0; - for (double time : cycle_times) { - avg_cycle_time += time; - } - avg_cycle_time /= avg_window_size; - - // Adjust sleep time based on average cycle time - if (avg_cycle_time > 0) { // Only adjust if we have valid timing data - double time_diff = target_cycle_time_ns - avg_cycle_time; - double adjustment = - (time_diff / target_cycle_time_ns) * sleep_time_ms * 0.1; // Gradual adjustment - sleep_time_ms = std::clamp(sleep_time_ms + adjustment, min_sleep_time_ms, max_sleep_time_ms); - } + // Dynamic sleep time adjustment using PID-like control + const double error = target_cycle_time_ns - ema_cycle_time; + const double kP = 0.2; // Proportional gain + const double adjustment = (error / target_cycle_time_ns) * kP; + sleep_time_ms = + std::clamp(sleep_time_ms * (1.0 + adjustment), min_sleep_time_ms, max_sleep_time_ms); - // Set fixed frame count - unsigned int frames_to_render = target_frame_count; + // Optimize buffer management + const unsigned int frames_to_render = static_cast(target_frame_count); + abl->mBuffers[0].mDataByteSize = buffer_size; - // Adjust buffer size for the fixed frame count - abl->mBuffers[0].mDataByteSize = frames_to_render * kAudioSampleSize; - - // Render audio + // Render audio with error handling OSStatus err = noErr; 
AVAudioEngineManualRenderingStatus result = render_block_(frames_to_render, abl, &err); if (result == AVAudioEngineManualRenderingStatusSuccess) { - LOGI() << "Render success, frames: " << frames_to_render - << " frameLength: " << render_buffer_.frameLength - << " sleep_time_ms: " << sleep_time_ms; - } else { - LOGI() << "Render error: " << err << " frames: " << frames_to_render; - // On error, reset sleep time to default - sleep_time_ms = 5.0; - } + consecutive_errors = 0; // Reset error counter on success - RTC_DCHECK(abl->mNumberBuffers == 1); - const int16_t* rtc_buffer = - static_cast(static_cast(abl->mBuffers[0].mData)); + RTC_DCHECK(abl->mNumberBuffers == 1); + const int16_t* rtc_buffer = + static_cast(static_cast(abl->mBuffers[0].mData)); - last_cycle_time = mach_absolute_time(); + // Update timing before processing + last_cycle_time = mach_absolute_time(); - fine_audio_buffer_->DeliverRecordedData( - rtc::ArrayView(rtc_buffer, frames_to_render), kFixedRecordDelayEstimate, - absl::nullopt); + // Process audio data + fine_audio_buffer_->DeliverRecordedData( + rtc::ArrayView(rtc_buffer, frames_to_render), kFixedRecordDelayEstimate, + absl::nullopt); + } else { + consecutive_errors++; + LOGW() << "Render error: " << err << " frames: " << frames_to_render + << " consecutive errors: " << consecutive_errors; + + if (consecutive_errors >= max_consecutive_errors) { + // Reset timing on persistent errors + sleep_time_ms = 5.0; + ema_cycle_time = target_cycle_time_ns; + consecutive_errors = 0; + } + } + // Precise sleep timing if (!render_thread_->IsQuitting()) { - render_thread_->SleepMs(static_cast(sleep_time_ms)); + const int sleep_ms = static_cast(std::round(sleep_time_ms)); + if (sleep_ms > 0) { + render_thread_->SleepMs(sleep_ms); + } } } } -bool AudioEngineDevice::EngineState::operator==(const EngineState& rhs) const { - return input_enabled == rhs.input_enabled && output_enabled == rhs.output_enabled && - input_running == rhs.input_running && output_running 
== rhs.output_running && - input_muted == rhs.input_muted && is_interrupted == rhs.is_interrupted && - is_manual_mode == rhs.is_manual_mode && voice_processing == rhs.voice_processing && - advanced_ducking == rhs.advanced_ducking && ducking_level == rhs.ducking_level; -} - -bool AudioEngineDevice::EngineState::operator!=(const EngineState& rhs) const { - return !(*this == rhs); -} - // ---------------------------------------------------------------------------------------------------- // Private - Misc AVAudioInputNode* AudioEngineDevice::InputNode() { RTC_DCHECK_RUN_ON(thread_); - RTC_DCHECK(audio_engine_ != nil); - RTC_DCHECK(engine_state_.input_enabled); - RTC_DCHECK(!engine_state_.is_manual_mode); + RTC_DCHECK(engine_device_ != nil); + RTC_DCHECK(engine_state_.IsInputEnabled()); + RTC_DCHECK(engine_state_.render_mode == RenderMode::Device); - return audio_engine_.inputNode; + return engine_device_.inputNode; } AVAudioOutputNode* AudioEngineDevice::OutputNode() { RTC_DCHECK_RUN_ON(thread_); - RTC_DCHECK(audio_engine_ != nil); - RTC_DCHECK(engine_state_.output_enabled || engine_state_.is_manual_mode); + RTC_DCHECK(engine_device_ != nil); + RTC_DCHECK(engine_state_.IsOutputEnabled() || engine_state_.render_mode == RenderMode::Manual); - return audio_engine_.outputNode; + return engine_device_.outputNode; } // ---------------------------------------------------------------------------------------------------- @@ -1402,8 +1337,8 @@ AVAudioFormat* format = [node inputFormatForBus:i]; LOGI() << padded_string(base_depth) << " <- #" << i << audio_format(format); - AVAudioConnectionPoint* connection = [this->audio_engine_ inputConnectionPointForNode:node - inputBus:i]; + AVAudioConnectionPoint* connection = [this->engine_device_ inputConnectionPointForNode:node + inputBus:i]; if (connection != nil) { LOGI() << padded_string(base_depth + 1) << " <-> " << NSStringFromClass([connection.node class]).UTF8String << "." 
@@ -1417,7 +1352,7 @@ LOGI() << padded_string(base_depth) << " -> #" << i << audio_format(format); for (NSUInteger o = 0; o < node.numberOfOutputs; o++) { - NSArray* points = [this->audio_engine_ outputConnectionPointsForNode:node outputBus:o]; + NSArray* points = [this->engine_device_ outputConnectionPointsForNode:node outputBus:o]; for (AVAudioConnectionPoint* connection in points) { LOGI() << padded_string(base_depth + 1) << " <-> " << NSStringFromClass([connection.node class]).UTF8String << "." @@ -1427,7 +1362,7 @@ } }; - NSArray* attachedNodes = [audio_engine_.attachedNodes allObjects]; + NSArray* attachedNodes = [engine_device_.attachedNodes allObjects]; LOGI() << "=================================================="; LOGI() << "DebugAudioEngine attached nodes: " << attachedNodes.count; diff --git a/modules/audio_device/include/audio_device.h b/modules/audio_device/include/audio_device.h index b38f5f8d61..868e543186 100644 --- a/modules/audio_device/include/audio_device.h +++ b/modules/audio_device/include/audio_device.h @@ -215,13 +215,26 @@ class AudioDeviceObserver { virtual void OnSpeechActivityEvent( AudioDeviceModule::SpeechActivityEvent event) {} + // AVAudioEngine lifecycle + virtual void OnEngineDidCreate(AVAudioEngine* engine) {} + + virtual void OnEngineWillEnable(AVAudioEngine* engine, bool playout_enabled, + bool recording_enabled) {} + virtual void OnEngineWillStart(AVAudioEngine* engine, bool playout_enabled, bool recording_enabled) {} + virtual void OnEngineDidStop(AVAudioEngine* engine, bool playout_enabled, + bool recording_enabled) {} + + virtual void OnEngineDidDisable(AVAudioEngine* engine, bool playout_enabled, + bool recording_enabled) {} + + virtual void OnEngineWillRelease(AVAudioEngine* engine) {} + // Override the input node configuration with a custom implementation. // Return true if the original implementation is used. 
- virtual bool OnEngineWillConnectInput(AVAudioEngine* engine, - AVAudioNode* src, + virtual bool OnEngineWillConnectInput(AVAudioEngine* engine, AVAudioNode* src, AVAudioNode* dst, AVAudioFormat* format) { return false; @@ -230,8 +243,7 @@ class AudioDeviceObserver { // Override the input node configuration with a custom implementation. // Return true if the original implementation is used. virtual bool OnEngineWillConnectOutput(AVAudioEngine* engine, - AVAudioNode* src, - AVAudioNode* dst, + AVAudioNode* src, AVAudioNode* dst, AVAudioFormat* format) { return false; } diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h index 2409d6ec70..d16fe267b5 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h @@ -30,7 +30,14 @@ typedef NS_ENUM(NSInteger, RTCSpeechActivityEvent) { typedef void (^RTCDevicesDidUpdateCallback)(); typedef void (^RTCSpeechActivityCallback)(RTCSpeechActivityEvent); + +typedef void (^RTCOnEngineDidCreate)(AVAudioEngine *); +typedef void (^RTCOnEngineWillEnable)(AVAudioEngine *, BOOL, BOOL); typedef void (^RTCOnEngineWillStart)(AVAudioEngine *, BOOL, BOOL); +typedef void (^RTCOnEngineDidStop)(AVAudioEngine *, BOOL, BOOL); +typedef void (^RTCOnEngineDidDisable)(AVAudioEngine *, BOOL, BOOL); +typedef void (^RTCOnEngineWillRelease)(AVAudioEngine *); + typedef bool (^RTCOnEngineWillConnectInput)(AVAudioEngine *, AVAudioNode *, AVAudioNode *, AVAudioFormat *); typedef bool (^RTCOnEngineWillConnectOutput)(AVAudioEngine *, AVAudioNode *, AVAudioNode *, @@ -56,7 +63,14 @@ RTC_OBJC_EXPORT - (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback; - (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback; + +- (BOOL)setOnEngineDidCreateCallback:(nullable RTCOnEngineDidCreate)callback; +- (BOOL)setOnEngineWillEnableCallback:(nullable RTCOnEngineWillEnable)callback; - 
(BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback; +- (BOOL)setOnEngineDidStopCallback:(nullable RTCOnEngineDidStop)callback; +- (BOOL)setOnEngineDidDisableCallback:(nullable RTCOnEngineDidDisable)callback; +- (BOOL)setOnEngineWillReleaseCallback:(nullable RTCOnEngineWillRelease)callback; + - (BOOL)setOnEngineWillConnectInputCallback:(nullable RTCOnEngineWillConnectInput)callback; - (BOOL)setOnEngineWillConnectOutputCallback:(nullable RTCOnEngineWillConnectOutput)callback; @@ -73,11 +87,10 @@ RTC_OBJC_EXPORT @property(nonatomic, readonly, getter=isManualRenderingMode) BOOL manualRenderingMode; - (BOOL)setManualRenderingMode:(BOOL)enabled; -// Ducking. +// Advanced other audio ducking. @property(nonatomic, assign, getter=isAdvancedDuckingEnabled) BOOL advancedDuckingEnabled; -@property(nonatomic, assign) - AVAudioVoiceProcessingOtherAudioDuckingLevel duckingLevel API_AVAILABLE( - ios(17.0), macos(14.0), visionos(1.0)) API_UNAVAILABLE(tvos); + +@property(nonatomic, assign) NSInteger duckingLevel; @end diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm index fe475b075a..603a288303 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm @@ -44,6 +44,23 @@ void OnSpeechActivityEvent(webrtc::AudioDeviceModule::SpeechActivityEvent event) os_unfair_lock_unlock(&lock_); } + void OnEngineDidCreate(AVAudioEngine *engine) override { + os_unfair_lock_lock(&lock_); + if (on_engine_did_create_) { + on_engine_did_create_(engine); + } + os_unfair_lock_unlock(&lock_); + } + + void OnEngineWillEnable(AVAudioEngine *engine, bool playout_enabled, + bool recording_enabled) override { + os_unfair_lock_lock(&lock_); + if (on_engine_will_enable_) { + on_engine_will_enable_(engine, playout_enabled, recording_enabled); + } + os_unfair_lock_unlock(&lock_); + } + void OnEngineWillStart(AVAudioEngine *engine, bool playout_enabled, bool 
recording_enabled) override { os_unfair_lock_lock(&lock_); @@ -53,6 +70,32 @@ void OnEngineWillStart(AVAudioEngine *engine, bool playout_enabled, os_unfair_lock_unlock(&lock_); } + void OnEngineDidStop(AVAudioEngine *engine, bool playout_enabled, + bool recording_enabled) override { + os_unfair_lock_lock(&lock_); + if (on_engine_did_stop_) { + on_engine_did_stop_(engine, playout_enabled, recording_enabled); + } + os_unfair_lock_unlock(&lock_); + } + + void OnEngineDidDisable(AVAudioEngine *engine, bool playout_enabled, + bool recording_enabled) override { + os_unfair_lock_lock(&lock_); + if (on_engine_did_disable_) { + on_engine_did_disable_(engine, playout_enabled, recording_enabled); + } + os_unfair_lock_unlock(&lock_); + } + + void OnEngineWillRelease(AVAudioEngine *engine) override { + os_unfair_lock_lock(&lock_); + if (on_engine_will_release_) { + on_engine_will_release_(engine); + } + os_unfair_lock_unlock(&lock_); + } + bool OnEngineWillConnectInput(AVAudioEngine *engine, AVAudioNode *src, AVAudioNode *dst, AVAudioFormat *format) override { bool result = false; @@ -75,6 +118,8 @@ bool OnEngineWillConnectOutput(AVAudioEngine *engine, AVAudioNode *src, AVAudioN return result; } + // + void SetDevicesUpdatedCallBack(RTCDevicesDidUpdateCallback cb) { os_unfair_lock_lock(&lock_); on_devices_did_update_callback_ = cb; @@ -87,12 +132,42 @@ void SetOnSpeechActivityCallBack(RTCSpeechActivityCallback cb) { os_unfair_lock_unlock(&lock_); } + void SetOnEngineDidCreateCallback(RTCOnEngineDidCreate cb) { + os_unfair_lock_lock(&lock_); + on_engine_did_create_ = cb; + os_unfair_lock_unlock(&lock_); + } + + void SetOnEngineWillEnableCallback(RTCOnEngineWillEnable cb) { + os_unfair_lock_lock(&lock_); + on_engine_will_enable_ = cb; + os_unfair_lock_unlock(&lock_); + } + void SetOnEngineWillStartCallback(RTCOnEngineWillStart cb) { os_unfair_lock_lock(&lock_); on_engine_will_start_ = cb; os_unfair_lock_unlock(&lock_); } + void SetOnEngineDidStopCallback(RTCOnEngineDidStop cb) { 
+ os_unfair_lock_lock(&lock_); + on_engine_did_stop_ = cb; + os_unfair_lock_unlock(&lock_); + } + + void SetOnEngineDidDisableCallback(RTCOnEngineDidDisable cb) { + os_unfair_lock_lock(&lock_); + on_engine_did_disable_ = cb; + os_unfair_lock_unlock(&lock_); + } + + void SetOnEngineWillReleaseCallback(RTCOnEngineWillRelease cb) { + os_unfair_lock_lock(&lock_); + on_engine_will_release_ = cb; + os_unfair_lock_unlock(&lock_); + } + void SetOnEngineWillConnectInputCallback(RTCOnEngineWillConnectInput cb) { os_unfair_lock_lock(&lock_); on_engine_will_connect_input_ = cb; @@ -108,8 +183,10 @@ void SetOnEngineWillConnectOutputCallback(RTCOnEngineWillConnectOutput cb) { bool IsAnyCallbackAttached() { os_unfair_lock_lock(&lock_); bool result = on_devices_did_update_callback_ != nullptr || - on_speech_activity_callback_ != nullptr || on_engine_will_start_ != nullptr || - on_engine_will_connect_input_ != nullptr || + on_speech_activity_callback_ != nullptr || on_engine_did_create_ != nullptr || + on_engine_will_enable_ != nullptr || on_engine_will_start_ != nullptr || + on_engine_did_stop_ != nullptr || on_engine_did_disable_ != nullptr || + on_engine_will_release_ != nullptr || on_engine_will_connect_input_ != nullptr || on_engine_will_connect_output_ != nullptr; os_unfair_lock_unlock(&lock_); return result; @@ -119,7 +196,14 @@ bool IsAnyCallbackAttached() { os_unfair_lock lock_; RTCDevicesDidUpdateCallback on_devices_did_update_callback_; RTCSpeechActivityCallback on_speech_activity_callback_; + + RTCOnEngineDidCreate on_engine_did_create_; + RTCOnEngineWillEnable on_engine_will_enable_; RTCOnEngineWillStart on_engine_will_start_; + RTCOnEngineDidStop on_engine_did_stop_; + RTCOnEngineDidDisable on_engine_did_disable_; + RTCOnEngineWillRelease on_engine_will_release_; + RTCOnEngineWillConnectInput on_engine_will_connect_input_; RTCOnEngineWillConnectOutput on_engine_will_connect_output_; @@ -316,6 +400,22 @@ - (BOOL)setSpeechActivityCallback:(nullable 
RTCSpeechActivityCallback)callback { return YES; } +- (BOOL)setOnEngineDidCreateCallback:(nullable RTCOnEngineDidCreate)callback { + _observer->SetOnEngineDidCreateCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +- (BOOL)setOnEngineWillEnableCallback:(nullable RTCOnEngineWillEnable)callback { + _observer->SetOnEngineWillEnableCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + - (BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback { _observer->SetOnEngineWillStartCallback(callback); webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; @@ -324,6 +424,30 @@ - (BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback { return YES; } +- (BOOL)setOnEngineDidStopCallback:(nullable RTCOnEngineDidStop)callback { + _observer->SetOnEngineDidStopCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +- (BOOL)setOnEngineDidDisableCallback:(nullable RTCOnEngineDidDisable)callback { + _observer->SetOnEngineDidDisableCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + +- (BOOL)setOnEngineWillReleaseCallback:(nullable RTCOnEngineWillRelease)callback { + _observer->SetOnEngineWillReleaseCallback(callback); + webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? 
_observer : nullptr; + _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); + + return YES; +} + - (BOOL)setOnEngineWillConnectInputCallback:(nullable RTCOnEngineWillConnectInput)callback { _observer->SetOnEngineWillConnectInputCallback(callback); webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; @@ -378,21 +502,17 @@ - (void)setAdvancedDuckingEnabled:(BOOL)enabled { [module, enabled] { return module->SetAdvancedDucking(enabled) == 0; }); } -- (AVAudioVoiceProcessingOtherAudioDuckingLevel)duckingLevel API_AVAILABLE(ios(17.0), macos(14.0), - visionos(1.0)) - API_UNAVAILABLE(tvos) { +- (NSInteger)duckingLevel { webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); - if (module == nullptr) return AVAudioVoiceProcessingOtherAudioDuckingLevelDefault; + if (module == nullptr) return 0; return _workerThread->BlockingCall([module] { long value = false; - return module->DuckingLevel(&value) == 0 ? (AVAudioVoiceProcessingOtherAudioDuckingLevel)value - : AVAudioVoiceProcessingOtherAudioDuckingLevelDefault; + return module->DuckingLevel(&value) == 0 ? 
value : 0; }); } -- (void)setDuckingLevel:(AVAudioVoiceProcessingOtherAudioDuckingLevel)value - API_AVAILABLE(ios(17.0), macos(14.0), visionos(1.0)) { +- (void)setDuckingLevel:(NSInteger)value { webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); if (module == nullptr) return; From 2babb14f3e0b60efda31dd5ed88242c4139f7779 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Fri, 17 Jan 2025 16:18:39 +0900 Subject: [PATCH 14/15] Squashed recent improvements Pre initialize mode Pre initialize logic Persistent Checks Fix buffer logic Patch default input_mute state Buffer checks Start buffer on enable Delay estimate 0 Stop engine on interrupt Pass should_resume Silence warning Correct session config Fix state Start logic Misc Rem ses Rem ses2 State helper Minor patch Simplify Change stop create order Working state Ref State helpers --- modules/audio_device/audio_device_buffer.cc | 10 + modules/audio_device/audio_device_buffer.h | 3 + modules/audio_device/audio_engine_device.h | 78 +++++- modules/audio_device/audio_engine_device.mm | 258 ++++++++++++------ .../api/peerconnection/RTCAudioDeviceModule.h | 7 + .../peerconnection/RTCAudioDeviceModule.mm | 34 +++ .../audio/RTCAudioSession+Private.h | 13 +- sdk/objc/components/audio/RTCAudioSession.h | 10 + sdk/objc/components/audio/RTCAudioSession.mm | 1 + .../RTCNativeAudioSessionDelegateAdapter.mm | 2 +- sdk/objc/native/src/audio/audio_device_ios.h | 2 +- sdk/objc/native/src/audio/audio_device_ios.mm | 2 +- .../native/src/audio/audio_session_observer.h | 2 +- 13 files changed, 307 insertions(+), 115 deletions(-) diff --git a/modules/audio_device/audio_device_buffer.cc b/modules/audio_device/audio_device_buffer.cc index 38ed633429..35cdfc6790 100644 --- a/modules/audio_device/audio_device_buffer.cc +++ b/modules/audio_device/audio_device_buffer.cc @@ -193,6 +193,16 @@ void AudioDeviceBuffer::StopRecording() { RTC_LOG(LS_INFO) << "total recording time: " << 
time_since_start; } +bool AudioDeviceBuffer::IsPlaying() { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + return playing_; +} + +bool AudioDeviceBuffer::IsRecording() { + RTC_DCHECK_RUN_ON(&main_thread_checker_); + return recording_; +} + int32_t AudioDeviceBuffer::SetRecordingSampleRate(uint32_t fsHz) { RTC_LOG(LS_INFO) << "SetRecordingSampleRate(" << fsHz << ")"; rec_sample_rate_ = fsHz; diff --git a/modules/audio_device/audio_device_buffer.h b/modules/audio_device/audio_device_buffer.h index b96696eb48..8c18ec56e9 100644 --- a/modules/audio_device/audio_device_buffer.h +++ b/modules/audio_device/audio_device_buffer.h @@ -93,6 +93,9 @@ class AudioDeviceBuffer { void StopPlayout(); void StopRecording(); + bool IsPlaying(); + bool IsRecording(); + int32_t SetRecordingSampleRate(uint32_t fsHz); int32_t SetPlayoutSampleRate(uint32_t fsHz); uint32_t RecordingSampleRate() const; diff --git a/modules/audio_device/audio_engine_device.h b/modules/audio_device/audio_engine_device.h index 48e4a87b64..407023d1a4 100644 --- a/modules/audio_device/audio_engine_device.h +++ b/modules/audio_device/audio_engine_device.h @@ -154,7 +154,7 @@ class AudioEngineDevice : public AudioDeviceModule, // AudioSessionObserver methods. May be called from any thread. 
void OnInterruptionBegin() override; - void OnInterruptionEnd() override; + void OnInterruptionEnd(bool should_resume) override; void OnValidRouteChange() override; void OnCanPlayOrRecordChange(bool can_play_or_record) override; void OnChangedOutputVolume() override; @@ -172,6 +172,9 @@ class AudioEngineDevice : public AudioDeviceModule, int32_t SetDuckingLevel(long level); int32_t DuckingLevel(long* level); + int32_t SetInitRecordingPersistentMode(bool enable); + int32_t InitRecordingPersistentMode(bool* enabled); + int32_t InitAndStartRecording(); enum RenderMode { Device, Manual }; @@ -187,8 +190,9 @@ class AudioEngineDevice : public AudioDeviceModule, // Output will be enabled when input is enabled bool input_follow_mode = true; + bool input_enabled_persistent_mode = false; - bool input_muted = false; + bool input_muted = true; bool is_interrupted = false; RenderMode render_mode = RenderMode::Device; @@ -202,6 +206,8 @@ class AudioEngineDevice : public AudioDeviceModule, output_enabled == rhs.output_enabled && output_running == rhs.output_running && input_follow_mode == rhs.input_follow_mode && + input_enabled_persistent_mode == + rhs.input_enabled_persistent_mode && input_muted == rhs.input_muted && is_interrupted == rhs.is_interrupted && render_mode == rhs.render_mode && @@ -217,7 +223,7 @@ class AudioEngineDevice : public AudioDeviceModule, } bool IsOutputEnabled() const { - return IsOutputInputLinked() ? input_enabled || output_enabled + return IsOutputInputLinked() ? 
IsInputEnabled() || output_enabled : output_enabled; } @@ -226,15 +232,17 @@ : output_running; } - bool IsInputEnabled() const { return input_enabled; } + bool IsInputEnabled() const { + return input_enabled || input_enabled_persistent_mode; + } bool IsInputRunning() const { return input_running; } - bool IsAnyEnabled() const { return input_enabled || output_enabled; } + bool IsAnyEnabled() const { return IsInputEnabled() || output_enabled; } bool IsAnyRunning() const { return input_running || output_running; } bool IsAllEnabled() const { - return IsOutputInputLinked() ? input_enabled - : input_enabled && output_enabled; + return IsOutputInputLinked() ? IsInputEnabled() + : IsInputEnabled() && output_enabled; } bool IsAllRunning() const { @@ -243,6 +251,58 @@ } }; + struct EngineStateUpdate { + EngineState prev; + EngineState next; + + bool HasNoChanges() const { return prev == next; } + + bool DidEnableOutput() const { + return !prev.IsOutputEnabled() && next.IsOutputEnabled(); + } + + bool DidEnableInput() const { + return !prev.IsInputEnabled() && next.IsInputEnabled(); + } + bool DidEnableOutputOrInput() const { + return DidEnableOutput() || DidEnableInput(); + } + + bool DidDisableOutput() const { + return prev.IsOutputEnabled() && !next.IsOutputEnabled(); + } + + bool DidDisableInput() const { + return prev.IsInputEnabled() && !next.IsInputEnabled(); + } + + bool DidAnyEnable() const { return DidEnableOutput() || DidEnableInput(); } + + bool DidAnyDisable() const { + return DidDisableOutput() || DidDisableInput(); + } + + bool DidBeginInterruption() const { + return !prev.is_interrupted && next.is_interrupted; + } + + bool DidEndInterruption() const { + return prev.is_interrupted && !next.is_interrupted; + } + + bool DidUpdateAudioGraph() const { + return (prev.IsInputEnabled() != next.IsInputEnabled()) || + (prev.IsOutputEnabled() != next.IsOutputEnabled()); 
+ } + + // Special case to re-create engine when switching from Speaker & Mic -> + // Speaker only. + bool IsEngineRecreateRequired() const { + return (prev.IsOutputEnabled() && next.IsOutputEnabled()) && + (prev.IsInputEnabled() && !next.IsInputEnabled()); + } + }; + EngineState engine_state_ RTC_GUARDED_BY(thread_); AVAudioInputNode* InputNode(); @@ -250,7 +310,7 @@ class AudioEngineDevice : public AudioDeviceModule, bool IsMicrophonePermissionGranted(); void SetEngineState(std::function state_transform); - void UpdateEngineState(EngineState old_state, EngineState new_state); + void UpdateEngineState(EngineStateUpdate state); // AudioEngine observer methods. May be called from any thread. void OnEngineConfigurationChange(); @@ -280,9 +340,11 @@ class AudioEngineDevice : public AudioDeviceModule, AudioDeviceObserver* observer_ RTC_GUARDED_BY(thread_); +#if defined(WEBRTC_IOS) // Audio interruption observer instance. RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter) * audio_session_observer_ RTC_GUARDED_BY(thread_); +#endif // Avoids running pending task after `this` is Terminated. rtc::scoped_refptr safety_ = diff --git a/modules/audio_device/audio_engine_device.mm b/modules/audio_device/audio_engine_device.mm index f48a155721..46996a34e9 100644 --- a/modules/audio_device/audio_engine_device.mm +++ b/modules/audio_device/audio_engine_device.mm @@ -47,11 +47,13 @@ #define LOGE() RTC_LOG(LS_ERROR) << "AudioEngineDevice::" #define LOGW() RTC_LOG(LS_WARNING) << "AudioEngineDevice::" -const UInt16 kFixedPlayoutDelayEstimate = 30; -const UInt16 kFixedRecordDelayEstimate = 30; +const UInt16 kFixedPlayoutDelayEstimate = 0; +const UInt16 kFixedRecordDelayEstimate = 0; +const UInt16 kStartEngineMaxRetries = 10; // Maximum blocking 1sec. 
+const useconds_t kStartEngineRetryDelayMs = 100; -const size_t kMaximumFramesPerBuffer = 3072; // Maximum slice size for VoiceProcessingIO -const size_t kAudioSampleSize = 2; // Signed 16-bit integer +// const size_t kMaximumFramesPerBuffer = 3072; // Maximum slice size for VoiceProcessingIO +const size_t kAudioSampleSize = 2; // Signed 16-bit integer AudioEngineDevice::AudioEngineDevice(bool bypass_voice_processing) : bypass_voice_processing_(bypass_voice_processing), @@ -67,7 +69,7 @@ [[RTC_OBJC_TYPE(RTCNativeAudioSessionDelegateAdapter) alloc] initWithObserver:this]; // Subscribe to audio session events. RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; - [session pushDelegate:audio_session_observer_]; + [session addDelegate:audio_session_observer_]; #endif // Add observer for configuration changes @@ -103,7 +105,12 @@ safety_->SetNotAlive(); Terminate(); + +#if defined(WEBRTC_IOS) + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + [session removeDelegate:audio_session_observer_]; audio_session_observer_ = nil; +#endif } // MARK: - Main life cycle @@ -228,7 +235,6 @@ SetEngineState([](EngineState state) -> EngineState { state.input_enabled = true; - state.input_muted = true; // Muted by default return state; }); @@ -276,8 +282,8 @@ })); } -void AudioEngineDevice::OnInterruptionEnd() { - LOGI() << "OnInterruptionEnd"; +void AudioEngineDevice::OnInterruptionEnd(bool should_resume) { + LOGI() << "OnInterruptionEnd should_resume: " << should_resume; RTC_DCHECK(thread_); thread_->PostTask(SafeTask(safety_, [this] { @@ -716,17 +722,6 @@ RTC_DCHECK_RUN_ON(thread_); LOGI() << "InitAndStartRecording"; - if (engine_state_.input_running) { - LOGW() << "InitAndStartRecording: Already recording"; - return 0; - } - - audio_device_buffer_->StartRecording(); - - if (fine_audio_buffer_) { - fine_audio_buffer_->ResetRecord(); - } - SetEngineState([](EngineState state) -> EngineState { 
 state.input_enabled = true; state.input_running = true; @@ -788,6 +783,32 @@ return 0; } +int32_t AudioEngineDevice::SetInitRecordingPersistentMode(bool enable) { + RTC_DCHECK_RUN_ON(thread_); + LOGI() << "SetInitRecordingPersistentMode: " << enable; + + SetEngineState([enable](EngineState state) -> EngineState { + state.input_enabled_persistent_mode = enable; + return state; + }); + + return 0; +} + +int32_t AudioEngineDevice::InitRecordingPersistentMode(bool* enabled) { + LOGI() << "InitRecordingPersistentMode"; + RTC_DCHECK_RUN_ON(thread_); + + if (enabled == nullptr) { + return -1; + } + + *enabled = engine_state_.input_enabled_persistent_mode; + LOGI() << "InitRecordingPersistentMode value: " << *enabled; + + return 0; +} + // ---------------------------------------------------------------------------------------------------- // Private - Engine Related @@ -819,79 +840,104 @@ EngineState old_state = engine_state_; EngineState new_state = state_transform(old_state); + EngineStateUpdate state = {old_state, new_state}; - if (old_state == new_state) { - LOGI() << "SetEngineState: Nothing to update"; + if (state.HasNoChanges()) { + LOGI() << "SetEngineState: Nothing to update"; return; } - // Checks + // Check input should be enabled if running. if (new_state.IsInputRunning()) { RTC_DCHECK(new_state.IsInputEnabled()); } + // Check output should be enabled if running. if (new_state.IsOutputRunning()) { RTC_DCHECK(new_state.IsOutputEnabled()); } + // Apply engine state changes engine_state_ = new_state; - UpdateEngineState(old_state, new_state); + UpdateEngineState(state); + + // Buffer should be playing if output is enabled. + if (new_state.IsOutputEnabled()) { + RTC_DCHECK(audio_device_buffer_->IsPlaying()); + } else { + RTC_DCHECK(!audio_device_buffer_->IsPlaying()); + } + + // Buffer should be recording if input is enabled. 
+ if (new_state.IsInputEnabled()) { + RTC_DCHECK(audio_device_buffer_->IsRecording()); + } else { + RTC_DCHECK(!audio_device_buffer_->IsRecording()); + } } -void AudioEngineDevice::UpdateEngineState(EngineState old_state, EngineState new_state) { +void AudioEngineDevice::UpdateEngineState(EngineStateUpdate state) { RTC_DCHECK_RUN_ON(thread_); - bool is_restart_required = (old_state.IsInputEnabled() != new_state.IsInputEnabled()) || - (old_state.IsOutputEnabled() != new_state.IsOutputEnabled()); - - if (!old_state.IsAnyEnabled() && new_state.IsAnyEnabled()) { - LOGI() << "Creating AVAudioEngine..."; - engine_device_ = [[AVAudioEngine alloc] init]; + if (state.prev.IsAnyRunning() && + (!state.next.IsAnyRunning() || state.DidUpdateAudioGraph() || state.DidBeginInterruption() || + state.IsEngineRecreateRequired())) { + LOGI() << "Stopping AVAudioEngine..."; + RTC_DCHECK(engine_device_ != nil); + [engine_device_ stop]; if (observer_ != nullptr) { - observer_->OnEngineDidCreate(engine_device_); + observer_->OnEngineDidStop(engine_device_, state.next.IsOutputEnabled(), + state.next.IsInputEnabled()); } } - if (old_state.IsAnyRunning() && (!new_state.IsAnyRunning() || is_restart_required)) { - LOGI() << "Stopping AVAudioEngine..."; - [engine_device_ stop]; - } else if (old_state.IsAnyRunning() && !old_state.is_interrupted && new_state.is_interrupted) { - LOGI() << "Pausing AVAudioEngine..."; - [engine_device_ pause]; + if (state.IsEngineRecreateRequired()) { + LOGI() << "Recreate required, releasing AVAudioEngine..."; + if (observer_ != nullptr) { + observer_->OnEngineWillRelease(engine_device_); + } + engine_device_ = nil; } - if (old_state.IsAnyRunning() && (!new_state.IsAnyRunning() || is_restart_required || - (!old_state.is_interrupted && new_state.is_interrupted))) { + if (state.next.IsAnyEnabled() && + (!state.prev.IsAnyEnabled() || state.IsEngineRecreateRequired())) { + LOGI() << "Creating AVAudioEngine..."; + RTC_DCHECK(engine_device_ == nullptr); + engine_device_ 
= [[AVAudioEngine alloc] init]; + if (observer_ != nullptr) { - observer_->OnEngineDidStop(engine_device_, new_state.IsOutputEnabled(), - new_state.IsInputEnabled()); + observer_->OnEngineDidCreate(engine_device_); } } - if (old_state.IsAnyRunning() && (!new_state.IsAnyRunning() || is_restart_required || - (!old_state.is_interrupted && new_state.is_interrupted))) { - if (old_state.IsOutputRunning() && !new_state.IsOutputRunning()) { - LOGI() << "Stopping Playout buffer..."; - audio_device_buffer_->StopPlayout(); - } - if (old_state.IsInputRunning() && !new_state.IsInputRunning()) { - LOGI() << "Stopping Record buffer..."; - audio_device_buffer_->StopRecording(); + if (!state.next.IsOutputEnabled() && audio_device_buffer_->IsPlaying()) { + LOGI() << "Stopping Playout buffer..."; + if (engine_device_ != nullptr) { + // Rendering must be stopped first. + RTC_DCHECK(!engine_device_.running); } + audio_device_buffer_->StopPlayout(); } - if ((!old_state.IsOutputEnabled() && new_state.IsOutputEnabled()) || - (!old_state.IsInputEnabled() && new_state.IsInputEnabled())) { - if (observer_ != nullptr) { - // Invoke here before configuring nodes. In iOS, session configuration is required before - // enabling AGC, muted talker etc. - observer_->OnEngineWillEnable(engine_device_, new_state.IsOutputEnabled(), - new_state.IsInputEnabled()); + if (!state.next.IsInputEnabled() && audio_device_buffer_->IsRecording()) { + LOGI() << "Stopping Record buffer..."; + if (engine_device_ != nullptr) { + // Rendering must be stopped first. + RTC_DCHECK(!engine_device_.running); } + audio_device_buffer_->StopRecording(); } - if (!old_state.IsOutputEnabled() && new_state.IsOutputEnabled()) { + if (state.DidAnyEnable() && observer_ != nullptr) { + // Invoke here before configuring nodes. In iOS, session configuration is required before + // enabling AGC, muted talker etc. 
+ observer_->OnEngineWillEnable(engine_device_, state.next.IsOutputEnabled(), + state.next.IsInputEnabled()); + } + + if (state.next.IsOutputEnabled() && + (!state.prev.IsOutputEnabled() || state.IsEngineRecreateRequired())) { LOGI() << "Enabling output for AVAudioEngine..."; RTC_DCHECK(!engine_device_.running); @@ -948,7 +994,8 @@ format:engine_output_format]; } - } else if (old_state.IsOutputEnabled() && !new_state.IsOutputEnabled()) { + } else if ((state.prev.IsOutputEnabled() && !state.next.IsOutputEnabled()) && + !state.IsEngineRecreateRequired()) { LOGI() << "Disabling output for AVAudioEngine..."; RTC_DCHECK(!engine_device_.running); @@ -961,7 +1008,8 @@ } } - if (!old_state.IsInputEnabled() && new_state.IsInputEnabled()) { + if (state.next.IsInputEnabled() && + (!state.prev.IsInputEnabled() || state.IsEngineRecreateRequired())) { LOGI() << "Enabling input for AVAudioEngine..."; RTC_DCHECK(!engine_device_.running); @@ -969,7 +1017,7 @@ NSError* error = nil; BOOL set_vp_result = [this->InputNode() setVoiceProcessingEnabled:YES error:&error]; if (!set_vp_result) { - NSLog(@"setVoiceProcessingEnabled error: %@", error.localizedDescription); + NSLog(@"AudioEngineDevice setVoiceProcessingEnabled error: %@", error.localizedDescription); RTC_DCHECK(set_vp_result); } LOGI() << "setVoiceProcessingEnabled (input) result: " << set_vp_result ? "YES" : "NO"; @@ -1067,7 +1115,8 @@ // Convert to RTC's internal format before passing buffers to SinkNode. 
[engine_device_ connect:input_mixer_node_ to:sink_node_ format:rtc_input_format]; - } else if (old_state.IsInputEnabled() && !new_state.IsInputEnabled()) { + } else if ((state.prev.IsInputEnabled() && !state.next.IsInputEnabled()) && + !state.IsEngineRecreateRequired()) { LOGI() << "Disabling input for AVAudioEngine..."; RTC_DCHECK(!engine_device_.running); @@ -1088,32 +1137,29 @@ } } - if ((old_state.IsOutputEnabled() && !new_state.IsOutputEnabled()) || - (old_state.IsInputEnabled() && !new_state.IsInputEnabled())) { - if (observer_ != nullptr) { - observer_->OnEngineDidDisable(engine_device_, new_state.IsOutputEnabled(), - new_state.IsInputEnabled()); - } + if (state.DidAnyDisable() && observer_ != nullptr) { + observer_->OnEngineDidDisable(engine_device_, state.next.IsOutputEnabled(), + state.next.IsInputEnabled()); } - if (new_state.IsInputEnabled()) { - if (this->InputNode().voiceProcessingEnabled) { - // Re-apply muted state. - this->InputNode().voiceProcessingInputMuted = new_state.input_muted; - } + if (state.next.IsInputEnabled() && this->InputNode().voiceProcessingEnabled && + this->InputNode().voiceProcessingInputMuted != state.next.input_muted) { + LOGI() << "setVoiceProcessingInputMuted: " << state.next.input_muted; + this->InputNode().voiceProcessingInputMuted = state.next.input_muted; } #if !TARGET_OS_TV - if (new_state.IsInputEnabled() && this->InputNode().voiceProcessingEnabled && - (!old_state.IsInputEnabled() || (old_state.advanced_ducking != new_state.advanced_ducking || - old_state.ducking_level != new_state.ducking_level))) { + if (state.next.IsInputEnabled() && this->InputNode().voiceProcessingEnabled && + (!state.prev.IsInputEnabled() || + (state.prev.advanced_ducking != state.next.advanced_ducking || + state.prev.ducking_level != state.next.ducking_level))) { // Other audio ducking. 
// iOS 17.0+, iPadOS 17.0+, Mac Catalyst 17.0+, macOS 14.0+, visionOS 1.0+ if (@available(iOS 17.0, macCatalyst 17.0, macOS 14.0, visionOS 1.0, *)) { AVAudioVoiceProcessingOtherAudioDuckingConfiguration ducking_config; - ducking_config.enableAdvancedDucking = new_state.advanced_ducking; + ducking_config.enableAdvancedDucking = state.next.advanced_ducking; ducking_config.duckingLevel = - (AVAudioVoiceProcessingOtherAudioDuckingLevel)new_state.ducking_level; + (AVAudioVoiceProcessingOtherAudioDuckingLevel)state.next.ducking_level; LOGI() << "setVoiceProcessingOtherAudioDuckingConfiguration"; this->InputNode().voiceProcessingOtherAudioDuckingConfiguration = ducking_config; @@ -1121,40 +1167,65 @@ } #endif - if ((!old_state.IsOutputRunning() && new_state.IsOutputRunning() && - !new_state.IsInputRunning()) || - (!old_state.IsOutputEnabled() && new_state.IsOutputEnabled() && new_state.IsInputRunning())) { + // Start playout buffer if output is running + if (state.next.IsOutputEnabled() && !audio_device_buffer_->IsPlaying()) { + if (engine_device_ != nullptr) { + // Rendering must be stopped first. + RTC_DCHECK(!engine_device_.running); + } LOGI() << "Starting Playout buffer..."; audio_device_buffer_->StartPlayout(); fine_audio_buffer_->ResetPlayout(); } - if ((!old_state.IsInputRunning() && new_state.IsInputRunning() && !new_state.IsOutputRunning()) || - (!old_state.IsInputEnabled() && new_state.IsInputEnabled() && new_state.IsOutputRunning())) { + // Start recording buffer if input is running + if (state.next.IsInputEnabled() && !audio_device_buffer_->IsRecording()) { + if (engine_device_ != nullptr) { + // Rendering must be stopped first. 
+ RTC_DCHECK(!engine_device_.running); + } LOGI() << "Starting Record buffer..."; audio_device_buffer_->StartRecording(); fine_audio_buffer_->ResetRecord(); } - if (new_state.IsAnyRunning()) { - if (!old_state.IsAnyRunning() || (old_state.is_interrupted && !new_state.is_interrupted) || - is_restart_required) { + if (state.next.IsAnyRunning()) { + if (!state.prev.IsAnyRunning() || state.DidEndInterruption() || state.DidUpdateAudioGraph() || + state.IsEngineRecreateRequired()) { if (observer_ != nullptr) { - observer_->OnEngineWillStart(engine_device_, new_state.IsOutputEnabled(), - new_state.IsInputEnabled()); + observer_->OnEngineWillStart(engine_device_, state.next.IsOutputEnabled(), + state.next.IsInputEnabled()); } LOGI() << "Starting AVAudioEngine..."; NSError* error = nil; - BOOL start_result = [engine_device_ startAndReturnError:&error]; + BOOL start_result = false; + int start_retry_count = 0; + + // Workaround for error -66637, when recovering from interruptions with categoryMode: + // .mixWithOthers. 
+ while (!start_result && start_retry_count < kStartEngineMaxRetries) { + if (start_retry_count > 0) { + LOGW() << "Retrying engine start (attempt " << start_retry_count + 1 << "/" + << kStartEngineMaxRetries << ")"; + usleep(kStartEngineRetryDelayMs * 1000); + } + + start_result = [engine_device_ startAndReturnError:&error]; + if (!start_result) { + LOGE() << "Failed to start engine: " << error.localizedDescription.UTF8String; + start_retry_count++; + } + } + if (!start_result) { - LOGE() << "Failed to start engine: " << error.localizedDescription.UTF8String; + LOGE() << "Failed to start engine after " << kStartEngineMaxRetries << " attempts"; DebugAudioEngine(); } } } - if (old_state.IsAnyEnabled() && !new_state.IsAnyEnabled()) { + if (state.prev.IsAnyEnabled() && !state.next.IsAnyEnabled()) { if (observer_ != nullptr) { observer_->OnEngineWillRelease(engine_device_); } @@ -1281,6 +1352,11 @@ void AudioEngineDevice::DebugAudioEngine() { RTC_DCHECK_RUN_ON(thread_); +#if TARGET_OS_IOS + RTC_OBJC_TYPE(RTCAudioSession)* session = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance]; + RTCLog(@"RTCAudioSession %@", session); +#endif + auto padded_string = [](int pad) { return std::string(pad * 2, ' '); }; auto audio_format = [](AVAudioFormat* format) { diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h index d16fe267b5..58145b36c5 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h @@ -83,6 +83,13 @@ RTC_OBJC_EXPORT - (BOOL)initAndStartRecording; +@property(nonatomic, readonly) BOOL isPlayoutInitialized; +@property(nonatomic, readonly) BOOL isRecordingInitialized; +@property(nonatomic, readonly) BOOL isPlaying; +@property(nonatomic, readonly) BOOL isRecording; + +@property(nonatomic, getter=isInitRecordingPersistentMode) BOOL initRecordingPersistentMode; + // Manual rendering. 
@property(nonatomic, readonly, getter=isManualRenderingMode) BOOL manualRenderingMode; - (BOOL)setManualRenderingMode:(BOOL)enabled; diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm index 603a288303..0050ed6535 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm @@ -464,8 +464,42 @@ - (BOOL)setOnEngineWillConnectOutputCallback:(nullable RTCOnEngineWillConnectOut return YES; } +- (BOOL)isPlayoutInitialized { + return _workerThread->BlockingCall([self] { return _native->PlayoutIsInitialized(); }); +} + +- (BOOL)isRecordingInitialized { + return _workerThread->BlockingCall([self] { return _native->RecordingIsInitialized(); }); +} + +- (BOOL)isPlaying { + return _workerThread->BlockingCall([self] { return _native->Playing(); }); +} + +- (BOOL)isRecording { + return _workerThread->BlockingCall([self] { return _native->Recording(); }); +} + #pragma mark - Unique to AudioEngineDevice +- (BOOL)isInitRecordingPersistentMode { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return NO; + + return _workerThread->BlockingCall([module] { + bool value = false; + return module->InitRecordingPersistentMode(&value) == 0 ? 
value : NO; + }); +} + +- (void)setInitRecordingPersistentMode:(BOOL)enabled { + webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); + if (module == nullptr) return; + + _workerThread->BlockingCall( + [module, enabled] { return module->SetInitRecordingPersistentMode(enabled); }); +} + - (BOOL)isManualRenderingMode { webrtc::AudioEngineDevice *module = dynamic_cast(_native.get()); if (module == nullptr) return NO; diff --git a/sdk/objc/components/audio/RTCAudioSession+Private.h b/sdk/objc/components/audio/RTCAudioSession+Private.h index 2be1b9fb3d..199c7b34bc 100644 --- a/sdk/objc/components/audio/RTCAudioSession+Private.h +++ b/sdk/objc/components/audio/RTCAudioSession+Private.h @@ -14,18 +14,7 @@ NS_ASSUME_NONNULL_BEGIN @class RTC_OBJC_TYPE(RTCAudioSessionConfiguration); -@interface RTC_OBJC_TYPE (RTCAudioSession) -() - - /** Number of times setActive:YES has succeeded without a balanced call to - * setActive:NO. - */ - @property(nonatomic, readonly) int activationCount; - -/** The number of times `beginWebRTCSession` was called without a balanced call - * to `endWebRTCSession`. - */ -@property(nonatomic, readonly) int webRTCSessionCount; +@interface RTC_OBJC_TYPE (RTCAudioSession) () /** Convenience BOOL that checks useManualAudio and isAudioEnebled. */ @property(readonly) BOOL canPlayOrRecord; diff --git a/sdk/objc/components/audio/RTCAudioSession.h b/sdk/objc/components/audio/RTCAudioSession.h index 2730664858..6fef04cb29 100644 --- a/sdk/objc/components/audio/RTCAudioSession.h +++ b/sdk/objc/components/audio/RTCAudioSession.h @@ -196,6 +196,16 @@ RTC_OBJC_EXPORT */ @property(nonatomic) BOOL ignoresPreferredAttributeConfigurationErrors; +/** Number of times setActive:YES has succeeded without a balanced call to + * setActive:NO. + */ +@property(nonatomic, readonly) int activationCount; + +/** The number of times `beginWebRTCSession` was called without a balanced call + * to `endWebRTCSession`. 
+ */ +@property(nonatomic, readonly) int webRTCSessionCount; + /** Default constructor. */ + (instancetype)sharedInstance; - (instancetype)init NS_UNAVAILABLE; diff --git a/sdk/objc/components/audio/RTCAudioSession.mm b/sdk/objc/components/audio/RTCAudioSession.mm index 11d1a1c337..17c8253546 100644 --- a/sdk/objc/components/audio/RTCAudioSession.mm +++ b/sdk/objc/components/audio/RTCAudioSession.mm @@ -808,6 +808,7 @@ - (void)updateAudioSessionAfterEvent { withOptions:options error:&error]) { self.isActive = shouldActivate; + RTCLogError(@"Did set session active to %d", shouldActivate); } else { RTCLogError(@"Failed to set session active to %d. Error:%@", shouldActivate, error.localizedDescription); diff --git a/sdk/objc/components/audio/RTCNativeAudioSessionDelegateAdapter.mm b/sdk/objc/components/audio/RTCNativeAudioSessionDelegateAdapter.mm index f652ad1e5f..e049b48970 100644 --- a/sdk/objc/components/audio/RTCNativeAudioSessionDelegateAdapter.mm +++ b/sdk/objc/components/audio/RTCNativeAudioSessionDelegateAdapter.mm @@ -34,7 +34,7 @@ - (void)audioSessionDidBeginInterruption:(RTC_OBJC_TYPE(RTCAudioSession) *)sessi - (void)audioSessionDidEndInterruption:(RTC_OBJC_TYPE(RTCAudioSession) *)session shouldResumeSession:(BOOL)shouldResumeSession { - _observer->OnInterruptionEnd(); + _observer->OnInterruptionEnd(shouldResumeSession); } - (void)audioSessionDidChangeRoute:(RTC_OBJC_TYPE(RTCAudioSession) *)session diff --git a/sdk/objc/native/src/audio/audio_device_ios.h b/sdk/objc/native/src/audio/audio_device_ios.h index 506487a1c2..83c6dc9e4c 100644 --- a/sdk/objc/native/src/audio/audio_device_ios.h +++ b/sdk/objc/native/src/audio/audio_device_ios.h @@ -143,7 +143,7 @@ class AudioDeviceIOS : public AudioDeviceGeneric, // AudioSessionObserver methods. May be called from any thread. 
void OnInterruptionBegin() override; - void OnInterruptionEnd() override; + void OnInterruptionEnd(bool should_resume) override; void OnValidRouteChange() override; void OnCanPlayOrRecordChange(bool can_play_or_record) override; void OnChangedOutputVolume() override; diff --git a/sdk/objc/native/src/audio/audio_device_ios.mm b/sdk/objc/native/src/audio/audio_device_ios.mm index 660edf7439..c9ddf56927 100644 --- a/sdk/objc/native/src/audio/audio_device_ios.mm +++ b/sdk/objc/native/src/audio/audio_device_ios.mm @@ -359,7 +359,7 @@ static void LogDeviceInfo() { thread_->PostTask(SafeTask(safety_, [this] { HandleInterruptionBegin(); })); } -void AudioDeviceIOS::OnInterruptionEnd() { +void AudioDeviceIOS::OnInterruptionEnd(bool should_resume) { RTC_DCHECK(thread_); LOGI() << "OnInterruptionEnd"; thread_->PostTask(SafeTask(safety_, [this] { HandleInterruptionEnd(); })); diff --git a/sdk/objc/native/src/audio/audio_session_observer.h b/sdk/objc/native/src/audio/audio_session_observer.h index f7c44c8184..aeddbf9bcc 100644 --- a/sdk/objc/native/src/audio/audio_session_observer.h +++ b/sdk/objc/native/src/audio/audio_session_observer.h @@ -22,7 +22,7 @@ class AudioSessionObserver { virtual void OnInterruptionBegin() = 0; // Called when audio session interruption ends. - virtual void OnInterruptionEnd() = 0; + virtual void OnInterruptionEnd(bool should_resume) = 0; // Called when audio route changes. 
virtual void OnValidRouteChange() = 0; From be003d591def802a6b061d5dac0fc964699839dc Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:03:18 +0900 Subject: [PATCH 15/15] RTCAudioDeviceModuleDelegate --- .../api/peerconnection/RTCAudioDeviceModule.h | 86 ++++-- .../peerconnection/RTCAudioDeviceModule.mm | 270 +++--------------- 2 files changed, 106 insertions(+), 250 deletions(-) diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h index 58145b36c5..2b6522341d 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.h @@ -28,20 +28,65 @@ typedef NS_ENUM(NSInteger, RTCSpeechActivityEvent) { RTCSpeechActivityEventEnded, }; -typedef void (^RTCDevicesDidUpdateCallback)(); -typedef void (^RTCSpeechActivityCallback)(RTCSpeechActivityEvent); +@class RTC_OBJC_TYPE(RTCAudioDeviceModule); + +RTC_OBJC_EXPORT @protocol RTC_OBJC_TYPE(RTCAudioDeviceModuleDelegate) + +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + didReceiveSpeechActivityEvent:(RTCSpeechActivityEvent)speechActivityEvent + NS_SWIFT_NAME(audioDeviceModule(_:didReceiveSpeechActivityEvent:)); + +// Engine events +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + didCreateEngine:(AVAudioEngine *)engine + NS_SWIFT_NAME(audioDeviceModule(_:didCreateEngine:)); + +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + willEnableEngine:(AVAudioEngine *)engine + isPlayoutEnabled:(BOOL)isPlayoutEnabled + isRecordingEnabled:(BOOL)isRecordingEnabled + NS_SWIFT_NAME(audioDeviceModule(_:willEnableEngine:isPlayoutEnabled:isRecordingEnabled:)); + +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + willStartEngine:(AVAudioEngine *)engine + isPlayoutEnabled:(BOOL)isPlayoutEnabled + 
isRecordingEnabled:(BOOL)isRecordingEnabled + NS_SWIFT_NAME(audioDeviceModule(_:willStartEngine:isPlayoutEnabled:isRecordingEnabled:)); + +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + didStopEngine:(AVAudioEngine *)engine + isPlayoutEnabled:(BOOL)isPlayoutEnabled + isRecordingEnabled:(BOOL)isRecordingEnabled + NS_SWIFT_NAME(audioDeviceModule(_:didStopEngine:isPlayoutEnabled:isRecordingEnabled:)); + +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + didDisableEngine:(AVAudioEngine *)engine + isPlayoutEnabled:(BOOL)isPlayoutEnabled + isRecordingEnabled:(BOOL)isRecordingEnabled + NS_SWIFT_NAME(audioDeviceModule(_:didDisableEngine:isPlayoutEnabled:isRecordingEnabled:)); + +- (void)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + willReleaseEngine:(AVAudioEngine *)engine + NS_SWIFT_NAME(audioDeviceModule(_:willReleaseEngine:)); + +- (BOOL)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + engine:(AVAudioEngine *)engine + configureInputFromSource:(AVAudioNode *)source + toDestination:(AVAudioNode *)destination + withFormat:(AVAudioFormat *)format + NS_SWIFT_NAME(audioDeviceModule(_:engine:configureInputFromSource:toDestination:format:)); + +- (BOOL)audioDeviceModule:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + engine:(AVAudioEngine *)engine + configureOutputFromSource:(AVAudioNode *)source + toDestination:(AVAudioNode *)destination + withFormat:(AVAudioFormat *)format + NS_SWIFT_NAME(audioDeviceModule(_:engine:configureOutputFromSource:toDestination:format:)); + +- (void)audioDeviceModuleDidUpdateDevices:(RTC_OBJC_TYPE(RTCAudioDeviceModule) *)audioDeviceModule + NS_SWIFT_NAME(audioDeviceModuleDidUpdateDevices(_:)); -typedef void (^RTCOnEngineDidCreate)(AVAudioEngine *); -typedef void (^RTCOnEngineWillEnable)(AVAudioEngine *, BOOL, BOOL); -typedef void (^RTCOnEngineWillStart)(AVAudioEngine *, BOOL, BOOL); -typedef void 
(^RTCOnEngineDidStop)(AVAudioEngine *, BOOL, BOOL); -typedef void (^RTCOnEngineDidDisable)(AVAudioEngine *, BOOL, BOOL); -typedef void (^RTCOnEngineWillRelease)(AVAudioEngine *); - -typedef bool (^RTCOnEngineWillConnectInput)(AVAudioEngine *, AVAudioNode *, AVAudioNode *, - AVAudioFormat *); -typedef bool (^RTCOnEngineWillConnectOutput)(AVAudioEngine *, AVAudioNode *, AVAudioNode *, - AVAudioFormat *); +@end RTC_OBJC_EXPORT @interface RTC_OBJC_TYPE (RTCAudioDeviceModule) : NSObject @@ -61,19 +106,6 @@ RTC_OBJC_EXPORT - (BOOL)trySetOutputDevice:(nullable RTC_OBJC_TYPE(RTCIODevice) *)device; - (BOOL)trySetInputDevice:(nullable RTC_OBJC_TYPE(RTCIODevice) *)device; -- (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback; -- (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback; - -- (BOOL)setOnEngineDidCreateCallback:(nullable RTCOnEngineDidCreate)callback; -- (BOOL)setOnEngineWillEnableCallback:(nullable RTCOnEngineWillEnable)callback; -- (BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback; -- (BOOL)setOnEngineDidStopCallback:(nullable RTCOnEngineDidStop)callback; -- (BOOL)setOnEngineDidDisableCallback:(nullable RTCOnEngineDidDisable)callback; -- (BOOL)setOnEngineWillReleaseCallback:(nullable RTCOnEngineWillRelease)callback; - -- (BOOL)setOnEngineWillConnectInputCallback:(nullable RTCOnEngineWillConnectInput)callback; -- (BOOL)setOnEngineWillConnectOutputCallback:(nullable RTCOnEngineWillConnectOutput)callback; - - (BOOL)startPlayout; - (BOOL)stopPlayout; - (BOOL)initPlayout; @@ -90,6 +122,8 @@ RTC_OBJC_EXPORT @property(nonatomic, getter=isInitRecordingPersistentMode) BOOL initRecordingPersistentMode; +@property(nonatomic, weak, nullable) id observer; + // Manual rendering. 
@property(nonatomic, readonly, getter=isManualRenderingMode) BOOL manualRenderingMode; - (BOOL)setManualRenderingMode:(BOOL)enabled; diff --git a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm index 0050ed6535..4fe5649515 100644 --- a/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm +++ b/sdk/objc/api/peerconnection/RTCAudioDeviceModule.mm @@ -26,186 +26,77 @@ class AudioDeviceObserver : public webrtc::AudioDeviceObserver { public: - AudioDeviceObserver() : lock_(OS_UNFAIR_LOCK_INIT) {} + AudioDeviceObserver(RTC_OBJC_TYPE(RTCAudioDeviceModule) * adm) { adm_ = adm; } - void OnDevicesUpdated() override { - os_unfair_lock_lock(&lock_); - if (on_devices_did_update_callback_) { - on_devices_did_update_callback_(); - } - os_unfair_lock_unlock(&lock_); - } + void OnDevicesUpdated() override { [delegate_ audioDeviceModuleDidUpdateDevices:adm_]; } void OnSpeechActivityEvent(webrtc::AudioDeviceModule::SpeechActivityEvent event) override { - os_unfair_lock_lock(&lock_); - if (on_speech_activity_callback_) { - on_speech_activity_callback_(ConvertSpeechActivityEvent(event)); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ + didReceiveSpeechActivityEvent:ConvertSpeechActivityEvent(event)]; } void OnEngineDidCreate(AVAudioEngine *engine) override { - os_unfair_lock_lock(&lock_); - if (on_engine_did_create_) { - on_engine_did_create_(engine); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ didCreateEngine:engine]; } void OnEngineWillEnable(AVAudioEngine *engine, bool playout_enabled, bool recording_enabled) override { - os_unfair_lock_lock(&lock_); - if (on_engine_will_enable_) { - on_engine_will_enable_(engine, playout_enabled, recording_enabled); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ + willEnableEngine:engine + isPlayoutEnabled:playout_enabled + isRecordingEnabled:recording_enabled]; } void OnEngineWillStart(AVAudioEngine *engine, 
bool playout_enabled, bool recording_enabled) override { - os_unfair_lock_lock(&lock_); - if (on_engine_will_start_) { - on_engine_will_start_(engine, playout_enabled, recording_enabled); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ + willStartEngine:engine + isPlayoutEnabled:playout_enabled + isRecordingEnabled:recording_enabled]; } void OnEngineDidStop(AVAudioEngine *engine, bool playout_enabled, bool recording_enabled) override { - os_unfair_lock_lock(&lock_); - if (on_engine_did_stop_) { - on_engine_did_stop_(engine, playout_enabled, recording_enabled); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ + didStopEngine:engine + isPlayoutEnabled:playout_enabled + isRecordingEnabled:recording_enabled]; } void OnEngineDidDisable(AVAudioEngine *engine, bool playout_enabled, bool recording_enabled) override { - os_unfair_lock_lock(&lock_); - if (on_engine_did_disable_) { - on_engine_did_disable_(engine, playout_enabled, recording_enabled); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ + didDisableEngine:engine + isPlayoutEnabled:playout_enabled + isRecordingEnabled:recording_enabled]; } void OnEngineWillRelease(AVAudioEngine *engine) override { - os_unfair_lock_lock(&lock_); - if (on_engine_will_release_) { - on_engine_will_release_(engine); - } - os_unfair_lock_unlock(&lock_); + [delegate_ audioDeviceModule:adm_ willReleaseEngine:engine]; } bool OnEngineWillConnectInput(AVAudioEngine *engine, AVAudioNode *src, AVAudioNode *dst, AVAudioFormat *format) override { - bool result = false; - os_unfair_lock_lock(&lock_); - if (on_engine_will_connect_input_) { - result = on_engine_will_connect_input_(engine, src, dst, format); - } - os_unfair_lock_unlock(&lock_); - return result; + return [delegate_ audioDeviceModule:adm_ + engine:engine + configureInputFromSource:src + toDestination:dst + withFormat:format]; } bool OnEngineWillConnectOutput(AVAudioEngine *engine, AVAudioNode *src, AVAudioNode 
*dst, AVAudioFormat *format) override { - bool result = false; - os_unfair_lock_lock(&lock_); - if (on_engine_will_connect_output_) { - result = on_engine_will_connect_output_(engine, src, dst, format); - } - os_unfair_lock_unlock(&lock_); - return result; + return [delegate_ audioDeviceModule:adm_ + engine:engine + configureOutputFromSource:src + toDestination:dst + withFormat:format]; } - // - - void SetDevicesUpdatedCallBack(RTCDevicesDidUpdateCallback cb) { - os_unfair_lock_lock(&lock_); - on_devices_did_update_callback_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnSpeechActivityCallBack(RTCSpeechActivityCallback cb) { - os_unfair_lock_lock(&lock_); - on_speech_activity_callback_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineDidCreateCallback(RTCOnEngineDidCreate cb) { - os_unfair_lock_lock(&lock_); - on_engine_did_create_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineWillEnableCallback(RTCOnEngineWillEnable cb) { - os_unfair_lock_lock(&lock_); - on_engine_will_enable_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineWillStartCallback(RTCOnEngineWillStart cb) { - os_unfair_lock_lock(&lock_); - on_engine_will_start_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineDidStopCallback(RTCOnEngineDidStop cb) { - os_unfair_lock_lock(&lock_); - on_engine_did_stop_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineDidDisableCallback(RTCOnEngineDidDisable cb) { - os_unfair_lock_lock(&lock_); - on_engine_did_disable_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineWillReleaseCallback(RTCOnEngineWillRelease cb) { - os_unfair_lock_lock(&lock_); - on_engine_will_release_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineWillConnectInputCallback(RTCOnEngineWillConnectInput cb) { - os_unfair_lock_lock(&lock_); - on_engine_will_connect_input_ = cb; - os_unfair_lock_unlock(&lock_); - } - - void SetOnEngineWillConnectOutputCallback(RTCOnEngineWillConnectOutput cb) { - 
os_unfair_lock_lock(&lock_); - on_engine_will_connect_output_ = cb; - os_unfair_lock_unlock(&lock_); - } - - bool IsAnyCallbackAttached() { - os_unfair_lock_lock(&lock_); - bool result = on_devices_did_update_callback_ != nullptr || - on_speech_activity_callback_ != nullptr || on_engine_did_create_ != nullptr || - on_engine_will_enable_ != nullptr || on_engine_will_start_ != nullptr || - on_engine_did_stop_ != nullptr || on_engine_did_disable_ != nullptr || - on_engine_will_release_ != nullptr || on_engine_will_connect_input_ != nullptr || - on_engine_will_connect_output_ != nullptr; - os_unfair_lock_unlock(&lock_); - return result; - } + __weak id delegate_; private: - os_unfair_lock lock_; - RTCDevicesDidUpdateCallback on_devices_did_update_callback_; - RTCSpeechActivityCallback on_speech_activity_callback_; - - RTCOnEngineDidCreate on_engine_did_create_; - RTCOnEngineWillEnable on_engine_will_enable_; - RTCOnEngineWillStart on_engine_will_start_; - RTCOnEngineDidStop on_engine_did_stop_; - RTCOnEngineDidDisable on_engine_did_disable_; - RTCOnEngineWillRelease on_engine_will_release_; - - RTCOnEngineWillConnectInput on_engine_will_connect_input_; - RTCOnEngineWillConnectOutput on_engine_will_connect_output_; + __weak RTC_OBJC_TYPE(RTCAudioDeviceModule) * adm_; RTCSpeechActivityEvent ConvertSpeechActivityEvent( webrtc::AudioDeviceModule::SpeechActivityEvent event) { @@ -226,6 +117,17 @@ @implementation RTC_OBJC_TYPE (RTCAudioDeviceModule) { AudioDeviceObserver *_observer; } +- (id)observer { + return _workerThread->BlockingCall([self] { return _observer->delegate_; }); +} + +- (void)setObserver:(id)observer { + _workerThread->BlockingCall([self, observer] { + _observer->delegate_ = observer; + _native->SetObserver(observer != nil ? 
_observer : nullptr); + }); +} + - (instancetype)initWithNativeModule:(rtc::scoped_refptr)module workerThread:(rtc::Thread *)workerThread { RTCLogInfo(@"RTCAudioDeviceModule initWithNativeModule:workerThread:"); @@ -234,7 +136,7 @@ - (instancetype)initWithNativeModule:(rtc::scoped_refptrBlockingCall([module] { return module->InitAndStartRecording() == 0; }); } -- (BOOL)setDevicesDidUpdateCallback:(nullable RTCDevicesDidUpdateCallback)callback { - _observer->SetDevicesUpdatedCallBack(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setSpeechActivityCallback:(nullable RTCSpeechActivityCallback)callback { - _observer->SetOnSpeechActivityCallBack(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineDidCreateCallback:(nullable RTCOnEngineDidCreate)callback { - _observer->SetOnEngineDidCreateCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineWillEnableCallback:(nullable RTCOnEngineWillEnable)callback { - _observer->SetOnEngineWillEnableCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineWillStartCallback:(nullable RTCOnEngineWillStart)callback { - _observer->SetOnEngineWillStartCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? 
_observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineDidStopCallback:(nullable RTCOnEngineDidStop)callback { - _observer->SetOnEngineDidStopCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineDidDisableCallback:(nullable RTCOnEngineDidDisable)callback { - _observer->SetOnEngineDidDisableCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineWillReleaseCallback:(nullable RTCOnEngineWillRelease)callback { - _observer->SetOnEngineWillReleaseCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineWillConnectInputCallback:(nullable RTCOnEngineWillConnectInput)callback { - _observer->SetOnEngineWillConnectInputCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - -- (BOOL)setOnEngineWillConnectOutputCallback:(nullable RTCOnEngineWillConnectOutput)callback { - _observer->SetOnEngineWillConnectOutputCallback(callback); - webrtc::AudioDeviceObserver *observer = _observer->IsAnyCallbackAttached() ? _observer : nullptr; - _workerThread->BlockingCall([self, observer] { _native->SetObserver(observer); }); - - return YES; -} - - (BOOL)isPlayoutInitialized { return _workerThread->BlockingCall([self] { return _native->PlayoutIsInitialized(); }); }