From 99855287eb740a55f6d1f7df082be7ab08e5270f Mon Sep 17 00:00:00 2001
From: Bart Brouns <bart@magnetophon.nl>
Date: Wed, 4 Dec 2024 21:02:49 +0100
Subject: [PATCH] wip svf based eq

---
 Cargo.toml        |  1 +
 src/delay_tap.rs  |  4 ++-
 src/lib.rs        | 83 ++++++++++++++++++++++++++++++-----------------
 src/svf_simper.rs | 74 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 132 insertions(+), 30 deletions(-)
 create mode 100644 src/svf_simper.rs
diff --git a/Cargo.toml b/Cargo.toml
index 213f717..0846207 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ crate-type = ["cdylib", "lib"]
 # Remove the `assert_process_allocs` feature to allow allocations on the audio
 # thread in debug builds.
 nih_plug = { git = "https://github.com/robbert-vdh/nih-plug.git", features = ["assert_process_allocs", "standalone"] }
+# nih_plug = { git = "https://github.com/robbert-vdh/nih-plug.git", features = ["assert_process_allocs"] }
 bit_mask_ring_buf = "0.5"
 
 nih_plug_vizia = { git = "https://github.com/robbert-vdh/nih-plug.git" }
diff --git a/src/delay_tap.rs b/src/delay_tap.rs
index 87acd30..f17f54a 100644
--- a/src/delay_tap.rs
+++ b/src/delay_tap.rs
@@ -3,7 +3,7 @@ use std::sync::Arc;
 use synfx_dsp::fh_va::FilterParams;
 use synfx_dsp::fh_va::LadderFilter;
 
-use crate::{Equalizer, MAX_BLOCK_SIZE, NO_LEARNED_NOTE};
+use crate::{Equalizer, SVFSimper, MAX_BLOCK_SIZE, NO_LEARNED_NOTE};
 
 #[derive(Debug, Clone)]
 pub struct DelayTap {
@@ -14,6 +14,7 @@ pub struct DelayTap {
     pub ladders: LadderFilter,
     pub eq_l: Equalizer<f32>,
     pub eq_r: Equalizer<f32>,
+    pub shelving_eq: SVFSimper,
     pub mute_in_delayed: Box<[bool]>,
     /// Fades between 0 and 1 with timings based on the global attack and release settings.
     pub amp_envelope: Smoother<f32>,
@@ -53,6 +54,7 @@ impl DelayTap {
             ladders: LadderFilter::new(filter_params),
             eq_l: Equalizer::new(0.0),
             eq_r: Equalizer::new(0.0),
+            shelving_eq: SVFSimper::new(1000.0, 0.0, 48000.0),
             mute_in_delayed: vec![false; MAX_BLOCK_SIZE].into_boxed_slice(),
             amp_envelope: Smoother::new(SmoothingStyle::Linear(13.0)),
             internal_id: 0,
diff --git a/src/lib.rs b/src/lib.rs
index e24d025..836fcc6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -45,6 +45,7 @@ use simple_eq::design::Curve;
 use simple_eq::Equalizer;
 use std::ops::Index;
 use std::simd::f32x4;
+use std::simd::num::SimdFloat;
 use std::sync::atomic::{
     AtomicBool, AtomicI32, AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering,
 };
@@ -53,7 +54,9 @@ use synfx_dsp::fh_va::{FilterParams, LadderMode};
 
 mod delay_tap;
 mod editor;
+mod svf_simper;
 use delay_tap::DelayTap;
+use svf_simper::SVFSimper;
 
 // max seconds per tap
 const MAX_TAP_SECONDS: usize = 20;
@@ -75,12 +78,12 @@ const MUTE_IN: usize = 0;
 const MUTE_OUT: usize = 1;
 const CLEAR_TAPS: usize = 2;
 const LOCK_TAPS: usize = 3;
-const MAX_HAAS_MS: f32 = 5.0;
+const MAX_HAAS_MS: f32 = 0.0;
 const NO_GUI_SMOOTHING: f32 = f32::MAX;
-const MIN_EQ_GAIN: f32 = -13.0;
+const MIN_EQ_GAIN: f32 = -23.0;
 const PANNER_EQ_FREQ: f32 = 18_000.0;
 const PANNER_EQ_Q: f32 = 0.42;
-const MIN_PAN_GAIN: f32 = -4.2;
+const MIN_PAN_GAIN: f32 = -0.0;
 const DEFAULT_TEMPO: f32 = 60.0;
 
 pub struct Del2 {
@@ -646,6 +649,7 @@ impl Plugin for Del2 {
         // Initialize filter parameters for each tap
         self.initialize_filter_parameters();
         for delay_tap in &mut self.delay_taps {
+            delay_tap.shelving_eq.set(5000.0, 0.0, sample_rate);
             delay_tap.eq_l.set_sample_rate(sample_rate);
             delay_tap.eq_r.set_sample_rate(sample_rate);
             // workaround for comment from upstream:
@@ -921,13 +925,52 @@ impl Plugin for Del2 {
                         }
 
                         for i in (block_start..block_end).step_by(2) {
+                            // let eq_gain_1 = delay_tap.eq_gain.next();
+                            // let eq_gain_2 = delay_tap.eq_gain.next();
+                            // let eq_gain_l_lin_1 = util::db_to_gain_fast(eq_gain_1.min(0.0));
+                            // let eq_gain_l_lin_2 = util::db_to_gain_fast(eq_gain_2.min(0.0));
+                            // let eq_gain_r_lin_1 =
+                            // util::db_to_gain_fast((eq_gain_1 * -1.0).min(0.0));
+                            // let eq_gain_r_lin_2 =
+                            // util::db_to_gain_fast((eq_gain_2 * -1.0).min(0.0));
+                            // let eq_gain_frame = f32x4::from_array([
+                            // eq_gain_l_lin_1,
+                            // eq_gain_l_lin_2,
+                            // eq_gain_r_lin_1,
+                            // eq_gain_r_lin_2,
+                            // ]);
                             let frame = f32x4::from_array([
                                 delay_tap.delayed_audio_l[i],
                                 delay_tap.delayed_audio_r[i],
                                 delay_tap.delayed_audio_l.get(i + 1).copied().unwrap_or(0.0),
                                 delay_tap.delayed_audio_r.get(i + 1).copied().unwrap_or(0.0),
                             ]);
-                            let frame_out = *delay_tap.ladders.tick_pivotal(frame).as_array();
+
+                            let (eq_gain_1, eq_gain_2) =
+                                (delay_tap.eq_gain.next(), delay_tap.eq_gain.next());
+
+                            // Clamp dB gains to <= 0.0 with SIMD. NOTE(review): frame is [l, r, l+1, r+1] but gains are [g1, g2, -g1, -g2] - confirm lane order.
+                            let gain_values =
+                                f32x4::from_array([eq_gain_1, eq_gain_2, -eq_gain_1, -eq_gain_2]);
+                            let clamped_gain_values = gain_values
+                                .simd_clamp(f32x4::splat(-std::f32::MAX), f32x4::splat(0.0));
+
+                            // Apply db_to_gain_fast using direct lane access
+                            let mut lin_gains = [0.0; 4];
+                            for i in 0..4 {
+                                lin_gains[i] = util::db_to_gain_fast(clamped_gain_values[i]);
+                            }
+
+                            let eq_gain_frame = f32x4::from_array(lin_gains);
+                            // let eq_gain_frame = f32x4::from_array(lin_gains.to_array());
+
+                            // let frame_out = *delay_tap.ladders.tick_pivotal(frame).as_array();
+                            let frame_filtered = *delay_tap.ladders.tick_pivotal(frame).as_array();
+                            let frame_out = delay_tap
+                                .shelving_eq
+                                .highshelf(frame_filtered.into(), eq_gain_frame);
+
+                            // let frame_out = *delay_tap.ladders.tick_linear(frame).as_array();
                             delay_tap.delayed_audio_l[i] = frame_out[0];
                             delay_tap.delayed_audio_r[i] = frame_out[1];
                             if i + 1 < block_end {
@@ -941,32 +984,14 @@ impl Plugin for Del2 {
                         for (value_idx, sample_idx) in (block_start..block_end).enumerate() {
                             let post_filter_gain = dry_wet[value_idx] * wet_gain[value_idx]
                                 / (drive * global_drive[value_idx]);
-                            let eq_gain = delay_tap.eq_gain.next();
+                            // let eq_gain = delay_tap.eq_gain.next();
                             let pan_gain = delay_tap.pan_gain.next();
-                            delay_tap.eq_l.set(
-                                0,
-                                Curve::Peak,
-                                PANNER_EQ_FREQ,
-                                PANNER_EQ_Q,
-                                eq_gain.min(0.0),
-                            );
-                            delay_tap.eq_r.set(
-                                0,
-                                Curve::Peak,
-                                PANNER_EQ_FREQ,
-                                PANNER_EQ_Q,
-                                (eq_gain * -1.0).min(0.0),
-                            );
-                            let left = delay_tap.eq_l.process(
-                                delay_tap.delayed_audio_l[sample_idx]
-                                    * post_filter_gain
-                                    * util::db_to_gain_fast(pan_gain.min(0.0)),
-                            );
-                            let right = delay_tap.eq_r.process(
-                                delay_tap.delayed_audio_r[sample_idx]
-                                    * post_filter_gain
-                                    * util::db_to_gain_fast((pan_gain * -1.0).min(0.0)),
-                            );
+                            let left = delay_tap.delayed_audio_l[sample_idx]
+                                * post_filter_gain
+                                * util::db_to_gain_fast(pan_gain.min(0.0));
+                            let right = delay_tap.delayed_audio_r[sample_idx]
+                                * post_filter_gain
+                                * util::db_to_gain_fast((pan_gain * -1.0).min(0.0));
                             output[0][sample_idx] += left;
                             output[1][sample_idx] += right;
                             amplitude += (left.abs() + right.abs()) * 0.5;
diff --git a/src/svf_simper.rs b/src/svf_simper.rs
new file mode 100644
index 0000000..6a17afd
--- /dev/null
+++ b/src/svf_simper.rs
@@ -0,0 +1,74 @@
+// slightly adapted from https://github.com/wrl/baseplug/blob/trunk/examples/svf/svf_simper.rs
+// the original only outputs the lowpass, I need both the low and the high-pass
+
+// implemented from https://cytomic.com/files/dsp/SvfLinearTrapOptimised2.pdf
+// thanks, andy!
+
+use std::f32::consts;
+
+use std::simd::f32x4;
+
+#[derive(Debug, Clone)]
+pub struct SVFSimper { // f32x4 filter: coefficients and state are stored per lane
+    pub a1: f32x4, // 1 / (1 + g * (g + k)), where g = tan(PI * cutoff / sample_rate)
+    pub a2: f32x4, // g * a1
+    pub a3: f32x4, // g * a2
+    pub k: f32x4, // 2 - 2 * resonance, with resonance limited to 0..=1
+    // Integrator state: zeroed by `new`, left alone by `set`, advanced by `process`/`highshelf`.
+    pub ic1eq: f32x4,
+    pub ic2eq: f32x4,
+}
+
+impl SVFSimper {
+    pub fn new(cutoff: f32, resonance: f32, sample_rate: f32) -> Self {
+        let g = (consts::PI * (cutoff / sample_rate)).tan();
+        // Damping k = 2 - 2 * resonance (limited to 0..=1); an earlier variant used a 1.9 factor.
+        let k = 2f32 - (2.0f32 * resonance.min(1f32).max(0f32));
+        // NOTE(review): g is NaN when sample_rate is 0, which makes a1..a3 NaN too; no guard here.
+        let a1 = 1.0 / (1.0 + (g * (g + k)));
+        let a2 = g * a1;
+        let a3 = g * a2;
+
+        SVFSimper {
+            a1: f32x4::splat(a1),
+            a2: f32x4::splat(a2),
+            a3: f32x4::splat(a3),
+            k: f32x4::splat(k),
+            // Both integrator states start at zero in every lane.
+            ic1eq: f32x4::splat(0.0),
+            ic2eq: f32x4::splat(0.0),
+        }
+    }
+    /// Recomputes `a1`, `a2`, `a3` and `k` via `new`; `ic1eq` and `ic2eq` keep their values.
+    pub fn set(&mut self, cutoff: f32, resonance: f32, sample_rate: f32) {
+        let new = Self::new(cutoff, resonance, sample_rate);
+
+        self.a1 = new.a1;
+        self.a2 = new.a2;
+        self.a3 = new.a3;
+        self.k = new.k;
+    }
+    /// Advances the filter by one `f32x4` frame (lane-wise) and returns `v2`, the lowpass output.
+    #[inline]
+    pub fn process(&mut self, v0: f32x4) -> f32x4 {
+        let v3 = v0 - self.ic2eq;
+        let v1 = (self.a1 * self.ic1eq) + (self.a2 * v3);
+        let v2 = self.ic2eq + (self.a2 * self.ic1eq) + (self.a3 * v3);
+        // Integrator state update: new state = 2 * v - old state.
+        self.ic1eq = (f32x4::splat(2.0) * v1) - self.ic1eq;
+        self.ic2eq = (f32x4::splat(2.0) * v2) - self.ic2eq;
+
+        v2
+    }
+    /// Same state update as `process`, but returns `v2 + lin_gain * (v0 - k * v1 - v2)`.
+    pub fn highshelf(&mut self, v0: f32x4, lin_gain: f32x4) -> f32x4 {
+        let v3 = v0 - self.ic2eq;
+        let v1 = (self.a1 * self.ic1eq) + (self.a2 * v3);
+        let v2 = self.ic2eq + (self.a2 * self.ic1eq) + (self.a3 * v3);
+        // (v0 - k * v1 - v2) is presumably the high-pass output the file header asks for.
+        self.ic1eq = (f32x4::splat(2.0) * v1) - self.ic1eq;
+        self.ic2eq = (f32x4::splat(2.0) * v2) - self.ic2eq;
+        // NOTE(review): with lin_gain == 1 this returns v0 - k * v1, not v0 - confirm intended.
+        v2 + lin_gain * (v0 - self.k * v1 - v2)
+    }
+}