diff --git a/Cargo.toml b/Cargo.toml index 213f717..0846207 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ crate-type = ["cdylib", "lib"] # Remove the `assert_process_allocs` feature to allow allocations on the audio # thread in debug builds. nih_plug = { git = "https://github.com/robbert-vdh/nih-plug.git", features = ["assert_process_allocs", "standalone"] } +# nih_plug = { git = "https://github.com/robbert-vdh/nih-plug.git", features = ["assert_process_allocs"] } bit_mask_ring_buf = "0.5" nih_plug_vizia = { git = "https://github.com/robbert-vdh/nih-plug.git" } diff --git a/src/delay_tap.rs b/src/delay_tap.rs index 87acd30..f17f54a 100644 --- a/src/delay_tap.rs +++ b/src/delay_tap.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use synfx_dsp::fh_va::FilterParams; use synfx_dsp::fh_va::LadderFilter; -use crate::{Equalizer, MAX_BLOCK_SIZE, NO_LEARNED_NOTE}; +use crate::{Equalizer, SVFSimper, MAX_BLOCK_SIZE, NO_LEARNED_NOTE}; #[derive(Debug, Clone)] pub struct DelayTap { @@ -14,6 +14,7 @@ pub struct DelayTap { pub ladders: LadderFilter, pub eq_l: Equalizer, pub eq_r: Equalizer, + pub shelving_eq: SVFSimper, pub mute_in_delayed: Box<[bool]>, /// Fades between 0 and 1 with timings based on the global attack and release settings. pub amp_envelope: Smoother, @@ -53,6 +54,7 @@ impl DelayTap { ladders: LadderFilter::new(filter_params), eq_l: Equalizer::new(0.0), eq_r: Equalizer::new(0.0), + shelving_eq: SVFSimper::new(1000.0, 0.0, 48000.0), mute_in_delayed: vec![false; MAX_BLOCK_SIZE].into_boxed_slice(), amp_envelope: Smoother::new(SmoothingStyle::Linear(13.0)), internal_id: 0, diff --git a/src/lib.rs b/src/lib.rs index e24d025..836fcc6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,7 @@ use simple_eq::design::Curve; use simple_eq::Equalizer; use std::ops::Index; use std::simd::f32x4; +use std::simd::num::SimdFloat; use std::sync::atomic::{ AtomicBool, AtomicI32, AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering, }; @@ -53,7 +54,9 @@ use synfx_dsp::fh_va::{FilterParams, LadderMode}; mod delay_tap; mod editor; +mod svf_simper; use delay_tap::DelayTap; +use svf_simper::SVFSimper; // max seconds per tap const MAX_TAP_SECONDS: usize = 20; @@ -75,12 +78,12 @@ const MUTE_IN: usize = 0; const MUTE_OUT: usize = 1; const CLEAR_TAPS: usize = 2; const LOCK_TAPS: usize = 3; -const MAX_HAAS_MS: f32 = 5.0; +const MAX_HAAS_MS: f32 = 0.0; const NO_GUI_SMOOTHING: f32 = f32::MAX; -const MIN_EQ_GAIN: f32 = -13.0; +const MIN_EQ_GAIN: f32 = -23.0; const PANNER_EQ_FREQ: f32 = 18_000.0; const PANNER_EQ_Q: f32 = 0.42; -const MIN_PAN_GAIN: f32 = -4.2; +const MIN_PAN_GAIN: f32 = -0.0; const DEFAULT_TEMPO: f32 = 60.0; pub struct Del2 { @@ -646,6 +649,7 @@ impl Plugin for Del2 { // Initialize filter parameters for each tap self.initialize_filter_parameters(); for delay_tap in &mut self.delay_taps { + delay_tap.shelving_eq.set(5000.0, 0.0, sample_rate); delay_tap.eq_l.set_sample_rate(sample_rate); delay_tap.eq_r.set_sample_rate(sample_rate); // workaround for comment from upstream: @@ -921,13 +925,52 @@ impl Plugin for Del2 { } for i in (block_start..block_end).step_by(2) { + // let eq_gain_1 = delay_tap.eq_gain.next(); + // let eq_gain_2 = delay_tap.eq_gain.next(); + // let eq_gain_l_lin_1 = util::db_to_gain_fast(eq_gain_1.min(0.0)); + // let eq_gain_l_lin_2 = util::db_to_gain_fast(eq_gain_2.min(0.0)); + // let eq_gain_r_lin_1 = + // util::db_to_gain_fast((eq_gain_1 * -1.0).min(0.0)); + // let eq_gain_r_lin_2 = + // util::db_to_gain_fast((eq_gain_2 * -1.0).min(0.0)); + // let eq_gain_frame = f32x4::from_array([ + // eq_gain_l_lin_1, + // eq_gain_l_lin_2, + // eq_gain_r_lin_1, + // eq_gain_r_lin_2, + // ]); let frame = f32x4::from_array([ delay_tap.delayed_audio_l[i], delay_tap.delayed_audio_r[i], delay_tap.delayed_audio_l.get(i + 1).copied().unwrap_or(0.0), delay_tap.delayed_audio_r.get(i + 1).copied().unwrap_or(0.0), ]); - let frame_out = *delay_tap.ladders.tick_pivotal(frame).as_array(); + + let (eq_gain_1, eq_gain_2) = + (delay_tap.eq_gain.next(), delay_tap.eq_gain.next()); + + // Prepare inputs and perform min operation using SIMD + let gain_values = + f32x4::from_array([eq_gain_1, eq_gain_2, -eq_gain_1, -eq_gain_2]); + let clamped_gain_values = gain_values + .simd_clamp(f32x4::splat(-std::f32::MAX), f32x4::splat(0.0)); + + // Apply db_to_gain_fast using direct lane access + let mut lin_gains = [0.0; 4]; + for i in 0..4 { + lin_gains[i] = util::db_to_gain_fast(clamped_gain_values[i]); + } + + let eq_gain_frame = f32x4::from_array(lin_gains); + // let eq_gain_frame = f32x4::from_array(lin_gains.to_array()); + + // let frame_out = *delay_tap.ladders.tick_pivotal(frame).as_array(); + let frame_filtered = *delay_tap.ladders.tick_pivotal(frame).as_array(); + let frame_out = delay_tap + .shelving_eq + .highshelf(frame_filtered.into(), eq_gain_frame); + + // let frame_out = *delay_tap.ladders.tick_linear(frame).as_array(); delay_tap.delayed_audio_l[i] = frame_out[0]; delay_tap.delayed_audio_r[i] = frame_out[1]; if i + 1 < block_end { @@ -941,32 +984,14 @@ impl Plugin for Del2 { for (value_idx, sample_idx) in (block_start..block_end).enumerate() { let post_filter_gain = dry_wet[value_idx] * wet_gain[value_idx] / (drive * global_drive[value_idx]); - let eq_gain = delay_tap.eq_gain.next(); + // let eq_gain = delay_tap.eq_gain.next(); let pan_gain = delay_tap.pan_gain.next(); - delay_tap.eq_l.set( - 0, - Curve::Peak, - PANNER_EQ_FREQ, - PANNER_EQ_Q, - eq_gain.min(0.0), - ); - delay_tap.eq_r.set( - 0, - Curve::Peak, - PANNER_EQ_FREQ, - PANNER_EQ_Q, - (eq_gain * -1.0).min(0.0), - ); - let left = delay_tap.eq_l.process( - delay_tap.delayed_audio_l[sample_idx] - * post_filter_gain - * util::db_to_gain_fast(pan_gain.min(0.0)), - ); - let right = delay_tap.eq_r.process( - delay_tap.delayed_audio_r[sample_idx] - * post_filter_gain - * util::db_to_gain_fast((pan_gain * -1.0).min(0.0)), - ); + let left = delay_tap.delayed_audio_l[sample_idx] + * post_filter_gain + * util::db_to_gain_fast(pan_gain.min(0.0)); + let right = delay_tap.delayed_audio_r[sample_idx] + * post_filter_gain + * util::db_to_gain_fast((pan_gain * -1.0).min(0.0)); output[0][sample_idx] += left; output[1][sample_idx] += right; amplitude += (left.abs() + right.abs()) * 0.5; diff --git a/src/svf_simper.rs b/src/svf_simper.rs new file mode 100644 index 0000000..6a17afd --- /dev/null +++ b/src/svf_simper.rs @@ -0,0 +1,74 @@ +// slightly adapted from https://github.com/wrl/baseplug/blob/trunk/examples/svf/svf_simper.rs +// the original only outputs the lowpass, I need both the low and the high-pass + +// implemented from https://cytomic.com/files/dsp/SvfLinearTrapOptimised2.pdf +// thanks, andy! + +use std::f32::consts; + +use std::simd::f32x4; + +#[derive(Debug, Clone)] +pub struct SVFSimper { + pub a1: f32x4, + pub a2: f32x4, + pub a3: f32x4, + pub k: f32x4, + + pub ic1eq: f32x4, + pub ic2eq: f32x4, +} + +impl SVFSimper { + pub fn new(cutoff: f32, resonance: f32, sample_rate: f32) -> Self { + let g = (consts::PI * (cutoff / sample_rate)).tan(); + // let k = 2f32 - (1.9f32 * resonance.min(1f32).max(0f32)); + let k = 2f32 - (2.0f32 * resonance.min(1f32).max(0f32)); + + let a1 = 1.0 / (1.0 + (g * (g + k))); + let a2 = g * a1; + let a3 = g * a2; + + SVFSimper { + a1: f32x4::splat(a1), + a2: f32x4::splat(a2), + a3: f32x4::splat(a3), + k: f32x4::splat(k), + + ic1eq: f32x4::splat(0.0), + ic2eq: f32x4::splat(0.0), + } + } + + pub fn set(&mut self, cutoff: f32, resonance: f32, sample_rate: f32) { + let new = Self::new(cutoff, resonance, sample_rate); + + self.a1 = new.a1; + self.a2 = new.a2; + self.a3 = new.a3; + self.k = new.k; + } + + #[inline] + pub fn process(&mut self, v0: f32x4) -> f32x4 { + let v3 = v0 - self.ic2eq; + let v1 = (self.a1 * self.ic1eq) + (self.a2 * v3); + let v2 = self.ic2eq + (self.a2 * self.ic1eq) + (self.a3 * v3); + + self.ic1eq = (f32x4::splat(2.0) * v1) - self.ic1eq; + self.ic2eq = (f32x4::splat(2.0) * v2) - self.ic2eq; + + v2 + } + + pub fn highshelf(&mut self, v0: f32x4, lin_gain: f32x4) -> f32x4 { + let v3 = v0 - self.ic2eq; + let v1 = (self.a1 * self.ic1eq) + (self.a2 * v3); + let v2 = self.ic2eq + (self.a2 * self.ic1eq) + (self.a3 * v3); + + self.ic1eq = (f32x4::splat(2.0) * v1) - self.ic1eq; + self.ic2eq = (f32x4::splat(2.0) * v2) - self.ic2eq; + + v2 + lin_gain * (v0 - self.k * v1 - v2) + } +}