diff --git a/Forge/Math/Internal/SimdTypes.h b/Forge/Math/Internal/SimdTypes.h index 42102681fe..1dfb4cd6e3 100644 --- a/Forge/Math/Internal/SimdTypes.h +++ b/Forge/Math/Internal/SimdTypes.h @@ -17,14 +17,14 @@ #define TF_SIMDI_MAX 0xFFFFFFFF #define TF_SIMDF_MAX 0xFFFFFFFF - typedef __m128 TSimdFloat32x4; - typedef __m128i TSimdInt32x4; + typedef __m128 Tsimd_f32x4_t; + typedef __m128i Tsimd_i32x4_t; - typedef __m128 TSimdFloat32x3; - typedef __m128i TSimdInt32x3; + typedef __m128 Tsimd_f32x3_t; + typedef __m128i Tsimd_i32x3_t; - typedef __m128 TSimdFloat32x2; - typedef __m128i TSimdInt32x2; + typedef __m128 Tsimd_f32x2_t; + typedef __m128i Tsimd_i32x2_t; #elif defined(TF_FEATURE_CPU_NEON) #include @@ -33,14 +33,14 @@ #define TF_SIMDI_MAX 0xFFFFFFFF - typedef float32x4_t TSimdFloat32x4; - typedef int32x4_t TSimdInt32x4; + typedef float32x4_t Tsimd_f32x4_t; + typedef int32x4_t Tsimd_i32x4_t; - typedef float32x4_t TSimdFloat32x3; - typedef int32x4_t TSimdInt32x3; + typedef float32x4_t Tsimd_f32x3_t; + typedef int32x4_t Tsimd_i32x3_t; - typedef float32x2_t TSimdFloat32x2; - typedef int32x2_t TSimdInt32x2; + typedef float32x2_t Tsimd_f32x2_t; + typedef int32x2_t Tsimd_i32x2_t; #elif defined(TF_FEATURE_CPU_SCALAR) #include @@ -49,35 +49,44 @@ #define TF_SIMDI_MAX 0xFFFFFFFF - typedef struct { float v[4]; } TSimdFloat32x4; - typedef struct { int32_t v[4]; } TSimdInt32x4; + typedef struct { float v[4]; } Tsimd_f32x4_t; + typedef struct { int32_t v[4]; } Tsimd_i32x4_t; - typedef struct { float v[3]; } TSimdFloat32x3; - typedef struct { int32_t v[3]; } TSimdInt32x3; + typedef struct { float v[3]; } Tsimd_f32x3_t; + typedef struct { int32_t v[3]; } Tsimd_i32x3_t; - typedef struct { float v[2]; } TSimdFloat32x2; - typedef struct { int32_t v[2]; } TSimdInt32x2; + typedef struct { float v[2]; } Tsimd_f32x2_t; + typedef struct { int32_t v[2]; } Tsimd_i32x2_t; #endif // TODO: keep it simple only implement square matricies // everything is column major -struct TSimdFloat4 { - TSimdFloat32x4 mRow; +struct TSimdQuatFloat { + Tsimd_f32x4_t mValue; }; -struct TSimdQuatFloat { - TSimdFloat32x4 mValue; +struct Tsimd_f32x4x4_s { + union { + struct { + Tsimd_f32x4_t mCol0; + Tsimd_f32x4_t mCol1; + Tsimd_f32x4_t mCol2; + Tsimd_f32x4_t mCol3; + }; + Tsimd_f32x4_t mCol[4]; + }; }; + struct TSimdFloat4x1 { union { struct { - TSimdFloat32x4 mCol0; + Tsimd_f32x4_t mCol0; }; - TSimdFloat32x4 mCol[1]; + Tsimd_f32x4_t mCol[1]; }; }; @@ -87,21 +96,21 @@ struct TSimdFloat4x2 { struct { - TSimdFloat32x4 mCol0; - TSimdFloat32x4 mCol1; + Tsimd_f32x4_t mCol0; + Tsimd_f32x4_t mCol1; }; - TSimdFloat32x4 mCol[2]; + Tsimd_f32x4_t mCol[2]; }; }; struct TSimdFloat4x3 { union { struct { - TSimdFloat32x4 mCol0; - TSimdFloat32x4 mCol1; - TSimdFloat32x4 mCol2; + Tsimd_f32x4_t mCol0; + Tsimd_f32x4_t mCol1; + Tsimd_f32x4_t mCol2; }; - TSimdFloat32x4 mCol[3]; + Tsimd_f32x4_t mCol[3]; }; }; @@ -111,26 +120,26 @@ struct TSimdFloat4x4 { struct { - TSimdFloat32x4 mCol0; - TSimdFloat32x4 mCol1; - TSimdFloat32x4 mCol2; - TSimdFloat32x4 mCol3; + Tsimd_f32x4_t mCol0; + Tsimd_f32x4_t mCol1; + Tsimd_f32x4_t mCol2; + Tsimd_f32x4_t mCol3; }; - TSimdFloat32x4 mCol[4]; + Tsimd_f32x4_t mCol[4]; }; }; struct TSimdFloat3 { - TSimdFloat32x3 mRow; + Tsimd_f32x3_t mRow; }; struct TSimdFloat3x1 { union { struct { - TSimdFloat32x3 mCol0; + Tsimd_f32x3_t mCol0; }; - TSimdFloat32x3 mCol[1]; + Tsimd_f32x3_t mCol[1]; }; }; @@ -140,10 +149,10 @@ struct TSimdFloat3x2 { struct { - TSimdFloat32x3 mCol0; - TSimdFloat32x3 mCol1; + Tsimd_f32x3_t mCol0; + Tsimd_f32x3_t 
mCol1; }; - TSimdFloat32x3 mCol[2]; + Tsimd_f32x3_t mCol[2]; }; }; @@ -153,25 +162,25 @@ struct TSimdFloat3x3 { struct { - TSimdFloat32x3 mCol0; - TSimdFloat32x3 mCol1; - TSimdFloat32x3 mCol2; + Tsimd_f32x3_t mCol0; + Tsimd_f32x3_t mCol1; + Tsimd_f32x3_t mCol2; }; - TSimdFloat32x3 mCol[3]; + Tsimd_f32x3_t mCol[3]; }; }; struct TSimdFloat2 { - TSimdFloat32x2 mRow; + Tsimd_f32x2_t mRow; }; struct TSimdFloat2x1 { union { struct { - TSimdFloat32x2 mCol0; + Tsimd_f32x2_t mCol0; }; - TSimdFloat32x2 mCol[1]; + Tsimd_f32x2_t mCol[1]; }; }; @@ -179,10 +188,10 @@ struct TSimdFloat2x2 { union { struct { - TSimdFloat32x2 mCol0; - TSimdFloat32x2 mCol1; + Tsimd_f32x2_t mCol0; + Tsimd_f32x2_t mCol1; }; - TSimdFloat32x2 mCol[2]; + Tsimd_f32x2_t mCol[2]; }; }; diff --git a/Forge/Math/Internal/TF_Simd32x2_neon.inl b/Forge/Math/Internal/TF_Simd32x2_neon.inl index fa3c57b9f9..c3c6803429 100644 --- a/Forge/Math/Internal/TF_Simd32x2_neon.inl +++ b/Forge/Math/Internal/TF_Simd32x2_neon.inl @@ -4,84 +4,84 @@ #include "../TF_Simd32x2.h" #endif -inline TSimdInt32x2 tfSimd2iSelect(TSimdInt32x2 arg0, TSimdInt32x2 arg1, TSimdInt32x2 mask) { return vbsl_s32(mask, arg1, arg1); } -inline TSimdFloat32x2 tfSimd2fSelect(TSimdFloat32x2 arg0, TSimdFloat32x2 arg1, TSimdFloat32x2 mask) { return vbsl_f32(mask, arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ISelect(Tsimd_i32x2_t arg0, Tsimd_i32x2_t arg1, Tsimd_i32x2_t mask) { return vbsl_s32(mask, arg1, arg0); } +inline Tsimd_f32x2_t tfS32x2FSelect(Tsimd_f32x2_t arg0, Tsimd_f32x2_t arg1, Tsimd_f32x2_t mask) { return vbsl_f32(mask, arg1, arg0); } -inline TSimdFloat32x2 tfSimd2fZero() { return vmov_n_f32(0); } -inline TSimdInt32x2 tfSimd2iZero() { return vmov_n_s32(0); } +inline Tsimd_f32x2_t tfS32x2FZero() { return vmov_n_f32(0); } +inline Tsimd_i32x2_t tfS32x2IZero() { return vmov_n_s32(0); } -inline TSimdInt32x2 tfSimd2iNot(TSimdInt32x2 value) { return vmvn_s32(value); } -inline TSimdInt32x2 tfSimd2iAnd(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vand_s32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iAndNot(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vand_s32(vmvn_s32(arg1), arg2); } -inline TSimdInt32x2 tfSimd2iOr(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vorr_s32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iXor(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return veor_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2INot(Tsimd_i32x2_t value) { return vmvn_s32(value); } +inline Tsimd_i32x2_t tfS32x2IAnd(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vand_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2IAndNot(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vand_s32(vmvn_s32(arg1), arg2); } +inline Tsimd_i32x2_t tfS32x2IOr(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vorr_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2IXor(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return veor_s32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fNot(TSimdFloat32x2 value) { return vreinterpret_f32_s32(vmvn_s32(vreinterpret_s32_f32(value))); } -inline TSimdFloat32x2 tfSimd2fAnd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FNot(Tsimd_f32x2_t value) { return vreinterpret_f32_s32(vmvn_s32(vreinterpret_s32_f32(value))); } +inline Tsimd_f32x2_t tfS32x2FAnd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vreinterpret_f32_s32(vand_s32(vreinterpret_s32_f32(arg1), vreinterpret_s32_f32(arg2))); } -inline TSimdFloat32x2 tfSimd2fAndNot(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FAndNot(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return 
vreinterpret_f32_s32(vand_s32(vmvn_s32(vreinterpret_s32_f32(arg1)), vreinterpret_s32_f32(arg2))); } -inline TSimdFloat32x2 tfSimd2fOr(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FOr(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vreinterpret_f32_s32(vorr_s32(vreinterpret_s32_f32(arg1), vreinterpret_s32_f32(arg2))); } -inline TSimdFloat32x2 tfSimd2fXor(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FXor(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vreinterpret_f32_s32(veor_s32(vreinterpret_s32_f32(arg1), vreinterpret_s32_f32(arg2))); } -inline TSimdFloat32x2 tfSimd2fFloor(TSimdFloat32x2 value) { return vrndm_f32(value); } -inline TSimdFloat32x2 tfSimd2fCeil(TSimdFloat32x2 value) { return vrndp_f32(value); } -inline TSimdFloat32x2 tfSimd2fRound(TSimdFloat32x2 value) { return vrndn_f32(value); } -inline TSimdFloat32x2 tfSimd2fTruncate(TSimdFloat32x2 value) { return tfSimd2iToSimd2f(tfSimd2fToSimd2i(value)); } -inline TSimdFloat32x2 tfSimd2fMin(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return vmin_f32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fMax(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return vmax_f32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fClamp(TSimdFloat32x2 value, TSimdFloat32x2 min, TSimdFloat32x2 max) { - return tfSimd2fMax(min, tfSimd2fMin(value, max)); +inline Tsimd_f32x2_t tfS32x2FFloor(Tsimd_f32x2_t value) { return vrndm_f32(value); } +inline Tsimd_f32x2_t tfS32x2FCeil(Tsimd_f32x2_t value) { return vrndp_f32(value); } +inline Tsimd_f32x2_t tfS32x2FRound(Tsimd_f32x2_t value) { return vrndn_f32(value); } +inline Tsimd_f32x2_t tfS32x2FTruncate(Tsimd_f32x2_t value) { return tfS32x2IToSimd2f(tfS32x2FToSimd2i(value)); } +inline Tsimd_f32x2_t tfS32x2FMin(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vmin_f32(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FMax(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vmax_f32(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FClamp(Tsimd_f32x2_t value, Tsimd_f32x2_t min, Tsimd_f32x2_t max) { + return tfS32x2FMax(min, tfS32x2FMin(value, max)); } -inline TSimdInt32x2 tfSimd2fToSimd2i(TSimdFloat32x2 value) { return vreinterpret_s32_f32(value); } +inline Tsimd_i32x2_t tfS32x2FToSimd2i(Tsimd_f32x2_t value) { return vreinterpret_s32_f32(value); } -inline TSimdFloat32x2 tfSimd2iToSimd2f(TSimdInt32x2 value) { return vreinterpret_f32_s32(value); } +inline Tsimd_f32x2_t tfS32x2IToSimd2f(Tsimd_i32x2_t value) { return vreinterpret_f32_s32(value); } -inline float tfSimd2fSelectIndex0(TSimdFloat32x2 value) { return vget_lane_f32(value, 0); } +inline float tfS32x2FSelectIndex0(Tsimd_f32x2_t value) { return vget_lane_f32(value, 0); } -inline float tfSimd2fSelectIndex1(TSimdFloat32x2 value) { return vget_lane_f32(value, 1); } +inline float tfS32x2FSelectIndex1(Tsimd_f32x2_t value) { return vget_lane_f32(value, 1); } -inline TSimdFloat32x2 tfSimd2fAdd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return vadd_f32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fSub(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return vsub_f32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fMul(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return vmul_f32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fMadd(TSimdFloat32x2 mul1, TSimdFloat32x2 mul2, TSimdFloat32x2 add) { return vmla_f32(add, mul1, mul2); } -inline TSimdFloat32x2 tfSimd2fDiv(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return vdiv_f32(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FAdd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vadd_f32(arg1, arg2); } +inline 
Tsimd_f32x2_t tfS32x2FSub(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vsub_f32(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FMul(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vmul_f32(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FMadd(Tsimd_f32x2_t mul1, Tsimd_f32x2_t mul2, Tsimd_f32x2_t add) { return vmla_f32(add, mul1, mul2); } +inline Tsimd_f32x2_t tfS32x2FDiv(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return vdiv_f32(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fAbs(TSimdFloat32x2 value) { return vabs_f32(value); } +inline Tsimd_f32x2_t tfS32x2FAbs(Tsimd_f32x2_t value) { return vabs_f32(value); } -inline TSimdFloat32x2 tfSimdFloat2Load(float x, float y) { +inline Tsimd_f32x2_t tfSimdFloat2Load(float x, float y) { const float values[2] = { x, y }; return vld1_f32(values); } -inline TSimdInt32x2 tfSimd2iLoadImmediate(int32_t x, int32_t y) { +inline Tsimd_i32x2_t tfS32x2ILoadImmediate(int32_t x, int32_t y) { const int32_t values[2] = { x, y }; return vld1_s32(values); } -inline TSimdFloat32x2 tfSimd2fSplatIndex0(TSimdFloat32x2 value) { return vdup_lane_f32(value, 0); } +inline Tsimd_f32x2_t tfS32x2FSplatIndex0(Tsimd_f32x2_t value) { return vdup_lane_f32(value, 0); } -inline TSimdFloat32x2 tfSimd2fSplatIndex1(TSimdFloat32x2 value) { return vdup_lane_f32(value, 1); } +inline Tsimd_f32x2_t tfS32x2FSplatIndex1(Tsimd_f32x2_t value) { return vdup_lane_f32(value, 1); } -inline TSimdInt32x2 tfSimd2iSplat(int32_t value) { return vdup_n_s32(value); } +inline Tsimd_i32x2_t tfS32x2ISplat(int32_t value) { return vdup_n_s32(value); } -inline TSimdFloat32x2 tfSimd2fSplat(float value) { return vdup_n_f32(value); } +inline Tsimd_f32x2_t tfS32x2FSplat(float value) { return vdup_n_f32(value); } -inline TSimdInt32x2 tfSimd2iCmpEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vceq_s32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpNeq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vmvn_s32(vceq_s32(arg1, arg2)); } -inline TSimdInt32x2 tfSimd2iCmpGt(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vcgt_s32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpGtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vcgt_s32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpLt(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vclt_s32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpLtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return vcle_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vceq_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpNeq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vmvn_s32(vceq_s32(arg1, arg2)); } +inline Tsimd_i32x2_t tfS32x2ICmpGt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vcgt_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpGtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vcge_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpLt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vclt_s32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpLtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return vcle_s32(arg1, arg2); } -inline bool tfSimd2fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { +inline bool tfS32x2FCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { // for (int i = 0; i < 2; i++) { // if (arg1.v[i] != arg2.v[i]) { // return false; @@ -90,7 +90,7 @@ inline bool tfSimd2fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return true; } -inline bool tfSimd2iCmpAllEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline bool tfS32x2ICmpAllEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { // for (int i = 0; i < 2; i++) { // if (arg1.v[i] != 
arg2.v[i]) { // return false; diff --git a/Forge/Math/Internal/TF_Simd32x2_scalar.inl b/Forge/Math/Internal/TF_Simd32x2_scalar.inl index 712d255bea..e3270f0d05 100644 --- a/Forge/Math/Internal/TF_Simd32x2_scalar.inl +++ b/Forge/Math/Internal/TF_Simd32x2_scalar.inl @@ -4,138 +4,138 @@ #include "../TF_Simd32x2.h" #endif -inline TSimdInt32x2 tfSimd2iSelect(TSimdInt32x2 arg0, TSimdInt32x2 arg1, TSimdInt32x2 mask) { +inline Tsimd_i32x2_t tfS32x2ISelect(Tsimd_i32x2_t arg0, Tsimd_i32x2_t arg1, Tsimd_i32x2_t mask) { return { (mask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (mask.v[1] == 0) ? arg0.v[1] : arg1.v[1] }; } -inline TSimdFloat32x2 tfSimd2fSelect(TSimdFloat32x2 arg0, TSimdFloat32x2 arg1, TSimdFloat32x2 mask) { - TSimdInt32x2 intMask = tfSimd2fToSimd2i(mask); +inline Tsimd_f32x2_t tfS32x2FSelect(Tsimd_f32x2_t arg0, Tsimd_f32x2_t arg1, Tsimd_f32x2_t mask) { + Tsimd_i32x2_t intMask = tfS32x2FToSimd2i(mask); return { (intMask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (intMask.v[1] == 0) ? arg0.v[1] : arg1.v[1] }; } -inline TSimdFloat32x2 tfSimd2fZero() { return { 0, 0 }; } -inline TSimdInt32x2 tfSimd2iZero() { return { 0, 0 }; } +inline Tsimd_f32x2_t tfS32x2FZero() { return { 0, 0 }; } +inline Tsimd_i32x2_t tfS32x2IZero() { return { 0, 0 }; } -inline TSimdInt32x2 tfSimd2iNot(TSimdInt32x2 value) { return { ~value.v[0], ~value.v[1] }; } -inline TSimdInt32x2 tfSimd2iAnd(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1] }; } -inline TSimdInt32x2 tfSimd2iAndNot(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1] }; } -inline TSimdInt32x2 tfSimd2iOr(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1] }; } -inline TSimdInt32x2 tfSimd2iXor(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1] }; } +inline Tsimd_i32x2_t tfS32x2INot(Tsimd_i32x2_t value) { return { ~value.v[0], ~value.v[1] }; } +inline Tsimd_i32x2_t tfS32x2IAnd(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1] }; } +inline Tsimd_i32x2_t tfS32x2IAndNot(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1] }; } +inline Tsimd_i32x2_t tfS32x2IOr(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1] }; } +inline Tsimd_i32x2_t tfS32x2IXor(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1] }; } -inline TSimdFloat32x2 tfSimd2fNot(TSimdFloat32x2 value) { - TSimdInt32x2 result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]) } }; - return tfSimd2iToSimd2f(result); +inline Tsimd_f32x2_t tfS32x2FNot(Tsimd_f32x2_t value) { + Tsimd_i32x2_t result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]) } }; + return tfS32x2IToSimd2f(result); } -inline TSimdFloat32x2 tfSimd2fAnd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { - TSimdInt32x2 result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]) }; - return tfSimd2iToSimd2f(result); +inline Tsimd_f32x2_t tfS32x2FAnd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { + Tsimd_i32x2_t result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]) }; + return tfS32x2IToSimd2f(result); } -inline TSimdFloat32x2 tfSimd2fAndNot(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { - TSimdInt32x2 result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]) } }; - return 
tfSimd2iToSimd2f(result); +inline Tsimd_f32x2_t tfS32x2FAndNot(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { + Tsimd_i32x2_t result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]) } }; + return tfS32x2IToSimd2f(result); } -inline TSimdFloat32x2 tfSimd2fOr(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { - TSimdInt32x2 result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]) } }; - return tfSimd2iToSimd2f(result); +inline Tsimd_f32x2_t tfS32x2FOr(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { + Tsimd_i32x2_t result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]) } }; + return tfS32x2IToSimd2f(result); } -inline TSimdFloat32x2 tfSimd2fXor(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { - TSimdInt32x2 result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]) } }; - return tfSimd2iToSimd2f(result); +inline Tsimd_f32x2_t tfS32x2FXor(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { + Tsimd_i32x2_t result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]) } }; + return tfS32x2IToSimd2f(result); } -inline TSimdFloat32x2 tfSimd2fFloor(TSimdFloat32x2 value) { return { { floorf(value.v[0]), floorf(value.v[1]) } }; } -inline TSimdFloat32x2 tfSimd2fCeil(TSimdFloat32x2 value) { return { { ceilf(value.v[0]), ceilf(value.v[1]) } }; } -inline TSimdFloat32x2 tfSimd2fRound(TSimdFloat32x2 value) { +inline Tsimd_f32x2_t tfS32x2FFloor(Tsimd_f32x2_t value) { return { { floorf(value.v[0]), floorf(value.v[1]) } }; } +inline Tsimd_f32x2_t tfS32x2FCeil(Tsimd_f32x2_t value) { return { { ceilf(value.v[0]), ceilf(value.v[1]) } }; } +inline Tsimd_f32x2_t tfS32x2FRound(Tsimd_f32x2_t value) { // While 'roundf' may seem the obvious choice here, it rounds halfway cases // away from zero regardless of the current rounding mode, but 'rintf' uses // the current rounding mode which is consistent with other implementations. 
return { { rintf(value.v[0]), rintf(value.v[1]) } }; } -inline TSimdFloat32x2 tfSimd2fTruncate(TSimdFloat32x2 value) { return tfSimd2iToSimd2f(tfSimd2fToSimd2i(value)); } -inline TSimdFloat32x2 tfSimd2fMin(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FTruncate(Tsimd_f32x2_t value) { return tfS32x2IToSimd2f(tfS32x2FToSimd2i(value)); } +inline Tsimd_f32x2_t tfS32x2FMin(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return { { fminf(arg1.v[0], arg2.v[0]), fminf(arg1.v[1], arg2.v[1]) } }; } -inline TSimdFloat32x2 tfSimd2fMax(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FMax(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return { { fmaxf(arg1.v[0], arg2.v[0]), fmaxf(arg1.v[1], arg2.v[1]) } }; } -inline TSimdFloat32x2 tfSimd2fClamp(TSimdFloat32x2 value, TSimdFloat32x2 min, TSimdFloat32x2 max) { - return tfSimd2fMax(min, tfSimd2fMin(value, max)); +inline Tsimd_f32x2_t tfS32x2FClamp(Tsimd_f32x2_t value, Tsimd_f32x2_t min, Tsimd_f32x2_t max) { + return tfS32x2FMax(min, tfS32x2FMin(value, max)); } -inline TSimdInt32x2 tfSimd2fToSimd2i(TSimdFloat32x2 value) { return { (int32_t)value.v[0], (int32_t)value.v[1] }; } +inline Tsimd_i32x2_t tfS32x2FToSimd2i(Tsimd_f32x2_t value) { return { (int32_t)value.v[0], (int32_t)value.v[1] }; } -inline TSimdFloat32x2 tfSimd2iToSimd2f(TSimdInt32x2 value) { return { (float)value.v[0], (float)value.v[1] }; } +inline Tsimd_f32x2_t tfS32x2IToSimd2f(Tsimd_i32x2_t value) { return { (float)value.v[0], (float)value.v[1] }; } -inline float tfSimd2fSelectIndex0(TSimdFloat32x2 value) { return value.v[0]; } +inline float tfS32x2FSelectIndex0(Tsimd_f32x2_t value) { return value.v[0]; } -inline float tfSimd2fSelectIndex1(TSimdFloat32x2 value) { return value.v[1]; } +inline float tfS32x2FSelectIndex1(Tsimd_f32x2_t value) { return value.v[1]; } -inline TSimdFloat32x2 tfSimd2fAdd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FAdd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return { arg1.v[0] + arg2.v[0], arg1.v[1] + arg2.v[1], }; } -inline TSimdFloat32x2 tfSimd2fSub(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FSub(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return { arg1.v[0] - arg2.v[0], arg1.v[1] - arg2.v[1], }; } -inline TSimdFloat32x2 tfSimd2fMul(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FMul(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return { arg1.v[0] * arg2.v[0], arg1.v[1] * arg2.v[1], }; } -inline TSimdFloat32x2 tfSimd2fMadd(TSimdFloat32x2 mul1, TSimdFloat32x2 mul2, TSimdFloat32x2 add) { - return tfSimd2fAdd(tfSimd2fMul(mul1, mul2), add); +inline Tsimd_f32x2_t tfS32x2FMadd(Tsimd_f32x2_t mul1, Tsimd_f32x2_t mul2, Tsimd_f32x2_t add) { + return tfS32x2FAdd(tfS32x2FMul(mul1, mul2), add); } -inline TSimdFloat32x2 tfSimd2fDiv(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +inline Tsimd_f32x2_t tfS32x2FDiv(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return { arg1.v[0] / arg2.v[0], arg1.v[1] / arg2.v[1], }; } -inline TSimdFloat32x2 tfSimd2fAbs(TSimdFloat32x2 value) { +inline Tsimd_f32x2_t tfS32x2FAbs(Tsimd_f32x2_t value) { return { abs(value.v[0]), abs(value.v[1]), }; } -inline TSimdFloat32x2 tfSimdFloat2Load(float x, float y) { return { x, y }; } +inline Tsimd_f32x2_t tfSimdFloat2Load(float x, float y) { return { x, y }; } -inline TSimdInt32x2 tfSimd2iLoadImmediate(int32_t x, int32_t y) { return { x, y }; } +inline Tsimd_i32x2_t tfS32x2ILoadImmediate(int32_t x, int32_t y) { return { x, y }; } -inline TSimdFloat32x2 tfSimd2fSplatIndex0(TSimdFloat32x2 value) { 
return { value.v[0], value.v[0] }; } -inline TSimdFloat32x2 tfSimd2fSplatIndex1(TSimdFloat32x2 value) { return { value.v[1], value.v[1] }; } +inline Tsimd_f32x2_t tfS32x2FSplatIndex0(Tsimd_f32x2_t value) { return { value.v[0], value.v[0] }; } +inline Tsimd_f32x2_t tfS32x2FSplatIndex1(Tsimd_f32x2_t value) { return { value.v[1], value.v[1] }; } -static inline TSimdFloat32x4 tfSimdFloat2To4Splat0(TSimdFloat32x2 value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } -static inline TSimdFloat32x4 tfSimdFloat2To4Splat1(TSimdFloat32x2 value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } +static inline Tsimd_f32x4_t tfSimdFloat2To4Splat0(Tsimd_f32x2_t value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } +static inline Tsimd_f32x4_t tfSimdFloat2To4Splat1(Tsimd_f32x2_t value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } -inline TSimdInt32x2 tfSimd2iSplat(int32_t value) { return { value, value }; } -inline TSimdFloat32x2 tfSimd2fSplat(float value) { return { value, value }; } +inline Tsimd_i32x2_t tfS32x2ISplat(int32_t value) { return { value, value }; } +inline Tsimd_f32x2_t tfS32x2FSplat(float value) { return { value, value }; } -inline TSimdInt32x2 tfSimd2iCmpEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { { (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x2 tfSimd2iCmpNeq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpNeq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { { (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x2 tfSimd2iCmpGt(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpGt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { { (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x2 tfSimd2iCmpGtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpGtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { { (arg1.v[0] >= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x2 tfSimd2iCmpLt(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpLt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x2 tfSimd2iCmpLtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpLtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? 
(int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline bool tfSimd2fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { +inline bool tfS32x2FCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { for (int i = 0; i < 2; i++) { if (arg1.v[i] != arg2.v[i]) { return false; @@ -144,7 +144,7 @@ inline bool tfSimd2fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return true; } -inline bool tfSimd2iCmpAllEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline bool tfS32x2ICmpAllEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { for (int i = 0; i < 2; i++) { if (arg1.v[i] != arg2.v[i]) { return false; @@ -153,7 +153,7 @@ inline bool tfSimd2iCmpAllEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return true; } -static inline bool tfSimdFloat32x2CmpAllLt(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { +static inline bool tfS32x2FCmpAllLt(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { for (int i = 0; i < 2; i++) { if (arg1.v[i] >= arg2.v[i]) { return false; diff --git a/Forge/Math/Internal/TF_Simd32x2_sse.inl b/Forge/Math/Internal/TF_Simd32x2_sse.inl index c2d7715297..9cd5011245 100644 --- a/Forge/Math/Internal/TF_Simd32x2_sse.inl +++ b/Forge/Math/Internal/TF_Simd32x2_sse.inl @@ -4,105 +4,105 @@ #include "../TF_Simd32x2.h" #endif -inline TSimdInt32x2 tfSimd2iSelect(TSimdInt32x2 arg0, TSimdInt32x2 arg1, TSimdInt32x2 mask) { return _mm_blendv_epi8(arg0, arg1, mask); } -inline TSimdFloat32x2 tfSimd2fSelect(TSimdFloat32x2 arg0, TSimdFloat32x2 arg1, TSimdFloat32x2 mask) { +inline Tsimd_i32x2_t tfS32x2ISelect(Tsimd_i32x2_t arg0, Tsimd_i32x2_t arg1, Tsimd_i32x2_t mask) { return _mm_blendv_epi8(arg0, arg1, mask); } +inline Tsimd_f32x2_t tfS32x2FSelect(Tsimd_f32x2_t arg0, Tsimd_f32x2_t arg1, Tsimd_f32x2_t mask) { return _mm_blendv_ps(arg0, arg1, mask); } -inline TSimdFloat32x2 tfSimd2fZero() { return _mm_setzero_ps(); } -inline TSimdInt32x2 tfSimd2iZero() { return _mm_setzero_si128(); } +inline Tsimd_f32x2_t tfS32x2FZero() { return _mm_setzero_ps(); } +inline Tsimd_i32x2_t tfS32x2IZero() { return _mm_setzero_si128(); } -inline TSimdInt32x2 tfSimd2iNot(TSimdInt32x2 value) { - const TSimdInt32x2 invert = tfSimd2iSplat(TF_SIMDI_MAX); +inline Tsimd_i32x2_t tfS32x2INot(Tsimd_i32x2_t value) { + const Tsimd_i32x2_t invert = tfS32x2ISplat(TF_SIMDI_MAX); return _mm_andnot_si128(value, invert); } -inline TSimdInt32x2 tfSimd2iAnd(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_and_si128(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iAndNot(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_andnot_si128(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iOr(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_or_si128(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iXor(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_xor_si128(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2IAnd(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_and_si128(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2IAndNot(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_andnot_si128(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2IOr(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_or_si128(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2IXor(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_xor_si128(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fNot(TSimdFloat32x2 value) { - const TSimdFloat32x2 invert = tfSimd2fSplat((float)(0xFFFFFFFF)); +inline Tsimd_f32x2_t tfS32x2FNot(Tsimd_f32x2_t value) { + const Tsimd_f32x2_t invert = tfS32x2FSplat((float)(0xFFFFFFFF)); return _mm_andnot_ps(value, invert); } -inline TSimdFloat32x2 tfSimd2fAnd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return 
_mm_and_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fAndNot(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_andnot_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fOr(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_or_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fXor(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_xor_ps(arg1, arg2); } - -inline TSimdFloat32x2 tfSimd2fFloor(TSimdFloat32x2 value) { return _mm_floor_ps(value); } -inline TSimdFloat32x2 tfSimd2fCeil(TSimdFloat32x2 value) { return _mm_ceil_ps(value); } -inline TSimdFloat32x2 tfSimd2fRound(TSimdFloat32x2 value) { return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } -inline TSimdFloat32x2 tfSimd2fTruncate(TSimdFloat32x2 value) { return tfSimd2iToSimd2f(tfSimd2fToSimd2i(value)); } -inline TSimdFloat32x2 tfSimd2fMin(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_min_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fMax(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_max_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fClamp(TSimdFloat32x2 value, TSimdFloat32x2 min, TSimdFloat32x2 max) { - return tfSimd2fMax(min, tfSimd2fMin(value, max)); +inline Tsimd_f32x2_t tfS32x2FAnd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_and_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FAndNot(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_andnot_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FOr(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_or_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FXor(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_xor_ps(arg1, arg2); } + +inline Tsimd_f32x2_t tfS32x2FFloor(Tsimd_f32x2_t value) { return _mm_floor_ps(value); } +inline Tsimd_f32x2_t tfS32x2FCeil(Tsimd_f32x2_t value) { return _mm_ceil_ps(value); } +inline Tsimd_f32x2_t tfS32x2FRound(Tsimd_f32x2_t value) { return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } +inline Tsimd_f32x2_t tfS32x2FTruncate(Tsimd_f32x2_t value) { return tfS32x2IToSimd2f(tfS32x2FToSimd2i(value)); } +inline Tsimd_f32x2_t tfS32x2FMin(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_min_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FMax(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_max_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FClamp(Tsimd_f32x2_t value, Tsimd_f32x2_t min, Tsimd_f32x2_t max) { + return tfS32x2FMax(min, tfS32x2FMin(value, max)); } -inline TSimdInt32x2 tfSimd2fToSimd2i(TSimdFloat32x2 value) { return _mm_castps_si128(value); } +inline Tsimd_i32x2_t tfS32x2FToSimd2i(Tsimd_f32x2_t value) { return _mm_castps_si128(value); } -inline TSimdFloat32x2 tfSimd2iToSimd2f(TSimdInt32x2 value) { return _mm_castsi128_ps(value); } +inline Tsimd_f32x2_t tfS32x2IToSimd2f(Tsimd_i32x2_t value) { return _mm_castsi128_ps(value); } -inline float tfSimd2fSelectIndex0(TSimdFloat32x2 value) { return _mm_cvtss_f32(value); } +inline float tfS32x2FSelectIndex0(Tsimd_f32x2_t value) { return _mm_cvtss_f32(value); } -inline float tfSimd2fSelectIndex1(TSimdFloat32x2 value) { return tfSimd2fSelectIndex0(tfSimd2fSplatIndex1(value)); } +inline float tfS32x2FSelectIndex1(Tsimd_f32x2_t value) { return tfS32x2FSelectIndex0(tfS32x2FSplatIndex1(value)); } -inline TSimdFloat32x2 tfSimd2fAdd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_add_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fSub(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_sub_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fMul(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_mul_ps(arg1, arg2); } 
-inline TSimdFloat32x2 tfSimd2fMadd(TSimdFloat32x2 mul1, TSimdFloat32x2 mul2, TSimdFloat32x2 add) { +inline Tsimd_f32x2_t tfS32x2FAdd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_add_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FSub(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_sub_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FMul(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_mul_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FMadd(Tsimd_f32x2_t mul1, Tsimd_f32x2_t mul2, Tsimd_f32x2_t add) { #if 0 return _mm_fmadd_ps(mul1, mul2, add); // Requires FMA CPUID #else - return tfSimd2fAdd(tfSimd2fMul(mul1, mul2), add); + return tfS32x2FAdd(tfS32x2FMul(mul1, mul2), add); #endif } -inline TSimdFloat32x2 tfSimd2fDiv(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) { return _mm_div_ps(arg1, arg2); } +inline Tsimd_f32x2_t tfS32x2FDiv(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2) { return _mm_div_ps(arg1, arg2); } -inline TSimdFloat32x2 tfSimd2fAbs(TSimdFloat32x2 value) { - const TSimdFloat32x4 signMask = tfSimd2iToSimd2f(tfSimd2iSplat(0x7FFFFFFF)); +inline Tsimd_f32x2_t tfS32x2FAbs(Tsimd_f32x2_t value) { + const Tsimd_f32x4_t signMask = tfS32x2IToSimd2f(tfS32x2ISplat(0x7FFFFFFF)); return _mm_and_ps(value, signMask); } -inline TSimdFloat32x2 tfSimdFloat2Load(float x, float y) { return _mm_set_ps(0.0f, 0.0f, y, x); } +inline Tsimd_f32x2_t tfSimdFloat2Load(float x, float y) { return _mm_set_ps(0.0f, 0.0f, y, x); } -inline TSimdInt32x2 tfSimd2iLoadImmediate(int32_t x, int32_t y) { return _mm_set_epi32(0.0f, 0.0f, x, y); } +inline Tsimd_i32x2_t tfS32x2ILoadImmediate(int32_t x, int32_t y) { return _mm_set_epi32(0.0f, 0.0f, x, y); } -inline TSimdFloat32x2 tfSimd2fSplatIndex0(TSimdFloat32x2 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } -inline TSimdFloat32x2 tfSimd2fSplatIndex1(TSimdFloat32x2 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } +inline Tsimd_f32x2_t tfS32x2FSplatIndex0(Tsimd_f32x2_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } +inline Tsimd_f32x2_t tfS32x2FSplatIndex1(Tsimd_f32x2_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } -static inline TSimdFloat32x4 tfSimdFloat2To4Splat0(TSimdFloat32x2 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } -static inline TSimdFloat32x4 tfSimdFloat2To4Splat1(TSimdFloat32x2 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } +static inline Tsimd_f32x4_t tfSimdFloat2To4Splat0(Tsimd_f32x2_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } +static inline Tsimd_f32x4_t tfSimdFloat2To4Splat1(Tsimd_f32x2_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } -inline TSimdInt32x2 tfSimd2iSplat(int32_t value) { return _mm_set1_epi32(value); } -inline TSimdFloat32x2 tfSimd2fSplat(float value) { return _mm_set1_ps(value); } +inline Tsimd_i32x2_t tfS32x2ISplat(int32_t value) { return _mm_set1_epi32(value); } +inline Tsimd_f32x2_t tfS32x2FSplat(float value) { return _mm_set1_ps(value); } -inline TSimdInt32x2 tfSimd2iCmpEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_cmpeq_epi32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpNeq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_cmpeq_epi32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpNeq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_xor_si128(_mm_cmpeq_epi32(arg1, arg2), _mm_set1_epi32((int32_t)0xFFFFFFFF)); } -inline 
TSimdInt32x2 tfSimd2iCmpGt(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_cmpgt_epi32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpGtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpGt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_cmpgt_epi32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpGtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_or_si128(_mm_cmpgt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2)); } -inline TSimdInt32x2 tfSimd2iCmpLt(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { return _mm_cmplt_epi32(arg1, arg2); } -inline TSimdInt32x2 tfSimd2iCmpLtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { +inline Tsimd_i32x2_t tfS32x2ICmpLt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_cmplt_epi32(arg1, arg2); } +inline Tsimd_i32x2_t tfS32x2ICmpLtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { return _mm_or_si128(_mm_cmplt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2)); } -inline bool tfSimd2fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdFloat32x4 compare = tfSimd2fCmpEq(arg1, arg2); +inline bool tfS32x2FCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + Tsimd_f32x4_t compare = tfS32x2FCmpEq(arg1, arg2); return (_mm_movemask_ps(compare) & 0b0011) == 0b0011; } -inline bool tfSimd2iCmpAllEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2) { - const TSimdInt32x2 compare = tfSimd2iCmpEq(arg1, arg2); +inline bool tfS32x2ICmpAllEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2) { + const Tsimd_i32x2_t compare = tfS32x2ICmpEq(arg1, arg2); return (_mm_movemask_epi8(compare) & 0b0011) == 0b0011; } -static inline bool tfSimdFloat32x2CmpAllLt(TSimdFloat32x2 a, TSimdFloat32x2 b) { - TSimdFloat32x3 compare = tfSimd2fCmpLt(a, b); +static inline bool tfS32x2FCmpAllLt(Tsimd_f32x2_t a, Tsimd_f32x2_t b) { + Tsimd_f32x3_t compare = tfS32x2FCmpLt(a, b); return (_mm_movemask_ps(compare) & 0b0011) == 0b0011; } diff --git a/Forge/Math/Internal/TF_Simd32x3_neon.inl b/Forge/Math/Internal/TF_Simd32x3_neon.inl index 37f3f941ca..0dd34ca426 100644 --- a/Forge/Math/Internal/TF_Simd32x3_neon.inl +++ b/Forge/Math/Internal/TF_Simd32x3_neon.inl @@ -4,93 +4,93 @@ #include "../TF_Simd32x3.h" #endif -inline TSimdInt32x3 tfSimd3iSelect(TSimdInt32x3 arg0, TSimdInt32x3 arg1, TSimdInt32x3 mask) { return vbslq_s32(mask, arg0, arg1); } -inline TSimdFloat32x3 tfSimd3fSelect(TSimdFloat32x3 arg0, TSimdFloat32x3 arg1, TSimdFloat32x3 mask) { return vbslq_f32(mask, arg1, arg1); } +inline Tsimd_i32x3_t tfS32x3iSelect(Tsimd_i32x3_t arg0, Tsimd_i32x3_t arg1, Tsimd_i32x3_t mask) { return vbslq_s32(mask, arg0, arg1); } +inline Tsimd_f32x3_t tfS32x3FSelect(Tsimd_f32x3_t arg0, Tsimd_f32x3_t arg1, Tsimd_f32x3_t mask) { return vbslq_f32(mask, arg1, arg1); } -inline TSimdFloat32x3 tfSimd3fZero() { return vmovq_n_f32(0.0f); } -inline TSimdInt32x3 tfSimd3iZero() { return vmovq_n_s32(0); } +inline Tsimd_f32x3_t tfS32x3FZero() { return vmovq_n_f32(0.0f); } +inline Tsimd_i32x3_t tfS32x3iZero() { return vmovq_n_s32(0); } -inline TSimdInt32x3 tfSimd3iNot(TSimdInt32x3 value) { return vmvnq_s32(value); } -inline TSimdInt32x3 tfSimd3iAnd(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vandq_s32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iAndNot(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vandq_s32(vmvnq_s32(arg1), arg2); } -inline TSimdInt32x3 tfSimd3iOr(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vorrq_s32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iXor(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return veorq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iNot(Tsimd_i32x3_t value) { return vmvnq_s32(value); } +inline 
Tsimd_i32x3_t tfS32x3iAnd(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vandq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iAndNot(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vandq_s32(vmvnq_s32(arg1), arg2); } +inline Tsimd_i32x3_t tfS32x3iOr(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vorrq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iXor(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return veorq_s32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fNot(TSimdFloat32x3 value) { return vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(value))); } -inline TSimdFloat32x3 tfSimd3fAnd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FNot(Tsimd_f32x3_t value) { return vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(value))); } +inline Tsimd_f32x3_t tfS32x3FAnd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vreinterpretq_f32_s32(vandq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); } -inline TSimdFloat32x3 tfSimd3fAndNot(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FAndNot(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vreinterpretq_f32_s32(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(arg1)), vreinterpretq_s32_f32(arg2))); } -inline TSimdFloat32x3 tfSimd3fOr(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FOr(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vreinterpretq_f32_s32(vorrq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); } -inline TSimdFloat32x3 tfSimd3fXor(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FXor(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); } -inline TSimdFloat32x3 tfSimd3fFloor(TSimdFloat32x3 value) { return vrndmq_f32(value); } -inline TSimdFloat32x3 tfSimd3fCeil(TSimdFloat32x3 value) { return vrndpq_f32(value); } -inline TSimdFloat32x3 tfSimd3fRound(TSimdFloat32x3 value) { return vrndnq_f32(value); } -inline TSimdFloat32x3 tfSimd3fTruncate(TSimdFloat32x3 value) { return tfSimd3iToSimd3f(tfSimd3fToSimd3i(value)); } -inline TSimdFloat32x3 tfSimd3fMin(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return vminq_f32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fMax(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return vmaxq_f32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fClamp(TSimdFloat32x3 value, TSimdFloat32x3 min, TSimdFloat32x3 max) { - return tfSimd3fMax(min, tfSimd3fMin(value, max)); +inline Tsimd_f32x3_t tfS32x3FFloor(Tsimd_f32x3_t value) { return vrndmq_f32(value); } +inline Tsimd_f32x3_t tfS32x3FCeil(Tsimd_f32x3_t value) { return vrndpq_f32(value); } +inline Tsimd_f32x3_t tfS32x3FRound(Tsimd_f32x3_t value) { return vrndnq_f32(value); } +inline Tsimd_f32x3_t tfS32x3FTruncate(Tsimd_f32x3_t value) { return tfS32x3iToSimd3f(tfS32x3FToSimd3i(value)); } +inline Tsimd_f32x3_t tfS32x3FMin(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vminq_f32(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FMax(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vmaxq_f32(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FClamp(Tsimd_f32x3_t value, Tsimd_f32x3_t min, Tsimd_f32x3_t max) { + return tfS32x3FMax(min, tfS32x3FMin(value, max)); } -inline TSimdInt32x3 tfSimd3fToSimd3i(TSimdFloat32x3 value) { return vreinterpretq_f32_s32(value); } +inline Tsimd_i32x3_t tfS32x3FToSimd3i(Tsimd_f32x3_t value) { return vreinterpretq_s32_f32(value); } -inline TSimdFloat32x3 tfSimd3iToSimd3f(TSimdInt32x3 value) { return vreinterpretq_s32_f32(value); } +inline 
Tsimd_f32x3_t tfS32x3iToSimd3f(Tsimd_i32x3_t value) { return vreinterpretq_f32_s32(value); } -inline float tfSimd3fSelectIndex0(TSimdFloat32x3 value) { return vgetq_lane_f32(value, 0); } +inline float tfS32x3FSelectIndex0(Tsimd_f32x3_t value) { return vgetq_lane_f32(value, 0); } -inline float tfSimd3fSelectIndex1(TSimdFloat32x3 value) { return vgetq_lane_f32(value, 1); } +inline float tfS32x3FSelectIndex1(Tsimd_f32x3_t value) { return vgetq_lane_f32(value, 1); } -inline float tfSimd3fSelectIndex2(TSimdFloat32x3 value) { return vgetq_lane_f32(value, 2); } +inline float tfS32x3FSelectIndex2(Tsimd_f32x3_t value) { return vgetq_lane_f32(value, 2); } -inline TSimdFloat32x3 tfSimd3fAdd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return vaddq_f32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fSub(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return vsubq_f32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fMul(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return vmulq_f32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fMadd(TSimdFloat32x3 mul1, TSimdFloat32x3 mul2, TSimdFloat32x3 add) { return vmlaq_f32(add, mul1, mul2); } -inline TSimdFloat32x3 tfSimd3fDiv(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return vdivq_f32(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FAdd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vaddq_f32(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FSub(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vsubq_f32(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FMul(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vmulq_f32(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FMadd(Tsimd_f32x3_t mul1, Tsimd_f32x3_t mul2, Tsimd_f32x3_t add) { return vmlaq_f32(add, mul1, mul2); } +inline Tsimd_f32x3_t tfS32x3FDiv(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return vdivq_f32(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fAbs(TSimdFloat32x3 value) { return vabsq_f32(value); } +inline Tsimd_f32x3_t tfS32x3FAbs(Tsimd_f32x3_t value) { return vabsq_f32(value); } -inline TSimdFloat32x3 tfSimdFloat3Load(float x, float y, float z) { +inline Tsimd_f32x3_t tfSimdFloat3Load(float x, float y, float z) { const float values[4] = { x, y, z, 0.0f }; return vld1q_f32(values); } -inline TSimdInt32x3 tfSimdInt3Load(int32_t x, int32_t y, int32_t z) { +inline Tsimd_i32x3_t tfSimdInt3Load(int32_t x, int32_t y, int32_t z) { const int32_t values[4] = { x, y, z, 0 }; return vld1q_s32(values); } -inline TSimdFloat32x2 tfSimd3fToSimd2f(TSimdFloat32x3 value) { return vget_low_f32(value); } +inline Tsimd_f32x2_t tfS32x3FToSimd2f(Tsimd_f32x3_t value) { return vget_low_f32(value); } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat0(TSimdFloat32x3 value) { return vdupq_laneq_f32(value, 0); } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat1(TSimdFloat32x3 value) { return vdupq_laneq_f32(value, 1); } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat2(TSimdFloat32x3 value) { return vdupq_laneq_f32(value, 2); } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat0(Tsimd_f32x3_t value) { return vdupq_laneq_f32(value, 0); } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat1(Tsimd_f32x3_t value) { return vdupq_laneq_f32(value, 1); } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat2(Tsimd_f32x3_t value) { return vdupq_laneq_f32(value, 2); } -inline TSimdFloat32x3 tfSimd3fSplatIndex0(TSimdFloat32x3 value) { return vdupq_laneq_f32(value, 0); } +inline Tsimd_f32x3_t tfS32x3FSplatIndex0(Tsimd_f32x3_t value) { return vdupq_laneq_f32(value, 0); } -inline TSimdFloat32x3 tfSimd3fSplatIndex1(TSimdFloat32x3 value) { return vdupq_laneq_f32(value, 1); } 
+inline Tsimd_f32x3_t tfS32x3FSplatIndex1(Tsimd_f32x3_t value) { return vdupq_laneq_f32(value, 1); } -inline TSimdFloat32x3 tfSimd3fSplatIndex2(TSimdFloat32x3 value) { return vdupq_laneq_f32(value, 2); } +inline Tsimd_f32x3_t tfS32x3FSplatIndex2(Tsimd_f32x3_t value) { return vdupq_laneq_f32(value, 2); } -inline TSimdInt32x3 tfSimd3iSplat(int32_t value) { return vdupq_n_s32(value); } +inline Tsimd_i32x3_t tfS32x3iSplat(int32_t value) { return vdupq_n_s32(value); } -inline TSimdFloat32x3 tfSimd3fSplat(float value) { return vdupq_n_f32(value); } +inline Tsimd_f32x3_t tfS32x3FSplat(float value) { return vdupq_n_f32(value); } -inline TSimdInt32x3 tfSimd3iCmpEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vceqq_s32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpNeq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vmvnq_s32(vceqq_s32(arg1, arg2)); } -inline TSimdInt32x3 tfSimd3iCmpGt(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vcgtq_s32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpGtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vcgeq_s32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpLt(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vcltq_s32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpLtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return vcleq_s32(arg1, arg2); } -inline bool tfSimd3fCmpAllEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vceqq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpNeq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vmvnq_s32(vceqq_s32(arg1, arg2)); } +inline Tsimd_i32x3_t tfS32x3iCmpGt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vcgtq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpGtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vcgeq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpLt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vcltq_s32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpLtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return vcleq_s32(arg1, arg2); } +inline bool tfS32x3FCmpAllEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { // for (int i = 0; i < 3; i++) { // if (arg1.v[i] != arg2.v[i]) { // return false; @@ -99,7 +99,7 @@ inline bool tfSimd3fCmpAllEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return true; } -inline bool tfSimd3iCmpAllEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline bool tfS32x3iCmpAllEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { // for (int i = 0; i < 3; i++) { // if (arg1.v[i] != arg2.v[i]) { // return false; diff --git a/Forge/Math/Internal/TF_Simd32x3_scalar.inl b/Forge/Math/Internal/TF_Simd32x3_scalar.inl index ffae5b895c..b0f6c85ed9 100644 --- a/Forge/Math/Internal/TF_Simd32x3_scalar.inl +++ b/Forge/Math/Internal/TF_Simd32x3_scalar.inl @@ -4,115 +4,115 @@ #include "../TF_Simd32x3.h" #endif -inline TSimdInt32x3 tfSimd3iSelect(TSimdInt32x3 arg0, TSimdInt32x3 arg1, TSimdInt32x3 mask) { +inline Tsimd_i32x3_t tfS32x3iSelect(Tsimd_i32x3_t arg0, Tsimd_i32x3_t arg1, Tsimd_i32x3_t mask) { return { (mask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (mask.v[1] == 0) ? arg0.v[1] : arg1.v[1], (mask.v[2] == 0) ? arg0.v[2] : arg1.v[2] }; } -inline TSimdFloat32x3 tfSimd3fSelect(TSimdFloat32x3 arg0, TSimdFloat32x3 arg1, TSimdFloat32x3 mask) { - TSimdInt32x3 intMask = tfSimd3fToSimd3i(mask); +inline Tsimd_f32x3_t tfS32x3FSelect(Tsimd_f32x3_t arg0, Tsimd_f32x3_t arg1, Tsimd_f32x3_t mask) { + Tsimd_i32x3_t intMask = tfS32x3FToSimd3i(mask); return { (intMask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (intMask.v[1] == 0) ? 
arg0.v[1] : arg1.v[1], (intMask.v[2] == 0) ? arg0.v[2] : arg1.v[2] }; } -inline TSimdFloat32x3 tfSimd3fZero() { return { 0, 0, 0 }; } -inline TSimdInt32x3 tfSimd3iZero() { return { 0, 0, 0 }; } +inline Tsimd_f32x3_t tfS32x3FZero() { return { 0, 0, 0 }; } +inline Tsimd_i32x3_t tfS32x3iZero() { return { 0, 0, 0 }; } -inline TSimdInt32x3 tfSimd3iNot(TSimdInt32x3 value) { return { ~value.v[0], ~value.v[1], ~value.v[2] }; } -inline TSimdInt32x3 tfSimd3iAnd(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iNot(Tsimd_i32x3_t value) { return { ~value.v[0], ~value.v[1], ~value.v[2] }; } +inline Tsimd_i32x3_t tfS32x3iAnd(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1], arg1.v[2] & arg2.v[2] }; } -inline TSimdInt32x3 tfSimd3iAndNot(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iAndNot(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1], ~arg1.v[2] & arg2.v[2] }; } -inline TSimdInt32x3 tfSimd3iOr(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iOr(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1], arg1.v[2] | arg2.v[2] }; } -inline TSimdInt32x3 tfSimd3iXor(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iXor(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1], arg1.v[2] ^ arg2.v[2] }; } -inline TSimdFloat32x3 tfSimd3fNot(TSimdFloat32x3 value) { - TSimdInt32x3 result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]), ~((int32_t)value.v[2]) } }; - return tfSimd3iToSimd3f(result); +inline Tsimd_f32x3_t tfS32x3FNot(Tsimd_f32x3_t value) { + Tsimd_i32x3_t result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]), ~((int32_t)value.v[2]) } }; + return tfS32x3iToSimd3f(result); } -inline TSimdFloat32x3 tfSimd3fAnd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { - TSimdInt32x3 result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), +inline Tsimd_f32x3_t tfS32x3FAnd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { + Tsimd_i32x3_t result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), ((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]) }; - return tfSimd3iToSimd3f(result); + return tfS32x3iToSimd3f(result); } -inline TSimdFloat32x3 tfSimd3fAndNot(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { - TSimdInt32x3 result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), +inline Tsimd_f32x3_t tfS32x3FAndNot(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { + Tsimd_i32x3_t result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), ~((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]) } }; - return tfSimd3iToSimd3f(result); + return tfS32x3iToSimd3f(result); } -inline TSimdFloat32x3 tfSimd3fOr(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { - TSimdInt32x3 result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]), +inline Tsimd_f32x3_t tfS32x3FOr(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { + Tsimd_i32x3_t result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]), ((int32_t)arg1.v[2]) | ((int32_t)arg2.v[2]) } }; - return tfSimd3iToSimd3f(result); + return tfS32x3iToSimd3f(result); } -inline TSimdFloat32x3 tfSimd3fXor(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { - TSimdInt32x3 result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), 
((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]), +inline Tsimd_f32x3_t tfS32x3FXor(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { + Tsimd_i32x3_t result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]), ((int32_t)arg1.v[2]) ^ ((int32_t)arg2.v[2]) } }; - return tfSimd3iToSimd3f(result); + return tfS32x3iToSimd3f(result); } -inline TSimdFloat32x3 tfSimd3fFloor(TSimdFloat32x3 value) { return { { floorf(value.v[0]), floorf(value.v[1]), floorf(value.v[2]) } }; } -inline TSimdFloat32x3 tfSimd3fCeil(TSimdFloat32x3 value) { return { { ceilf(value.v[0]), ceilf(value.v[1]), ceilf(value.v[2]) } }; } -inline TSimdFloat32x3 tfSimd3fRound(TSimdFloat32x3 value) { +inline Tsimd_f32x3_t tfS32x3FFloor(Tsimd_f32x3_t value) { return { { floorf(value.v[0]), floorf(value.v[1]), floorf(value.v[2]) } }; } +inline Tsimd_f32x3_t tfS32x3FCeil(Tsimd_f32x3_t value) { return { { ceilf(value.v[0]), ceilf(value.v[1]), ceilf(value.v[2]) } }; } +inline Tsimd_f32x3_t tfS32x3FRound(Tsimd_f32x3_t value) { // While 'roundf' may seem the obvious choice here, it rounds halfway cases // away from zero regardless of the current rounding mode, but 'rintf' uses // the current rounding mode which is consistent with other implementations. return { { rintf(value.v[0]), rintf(value.v[1]), rintf(value.v[2]) } }; } -inline TSimdFloat32x3 tfSimd3fTruncate(TSimdFloat32x3 value) { return tfSimd3iToSimd3f(tfSimd3fToSimd3i(value)); } -inline TSimdFloat32x3 tfSimd3fMin(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FTruncate(Tsimd_f32x3_t value) { return tfS32x3iToSimd3f(tfS32x3FToSimd3i(value)); } +inline Tsimd_f32x3_t tfS32x3FMin(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return { { fminf(arg1.v[0], arg2.v[0]), fminf(arg1.v[1], arg2.v[1]), fminf(arg1.v[2], arg2.v[2]) } }; } -inline TSimdFloat32x3 tfSimd3fMax(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FMax(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return { { fmaxf(arg1.v[0], arg2.v[0]), fmaxf(arg1.v[1], arg2.v[1]), fmaxf(arg1.v[2], arg2.v[2]) } }; } -inline TSimdFloat32x3 tfSimd3fClamp(TSimdFloat32x3 value, TSimdFloat32x3 min, TSimdFloat32x3 max) { - return tfSimd3fMax(min, tfSimd3fMin(value, max)); +inline Tsimd_f32x3_t tfS32x3FClamp(Tsimd_f32x3_t value, Tsimd_f32x3_t min, Tsimd_f32x3_t max) { + return tfS32x3FMax(min, tfS32x3FMin(value, max)); } -inline TSimdInt32x3 tfSimd3fToSimd3i(TSimdFloat32x3 value) { return { (int32_t)value.v[0], (int32_t)value.v[1], (int32_t)value.v[2] }; } +inline Tsimd_i32x3_t tfS32x3FToSimd3i(Tsimd_f32x3_t value) { return { (int32_t)value.v[0], (int32_t)value.v[1], (int32_t)value.v[2] }; } -inline TSimdFloat32x3 tfSimd3iToSimd3f(TSimdInt32x3 value) { return { (float)value.v[0], (float)value.v[1], (float)value.v[2] }; } +inline Tsimd_f32x3_t tfS32x3iToSimd3f(Tsimd_i32x3_t value) { return { (float)value.v[0], (float)value.v[1], (float)value.v[2] }; } -inline float tfSimd3fSelectIndex0(TSimdFloat32x3 value) { return value.v[0]; } +inline float tfS32x3FSelectIndex0(Tsimd_f32x3_t value) { return value.v[0]; } -inline float tfSimd3fSelectIndex1(TSimdFloat32x3 value) { return value.v[1]; } +inline float tfS32x3FSelectIndex1(Tsimd_f32x3_t value) { return value.v[1]; } -inline float tfSimd3fSelectIndex2(TSimdFloat32x3 value) { return value.v[2]; } +inline float tfS32x3FSelectIndex2(Tsimd_f32x3_t value) { return value.v[2]; } -inline TSimdFloat32x3 tfSimd3fAdd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FAdd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return 
{ arg1.v[0] + arg2.v[0], arg1.v[1] + arg2.v[1], arg1.v[2] + arg2.v[2], }; } -inline TSimdFloat32x3 tfSimd3fSub(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FSub(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return { arg1.v[0] - arg2.v[0], arg1.v[1] - arg2.v[1], arg1.v[2] - arg2.v[2], }; } -inline TSimdFloat32x3 tfSimd3fMul(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FMul(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return { arg1.v[0] * arg2.v[0], arg1.v[1] * arg2.v[1], arg1.v[2] * arg2.v[2], }; } -inline TSimdFloat32x3 tfSimd3fMadd(TSimdFloat32x3 mul1, TSimdFloat32x3 mul2, TSimdFloat32x3 add) { - return tfSimd3fAdd(tfSimd3fMul(mul1, mul2), add); +inline Tsimd_f32x3_t tfS32x3FMadd(Tsimd_f32x3_t mul1, Tsimd_f32x3_t mul2, Tsimd_f32x3_t add) { + return tfS32x3FAdd(tfS32x3FMul(mul1, mul2), add); } -inline TSimdFloat32x3 tfSimd3fDiv(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline Tsimd_f32x3_t tfS32x3FDiv(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return { arg1.v[0] / arg2.v[0], arg1.v[1] / arg2.v[1], arg1.v[2] / arg2.v[2] }; } -inline TSimdFloat32x3 tfSimd3fAbs(TSimdFloat32x3 value) { +inline Tsimd_f32x3_t tfS32x3FAbs(Tsimd_f32x3_t value) { return { abs(value.v[0]), abs(value.v[1]), @@ -120,47 +120,47 @@ inline TSimdFloat32x3 tfSimd3fAbs(TSimdFloat32x3 value) { }; } -inline TSimdFloat32x3 tfSimdFloat3x32Load(float x, float y, float z) { return { x, y, z }; } -inline TSimdInt32x3 tfSimdInt3x32Load(int32_t x, int32_t y, int32_t z) { return { x, y, z }; } +inline Tsimd_f32x3_t tfSimd3x32FLoad(float x, float y, float z) { return { x, y, z }; } +inline Tsimd_i32x3_t tfSimd3x32ILoad(int32_t x, int32_t y, int32_t z) { return { x, y, z }; } -inline TSimdFloat32x2 tfSimd3fToSimd2f(TSimdFloat32x3 value) { return { value.v[0], value.v[1] }; } +inline Tsimd_f32x2_t tfS32x3FToSimd2f(Tsimd_f32x3_t value) { return { value.v[0], value.v[1] }; } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat0(TSimdFloat32x3 value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat1(TSimdFloat32x3 value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat2(TSimdFloat32x3 value) { return { value.v[2], value.v[2], value.v[2], value.v[2] }; } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat0(Tsimd_f32x3_t value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat1(Tsimd_f32x3_t value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat2(Tsimd_f32x3_t value) { return { value.v[2], value.v[2], value.v[2], value.v[2] }; } -inline TSimdFloat32x3 tfSimd3fSplatIndex0(TSimdFloat32x3 value) { return { value.v[0], value.v[0], value.v[0] }; } -inline TSimdFloat32x3 tfSimd3fSplatIndex1(TSimdFloat32x3 value) { return { value.v[1], value.v[1], value.v[1] }; } -inline TSimdFloat32x3 tfSimd3fSplatIndex2(TSimdFloat32x3 value) { return { value.v[2], value.v[2], value.v[2] }; } +inline Tsimd_f32x3_t tfS32x3FSplatIndex0(Tsimd_f32x3_t value) { return { value.v[0], value.v[0], value.v[0] }; } +inline Tsimd_f32x3_t tfS32x3FSplatIndex1(Tsimd_f32x3_t value) { return { value.v[1], value.v[1], value.v[1] }; } +inline Tsimd_f32x3_t tfS32x3FSplatIndex2(Tsimd_f32x3_t value) { return { value.v[2], value.v[2], value.v[2] }; } -inline TSimdInt32x3 tfSimd3iSplat(int32_t value) { return { value, value, value }; } -inline TSimdFloat32x3 tfSimd3fSplat(float value) { 
return { value, value, value }; } +inline Tsimd_i32x3_t tfS32x3iSplat(int32_t value) { return { value, value, value }; } +inline Tsimd_f32x3_t tfS32x3FSplat(float value) { return { value, value, value }; } -inline TSimdInt32x3 tfSimd3iCmpEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { { (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[2] == arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x3 tfSimd3iCmpNeq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpNeq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { { (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[2] != arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x3 tfSimd3iCmpGt(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpGt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { { (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[2] > arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x3 tfSimd3iCmpGtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpGtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { { (arg1.v[0] >= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[2] >= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x3 tfSimd3iCmpLt(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpLt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline TSimdInt32x3 tfSimd3iCmpLtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpLtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return { { (arg1.v[0] <= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] <= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[2] <= arg2.v[2]) ?
(int32_t)0xFFFFFFFF : 0x00000000 } }; } -inline bool tfSimd3fCmpAllEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { +inline bool tfS32x3FCmpAllEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { for (int i = 0; i < 3; i++) { if (arg1.v[i] != arg2.v[i]) { return false; @@ -169,7 +169,7 @@ inline bool tfSimd3fCmpAllEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return true; } -inline bool tfSimd3iCmpAllEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline bool tfS32x3iCmpAllEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { for (int i = 0; i < 3; i++) { if (arg1.v[i] != arg2.v[i]) { return false; @@ -178,7 +178,7 @@ inline bool tfSimd3iCmpAllEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return true; } -static inline bool tfSimdFloat32x3CmpAllLt(TSimdFloat32x3 a, TSimdFloat32x3 b) { +static inline bool tfSimdFloat32x3CmpAllLt(Tsimd_f32x3_t a, Tsimd_f32x3_t b) { for (int i = 0; i < 3; i++) { if (a.v[i] >= b.v[i]) { return false; @@ -187,13 +187,13 @@ static inline bool tfSimdFloat32x3CmpAllLt(TSimdFloat32x3 a, TSimdFloat32x3 b) { return true; } -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex0ByValue(TSimdFloat32x3 input, float value) { +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex0ByValue(Tsimd_f32x3_t input, float value) { return {value, input.v[1], input.v[2]}; } -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex1ByValue(TSimdFloat32x3 input, float value){ +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex1ByValue(Tsimd_f32x3_t input, float value){ return {input.v[0], value, input.v[2]}; }; -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex2ByValue(TSimdFloat32x3 input, float value){ +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex2ByValue(Tsimd_f32x3_t input, float value){ return {input.v[0], input.v[1], value}; }; diff --git a/Forge/Math/Internal/TF_Simd32x3_sse.inl b/Forge/Math/Internal/TF_Simd32x3_sse.inl index 04c50f531a..9b114cdaea 100644 --- a/Forge/Math/Internal/TF_Simd32x3_sse.inl +++ b/Forge/Math/Internal/TF_Simd32x3_sse.inl @@ -4,111 +4,111 @@ #include "../TF_Simd32x3.h" #endif -inline TSimdInt32x3 tfSimd3iSelect(TSimdInt32x3 arg0, TSimdInt32x3 arg1, TSimdInt32x3 mask) { return _mm_blendv_epi8(arg0, arg1, mask); } -inline TSimdFloat32x3 tfSimd3fSelect(TSimdFloat32x3 arg0, TSimdFloat32x3 arg1, TSimdFloat32x3 mask) { +inline Tsimd_i32x3_t tfS32x3iSelect(Tsimd_i32x3_t arg0, Tsimd_i32x3_t arg1, Tsimd_i32x3_t mask) { return _mm_blendv_epi8(arg0, arg1, mask); } +inline Tsimd_f32x3_t tfS32x3FSelect(Tsimd_f32x3_t arg0, Tsimd_f32x3_t arg1, Tsimd_f32x3_t mask) { return _mm_blendv_ps(arg0, arg1, mask); } -inline TSimdFloat32x3 tfSimd3fZero() { return _mm_setzero_ps(); } -inline TSimdInt32x3 tfSimd3iZero() { return _mm_setzero_si128(); } +inline Tsimd_f32x3_t tfS32x3FZero() { return _mm_setzero_ps(); } +inline Tsimd_i32x3_t tfS32x3iZero() { return _mm_setzero_si128(); } -inline TSimdInt32x3 tfSimd3iNot(TSimdInt32x3 value) { return _mm_andnot_si128(value, _mm_set1_epi32(TF_SIMDI_MAX)); } -inline TSimdInt32x3 tfSimd3iAnd(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_and_si128(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iAndNot(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_andnot_si128(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iOr(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_or_si128(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iXor(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_xor_si128(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iNot(Tsimd_i32x3_t value) { return _mm_andnot_si128(value, _mm_set1_epi32(TF_SIMDI_MAX)); } +inline Tsimd_i32x3_t 
tfS32x3iAnd(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_and_si128(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iAndNot(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_andnot_si128(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iOr(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_or_si128(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iXor(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_xor_si128(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fNot(TSimdFloat32x3 value) { - const TSimdFloat32x3 invert = tfSimd3fSplat((float)(0xFFFFFFFF)); +inline Tsimd_f32x3_t tfS32x3FNot(Tsimd_f32x3_t value) { + const Tsimd_f32x3_t invert = tfS32x3FSplat((float)(0xFFFFFFFF)); return _mm_andnot_ps(value, invert); } -inline TSimdFloat32x3 tfSimd3fAnd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_and_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fAndNot(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_andnot_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fOr(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_or_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fXor(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_xor_ps(arg1, arg2); } - -inline TSimdFloat32x3 tfSimd3fFloor(TSimdFloat32x3 value) { return _mm_floor_ps(value); } -inline TSimdFloat32x3 tfSimd3fCeil(TSimdFloat32x3 value) { return _mm_ceil_ps(value); } -inline TSimdFloat32x3 tfSimd3fRound(TSimdFloat32x3 value) { return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } -inline TSimdFloat32x3 tfSimd3fTruncate(TSimdFloat32x3 value) { return tfSimd3iToSimd3f(tfSimd3fToSimd3i(value)); } -inline TSimdFloat32x3 tfSimd3fMin(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_min_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fMax(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_max_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fClamp(TSimdFloat32x3 value, TSimdFloat32x3 min, TSimdFloat32x3 max) { - return tfSimd3fMax(min, tfSimd3fMin(value, max)); +inline Tsimd_f32x3_t tfS32x3FAnd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_and_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FAndNot(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_andnot_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FOr(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_or_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FXor(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_xor_ps(arg1, arg2); } + +inline Tsimd_f32x3_t tfS32x3FFloor(Tsimd_f32x3_t value) { return _mm_floor_ps(value); } +inline Tsimd_f32x3_t tfS32x3FCeil(Tsimd_f32x3_t value) { return _mm_ceil_ps(value); } +inline Tsimd_f32x3_t tfS32x3FRound(Tsimd_f32x3_t value) { return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } +inline Tsimd_f32x3_t tfS32x3FTruncate(Tsimd_f32x3_t value) { return tfS32x3iToSimd3f(tfS32x3FToSimd3i(value)); } +inline Tsimd_f32x3_t tfS32x3FMin(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_min_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FMax(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_max_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FClamp(Tsimd_f32x3_t value, Tsimd_f32x3_t min, Tsimd_f32x3_t max) { + return tfS32x3FMax(min, tfS32x3FMin(value, max)); } -inline TSimdInt32x3 tfSimd3fToSimd3i(TSimdFloat32x3 value) { return _mm_castps_si128(value); } -inline TSimdFloat32x3 tfSimd3iToSimd3f(TSimdInt32x3 value) { return _mm_castsi128_ps(value); } +inline Tsimd_i32x3_t tfS32x3FToSimd3i(Tsimd_f32x3_t value) { return _mm_castps_si128(value); } +inline Tsimd_f32x3_t 
tfS32x3iToSimd3f(Tsimd_i32x3_t value) { return _mm_castsi128_ps(value); } -inline float tfSimd3fSelectIndex0(TSimdFloat32x3 value) { return _mm_cvtss_f32(value); } -inline float tfSimd3fSelectIndex1(TSimdFloat32x3 value) { return tfSimd3fSelectIndex0(tfSimd3fSplatIndex1(value)); } -inline float tfSimd3fSelectIndex2(TSimdFloat32x3 value) { return tfSimd3fSelectIndex0(tfSimd3fSplatIndex2(value)); } +inline float tfS32x3FSelectIndex0(Tsimd_f32x3_t value) { return _mm_cvtss_f32(value); } +inline float tfS32x3FSelectIndex1(Tsimd_f32x3_t value) { return tfS32x3FSelectIndex0(tfS32x3FSplatIndex1(value)); } +inline float tfS32x3FSelectIndex2(Tsimd_f32x3_t value) { return tfS32x3FSelectIndex0(tfS32x3FSplatIndex2(value)); } -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex0ByValue(TSimdFloat32x3 input, float value) { - return _mm_blend_ps(input, tfSimd3fSplat(value), 0b0001); +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex0ByValue(Tsimd_f32x3_t input, float value) { + return _mm_blend_ps(input, tfS32x3FSplat(value), 0b0001); } -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex1ByValue(TSimdFloat32x3 input, float value) { - return _mm_blend_ps(input, tfSimd3fSplat(value), 0b0010); +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex1ByValue(Tsimd_f32x3_t input, float value) { + return _mm_blend_ps(input, tfS32x3FSplat(value), 0b0010); } -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex2ByValue(TSimdFloat32x3 input, float value) { - return _mm_blend_ps(input, tfSimd3fSplat(value), 0b0100); +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex2ByValue(Tsimd_f32x3_t input, float value) { + return _mm_blend_ps(input, tfS32x3FSplat(value), 0b0100); } -inline TSimdFloat32x3 tfSimd3fAdd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_add_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fSub(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_sub_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fMul(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_mul_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fMadd(TSimdFloat32x3 mul1, TSimdFloat32x3 mul2, TSimdFloat32x3 add) { +inline Tsimd_f32x3_t tfS32x3FAdd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_add_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FSub(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_sub_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FMul(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_mul_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FMadd(Tsimd_f32x3_t mul1, Tsimd_f32x3_t mul2, Tsimd_f32x3_t add) { #if 0 return _mm_fmadd_ps(mul1, mul2, add); // Requires FMA CPUID #else - return tfSimd3fAdd(tfSimd3fMul(mul1, mul2), add); + return tfS32x3FAdd(tfS32x3FMul(mul1, mul2), add); #endif } -inline TSimdFloat32x3 tfSimd3fDiv(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { return _mm_div_ps(arg1, arg2); } +inline Tsimd_f32x3_t tfS32x3FDiv(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { return _mm_div_ps(arg1, arg2); } -inline TSimdFloat32x3 tfSimd3fAbs(TSimdFloat32x3 value) { - const TSimdFloat32x4 signMask = tfSimd3iToSimd3f(tfSimd3iSplat(0x7FFFFFFF)); +inline Tsimd_f32x3_t tfS32x3FAbs(Tsimd_f32x3_t value) { + const Tsimd_f32x4_t signMask = tfS32x3iToSimd3f(tfS32x3iSplat(0x7FFFFFFF)); return _mm_and_ps(value, signMask); } -inline TSimdFloat32x3 tfSimdFloat3x32Load(float x, float y, float z) { return _mm_set_ps(0.0f, z, y, x); } -inline TSimdInt32x3 tfSimdInt3x32Load(int32_t x, int32_t y, int32_t z) { return _mm_set_epi32(0.0f, x, y, z); } +inline Tsimd_f32x3_t tfSimd3x32FLoad(float x, float y, float z) { 
return _mm_set_ps(0.0f, z, y, x); } +inline Tsimd_i32x3_t tfSimd3x32ILoad(int32_t x, int32_t y, int32_t z) { return _mm_set_epi32(0, z, y, x); } -inline TSimdFloat32x2 tfSimd3fToSimd2f(TSimdFloat32x3 value) { return value; } +inline Tsimd_f32x2_t tfS32x3FToSimd2f(Tsimd_f32x3_t value) { return value; } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat0(TSimdFloat32x3 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat1(TSimdFloat32x3 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } -static inline TSimdFloat32x4 tfSimdFloat3To4Splat2(TSimdFloat32x3 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat0(Tsimd_f32x3_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat1(Tsimd_f32x3_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat2(Tsimd_f32x3_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); } -inline TSimdFloat32x3 tfSimd3fSplatIndex0(TSimdFloat32x3 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } -inline TSimdFloat32x3 tfSimd3fSplatIndex1(TSimdFloat32x3 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } -inline TSimdFloat32x3 tfSimd3fSplatIndex2(TSimdFloat32x3 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); } +inline Tsimd_f32x3_t tfS32x3FSplatIndex0(Tsimd_f32x3_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } +inline Tsimd_f32x3_t tfS32x3FSplatIndex1(Tsimd_f32x3_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } +inline Tsimd_f32x3_t tfS32x3FSplatIndex2(Tsimd_f32x3_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); } -inline TSimdInt32x3 tfSimd3iSplat(int32_t value) { return _mm_set1_epi32(value); } -inline TSimdFloat32x3 tfSimd3fSplat(float value) { return _mm_set1_ps(value); } +inline Tsimd_i32x3_t tfS32x3iSplat(int32_t value) { return _mm_set1_epi32(value); } +inline Tsimd_f32x3_t tfS32x3FSplat(float value) { return _mm_set1_ps(value); } -inline TSimdInt32x3 tfSimd3iCmpEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_cmpeq_epi32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpNeq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_cmpeq_epi32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpNeq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_xor_si128(_mm_cmpeq_epi32(arg1, arg2), _mm_set1_epi32((int32_t)0xFFFFFFFF)); } -inline TSimdInt32x3 tfSimd3iCmpGt(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_cmpgt_epi32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpGtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpGt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_cmpgt_epi32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpGtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_or_si128(_mm_cmpgt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2)); } -inline TSimdInt32x3 tfSimd3iCmpLt(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { return _mm_cmplt_epi32(arg1, arg2); } -inline TSimdInt32x3 tfSimd3iCmpLtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { +inline Tsimd_i32x3_t tfS32x3iCmpLt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { return _mm_cmplt_epi32(arg1, arg2); } +inline Tsimd_i32x3_t tfS32x3iCmpLtEq(Tsimd_i32x3_t arg1,
Tsimd_i32x3_t arg2) { return _mm_or_si128(_mm_cmplt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2)); } -inline bool tfSimd3fCmpAllEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) { - TSimdFloat32x3 compare = tfSimd3fCmpEq(arg1, arg2); +inline bool tfS32x3FCmpAllEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2) { + Tsimd_f32x3_t compare = tfS32x3FCmpEq(arg1, arg2); return (_mm_movemask_ps(compare) & 0b0111) == 0b0111; } -inline bool tfSimd3iCmpAllEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2) { - const TSimdInt32x3 compare = tfSimd3iCmpEq(arg1, arg2); +inline bool tfS32x3iCmpAllEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2) { + const Tsimd_i32x3_t compare = tfS32x3iCmpEq(arg1, arg2); return (_mm_movemask_epi8(compare) & 0b0111) == 0b0111; } -static inline bool tfSimdFloat32x3CmpAllLt(TSimdFloat32x3 a, TSimdFloat32x3 b) { - TSimdFloat32x3 compare = tfSimd3fCmpLt(a, b); +static inline bool tfSimdFloat32x3CmpAllLt(Tsimd_f32x3_t a, Tsimd_f32x3_t b) { + Tsimd_f32x3_t compare = tfS32x3FCmpLt(a, b); return (_mm_movemask_ps(compare) & 0b0111) == 0b0111; } diff --git a/Forge/Math/Internal/TF_Simd32x4_neon.inl b/Forge/Math/Internal/TF_Simd32x4_neon.inl index 5b8fa6e924..cc92670a69 100644 --- a/Forge/Math/Internal/TF_Simd32x4_neon.inl +++ b/Forge/Math/Internal/TF_Simd32x4_neon.inl @@ -4,118 +4,118 @@ #include "../TF_Simd32x4.h" #endif -static inline TSimdFloat32x4 tfSimd4fReplaceIndex0ByValue(TSimdFloat32x4 input, float value) { return vsetq_lane_f32(value, input, 0); } -static inline TSimdFloat32x4 tfSimd4fReplaceIndex1ByValue(TSimdFloat32x4 input, float value) { return vsetq_lane_f32(value, input, 1); } -static inline TSimdFloat32x4 tfSimd4fReplaceIndex2ByValue(TSimdFloat32x4 input, float value) { return vsetq_lane_f32(value, input, 2); } -static inline TSimdFloat32x4 tfSimd4fReplaceIndex3ByValue(TSimdFloat32x4 input, float value) { return vsetq_lane_f32(value, input, 3); } - -inline TSimdInt32x4 tfSimd4iSelect(TSimdInt32x4 arg0, TSimdInt32x4 arg1, TSimdInt32x4 mask) { return vbslq_s32(mask, arg1, arg1); } -inline TSimdFloat32x4 tfSimd4fSelect(TSimdFloat32x4 arg0, TSimdFloat32x4 arg1, TSimdFloat32x4 mask) { return vbslq_f32(mask, arg1, arg1); } - -inline TSimdFloat32x4 tfSimd4fZero() { return vmovq_n_f32(0.0f); } -inline TSimdInt32x4 tfSimd4iZero() { return vmovq_n_s32(0); } - -inline TSimdInt32x4 tfSimd4iNot(TSimdInt32x4 value) { return vmvnq_s32(value); } -inline TSimdInt32x4 tfSimd4iAnd(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vandq_s32(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iAndNot(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vandq_s32(vmvnq_s32(arg1), arg2); } -inline TSimdInt32x4 tfSimd4iOr(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vorrq_s32(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iXor(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return veorq_s32(arg1, arg2); } - -inline TSimdFloat32x4 tfSimd4fNot(TSimdFloat32x4 value) { return vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(value))); } -inline TSimdFloat32x4 tfSimd4fAnd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vreinterpretq_f32_s32(vandq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); -} -inline TSimdFloat32x4 tfSimd4fAndNot(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vreinterpretq_f32_s32(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(arg1)), vreinterpretq_s32_f32(arg2))); -} -inline TSimdFloat32x4 tfSimd4fOr(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vreinterpretq_f32_s32(vorrq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); -} -inline TSimdFloat32x4 tfSimd4fXor(TSimdFloat32x4 arg1, 
TSimdFloat32x4 arg2) { - return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); -} - -inline TSimdFloat32x4 tfSimd4fFloor(TSimdFloat32x4 value) { return vrndmq_f32(value); } -inline TSimdFloat32x4 tfSimd4fCeil(TSimdFloat32x4 value) { return vrndpq_f32(value); } -inline TSimdFloat32x4 tfSimd4fRound(TSimdFloat32x4 value) { return vrndnq_f32(value); } -inline TSimdFloat32x4 tfSimd4fTruncate(TSimdFloat32x4 value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } -inline TSimdFloat32x4 tfSimd4fMin(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vminq_f32(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fMax(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vmaxq_f32(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fClamp(TSimdFloat32x4 value, TSimdFloat32x4 min, TSimdFloat32x4 max) { - return tfSimd4fMax(min, tfSimd4fMin(value, max)); -} - -inline TSimdInt32x4 tfSimd4fToSimd4i(TSimdFloat32x4 value) { return vreinterpretq_f32_s32(value); } - -inline TSimdFloat32x4 tfSimd4iToSimd4f(TSimdInt32x4 value) { return vreinterpretq_s32_f32(value); } - -inline float tfSimd4fSelectIndex0(TSimdFloat32x4 value) { return vgetq_lane_f32(value, 0); } -inline float tfSimd4fSelectIndex1(TSimdFloat32x4 value) { return vgetq_lane_f32(value, 1); } -inline float tfSimd4fSelectIndex2(TSimdFloat32x4 value) { return vgetq_lane_f32(value, 2); } -inline float tfSimd4fSelectIndex3(TSimdFloat32x4 value) { return vgetq_lane_f32(value, 3); } - -inline TSimdFloat32x4 tfSimd4fAdd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vaddq_f32(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fSub(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vsubq_f32(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fMul(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vmulq_f32(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fMadd(TSimdFloat32x4 mul1, TSimdFloat32x4 mul2, TSimdFloat32x4 add) { return vmlaq_f32(add, mul1, mul2); } - -inline TSimdFloat32x4 tfSimd4fDiv(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vdivq_f32(arg1, arg2); } - -inline TSimdFloat32x4 tfSimd4fAbs(TSimdFloat32x4 value) { return vabsq_f32(value); } -inline TSimdFloat32x4 tfSimdFloat4x32Load(float x, float y, float z, float w) { - const float values[4] = { x, y, z, w }; - return vld1q_f32(values); -} - -inline TSimdInt32x4 tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { - const int32_t values[4] = { x, y, z, w }; - return vld1q_s32(values); -} - -inline TSimdFloat32x2 tfSimd4fToSimd2f(TSimdFloat32x4 value) { return vget_low_f32(value); } - -inline TSimdFloat32x3 tfSimd4fToSimd3f(TSimdFloat32x4 value) { return value; } - -inline TSimdFloat32x4 tfSimd4fSplatIndex0(TSimdFloat32x4 value) { return vdupq_laneq_f32(value, 0); } - -inline TSimdFloat32x4 tfSimd4fSplatIndex1(TSimdFloat32x4 value) { return vdupq_laneq_f32(value, 1); } - -inline TSimdFloat32x4 tfSimd4fSplatIndex2(TSimdFloat32x4 value) { return vdupq_laneq_f32(value, 2); } - -inline TSimdFloat32x4 tfSimd4fSplatIndex3(TSimdFloat32x4 value) { return vdupq_laneq_f32(value, 3); } - -inline TSimdInt32x4 tfSimd4iSplat(int32_t value) { return vdupq_n_s32(value); } - -inline TSimdFloat32x4 tfSimd4fSplat(float value) { return vdupq_n_f32(value); } - -inline TSimdFloat32x4 tfSimd4fCmpEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vreinterpretq_f32_s32(vceqq_f32(arg1, arg2)); } -inline TSimdFloat32x4 tfSimd4fCmpNeq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vreinterpretq_f32_s32(vmvnq_s32(vceqq_f32(arg1, arg2))); -} -inline TSimdFloat32x4 
tfSimd4fCmpGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vreinterpretq_f32_s32(vcgtq_f32(arg1, arg2)); } -inline TSimdFloat32x4 tfSimd4fCmpGtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vreinterpretq_f32_s32(vcgeq_f32(arg1, arg2)); } -inline TSimdFloat32x4 tfSimd4fCmpLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vreinterpretq_f32_s32(vcltq_f32(arg1, arg2)); } -inline TSimdFloat32x4 tfSimd4fCmpLtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return vreinterpretq_f32_s32(vcleq_f32(arg1, arg2)); } - -inline TSimdInt32x4 tfSimd4iCmpEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vceqq_s32(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iCmpNeq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vmvnq_s32(vceqq_s32(arg1, arg2)); } -inline TSimdInt32x4 tfSimd4iCmpGt(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vcgtq_s32(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iCmpGtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vcgeq_s32(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iCmpLt(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vcltq_s32(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iCmpLtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return vcleq_s32(arg1, arg2); } - -inline bool tfSimd4fCmpAllLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vminv_u32(vcltq_f32(arg1, arg2)) != 0; -} - -inline bool tfSimd4fCmpAllGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vminv_u32(vcgtq_f32(arg1, arg2)) != 0; -} - -inline bool tfSimd4fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return vminv_u32(vceqq_f32(arg1, arg2)) != 0; -} - -inline bool tfSimd4iCmpAllEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return vminv_u32(vceqq_s32(arg1, arg2)) != 0; -} +//static inline TSimd32Fx4 tfSimd4fReplaceIndex0ByValue(TSimd32Fx4 input, float value) { return vsetq_lane_f32(value, input, 0); } +//static inline TSimd32Fx4 tfSimd4fReplaceIndex1ByValue(TSimd32Fx4 input, float value) { return vsetq_lane_f32(value, input, 1); } +//static inline TSimd32Fx4 tfSimd4fReplaceIndex2ByValue(TSimd32Fx4 input, float value) { return vsetq_lane_f32(value, input, 2); } +//static inline TSimd32Fx4 tfSimd4fReplaceIndex3ByValue(TSimd32Fx4 input, float value) { return vsetq_lane_f32(value, input, 3); } +// +//inline TSimd32Ix4 tfSimd4iSelect(TSimd32Ix4 arg0, TSimd32Ix4 arg1, TSimd32Ix4 mask) { return vbslq_s32(mask, arg1, arg1); } +//inline TSimd32Fx4 tfSimd4fSelect(TSimd32Fx4 arg0, TSimd32Fx4 arg1, TSimd32Fx4 mask) { return vbslq_f32(mask, arg1, arg1); } +// +//inline TSimd32Fx4 tfSimd4fZero() { return vmovq_n_f32(0.0f); } +//inline TSimd32Ix4 tfSimd4iZero() { return vmovq_n_s32(0); } +// +//inline TSimd32Ix4 tfSimd4iNot(TSimd32Ix4 value) { return vmvnq_s32(value); } +//inline TSimd32Ix4 tfSimd4iAnd(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vandq_s32(arg1, arg2); } +//inline TSimd32Ix4 tfSimd4iAndNot(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vandq_s32(vmvnq_s32(arg1), arg2); } +//inline TSimd32Ix4 tfSimd4iOr(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vorrq_s32(arg1, arg2); } +//inline TSimd32Ix4 tfSimd4iXor(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return veorq_s32(arg1, arg2); } +// +//inline TSimd32Fx4 tfSimd4fNot(TSimd32Fx4 value) { return vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(value))); } +//inline TSimd32Fx4 tfSimd4fAnd(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vreinterpretq_f32_s32(vandq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); +//} +//inline TSimd32Fx4 tfSimd4fAndNot(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return 
vreinterpretq_f32_s32(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(arg1)), vreinterpretq_s32_f32(arg2))); +//} +//inline TSimd32Fx4 tfSimd4fOr(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vreinterpretq_f32_s32(vorrq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); +//} +//inline TSimd32Fx4 tfSimd4fXor(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); +//} +// +//inline TSimd32Fx4 tfSimd4fFloor(TSimd32Fx4 value) { return vrndmq_f32(value); } +//inline TSimd32Fx4 tfSimd4fCeil(TSimd32Fx4 value) { return vrndpq_f32(value); } +//inline TSimd32Fx4 tfSimd4fRound(TSimd32Fx4 value) { return vrndnq_f32(value); } +//inline TSimd32Fx4 tfSimd4fTruncate(TSimd32Fx4 value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } +//inline TSimd32Fx4 tfSimd4fMin(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vminq_f32(arg1, arg2); } +//inline TSimd32Fx4 tfSimd4fMax(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vmaxq_f32(arg1, arg2); } +//inline TSimd32Fx4 tfSimd4fClamp(TSimd32Fx4 value, TSimd32Fx4 min, TSimd32Fx4 max) { +// return tfSimd4fMax(min, tfSimd4fMin(value, max)); +//} +// +//inline TSimd32Ix4 tfSimd4fToSimd4i(TSimd32Fx4 value) { return vreinterpretq_f32_s32(value); } +// +//inline TSimd32Fx4 tfSimd4iToSimd4f(TSimd32Ix4 value) { return vreinterpretq_s32_f32(value); } +// +//inline float tfSimd4fSelectIndex0(TSimd32Fx4 value) { return vgetq_lane_f32(value, 0); } +//inline float tfSimd4fSelectIndex1(TSimd32Fx4 value) { return vgetq_lane_f32(value, 1); } +//inline float tfSimd4fSelectIndex2(TSimd32Fx4 value) { return vgetq_lane_f32(value, 2); } +//inline float tfSimd4fSelectIndex3(TSimd32Fx4 value) { return vgetq_lane_f32(value, 3); } +// +//inline TSimd32Fx4 tfSimd4fAdd(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vaddq_f32(arg1, arg2); } +//inline TSimd32Fx4 tfSimd4fSub(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vsubq_f32(arg1, arg2); } +//inline TSimd32Fx4 tfSimd4fMul(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vmulq_f32(arg1, arg2); } +//inline TSimd32Fx4 tfSimd4fMadd(TSimd32Fx4 mul1, TSimd32Fx4 mul2, TSimd32Fx4 add) { return vmlaq_f32(add, mul1, mul2); } +// +//inline TSimd32Fx4 tfSimd4fDiv(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vdivq_f32(arg1, arg2); } +// +//inline TSimd32Fx4 tfSimd4fAbs(TSimd32Fx4 value) { return vabsq_f32(value); } +//inline TSimd32Fx4 tfSimdFloat4x32Load(float x, float y, float z, float w) { +// const float values[4] = { x, y, z, w }; +// return vld1q_f32(values); +//} +// +//inline TSimd32Ix4 tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { +// const int32_t values[4] = { x, y, z, w }; +// return vld1q_s32(values); +//} +// +//inline Tsimd_f32x2_t tfSimd4fToSimd2f(TSimd32Fx4 value) { return vget_low_f32(value); } +// +//inline TSimd32Fx3 tfSimd4fToSimd3f(TSimd32Fx4 value) { return value; } +// +//inline TSimd32Fx4 tfSimd4fSplatIndex0(TSimd32Fx4 value) { return vdupq_laneq_f32(value, 0); } +// +//inline TSimd32Fx4 tfSimd4fSplatIndex1(TSimd32Fx4 value) { return vdupq_laneq_f32(value, 1); } +// +//inline TSimd32Fx4 tfSimd4fSplatIndex2(TSimd32Fx4 value) { return vdupq_laneq_f32(value, 2); } +// +//inline TSimd32Fx4 tfSimd4fSplatIndex3(TSimd32Fx4 value) { return vdupq_laneq_f32(value, 3); } +// +//inline TSimd32Ix4 tfSimd4iSplat(int32_t value) { return vdupq_n_s32(value); } +// +//inline TSimd32Fx4 tfSimd4fSplat(float value) { return vdupq_n_f32(value); } +// +//inline TSimd32Fx4 tfSimd4fCmpEq(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return 
vreinterpretq_f32_s32(vceqq_f32(arg1, arg2)); } +//inline TSimd32Fx4 tfSimd4fCmpNeq(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vreinterpretq_f32_s32(vmvnq_s32(vceqq_f32(arg1, arg2))); +//} +//inline TSimd32Fx4 tfSimd4fCmpGt(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vreinterpretq_f32_s32(vcgtq_f32(arg1, arg2)); } +//inline TSimd32Fx4 tfSimd4fCmpGtEq(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vreinterpretq_f32_s32(vcgeq_f32(arg1, arg2)); } +//inline TSimd32Fx4 tfSimd4fCmpLt(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vreinterpretq_f32_s32(vcltq_f32(arg1, arg2)); } +//inline TSimd32Fx4 tfSimd4fCmpLtEq(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { return vreinterpretq_f32_s32(vcleq_f32(arg1, arg2)); } +// +//inline TSimd32Ix4 tfSimd4iCmpEq(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vceqq_s32(arg1, arg2); } +//inline TSimd32Ix4 tfSimd4iCmpNeq(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vmvnq_s32(vceqq_s32(arg1, arg2)); } +//inline TSimd32Ix4 tfSimd4iCmpGt(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vcgtq_s32(arg1, arg2); } +//inline TSimd32Ix4 tfSimd4iCmpGtEq(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vcgeq_s32(arg1, arg2); } +//inline TSimd32Ix4 tfSimd4iCmpLt(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vcltq_s32(arg1, arg2); } +//inline TSimd32Ix4 tfSimd4iCmpLtEq(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { return vcleq_s32(arg1, arg2); } +// +//inline bool tfSimd4fCmpAllLt(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vminv_u32(vcltq_f32(arg1, arg2)) != 0; +//} +// +//inline bool tfSimd4fCmpAllGt(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vminv_u32(vcgtq_f32(arg1, arg2)) != 0; +//} +// +//inline bool tfSimd4fCmpAllEq(TSimd32Fx4 arg1, TSimd32Fx4 arg2) { +// return vminv_u32(vceqq_f32(arg1, arg2)) != 0; +//} +// +//inline bool tfSimd4iCmpAllEq(TSimd32Ix4 arg1, TSimd32Ix4 arg2) { +// return vminv_u32(vceqq_s32(arg1, arg2)) != 0; +//} diff --git a/Forge/Math/Internal/TF_Simd32x4_scalar.inl b/Forge/Math/Internal/TF_Simd32x4_scalar.inl index 4528168ed0..26dc6de116 100644 --- a/Forge/Math/Internal/TF_Simd32x4_scalar.inl +++ b/Forge/Math/Internal/TF_Simd32x4_scalar.inl @@ -1,253 +1,508 @@ #if defined(__CLANGD__) -#include "Forge/TF_Config.h" -#undef TF_FEATURE_CPU_NEON -#undef TF_FEATURE_CPU_SSE #define TF_FEATURE_CPU_SCALAR +#include "Forge/TF_Config.h" #include "../TF_Simd32x4.h" #endif -static inline TSimdFloat32x4 tfSimd4fReplaceIndex0ByValue(TSimdFloat32x4 input, float value) { - return { value, input.v[1], input.v[2], input.v[3] }; -} -static inline TSimdFloat32x4 tfSimd4fReplaceIndex1ByValue(TSimdFloat32x4 input, float value) { - return { input.v[0], value, input.v[2], input.v[3] }; -} -static inline TSimdFloat32x4 tfSimd4fReplaceIndex2ByValue(TSimdFloat32x4 input, float value) { - return { input.v[0], input.v[1], value, input.v[3] }; -} -static inline TSimdFloat32x4 tfSimd4fReplaceIndex3ByValue(TSimdFloat32x4 input, float value) { - return { input.v[0], input.v[1], input.v[2], value }; -} +// Tsimd_f32x4_t +static inline Tsimd_f32x4_t tfSimdZero_f32x4() { return {0,0,0,0}; } +static inline Tsimd_f32x4_t tfSimdLoad_f32x4(float x, float y, float z, float w) { return { x, y, z, w }; } +static inline Tsimd_f32x4_t tfSimdSplat_f32x4(float value) { return { value, value, value, value }; } -inline TSimdInt32x4 tfSimd4iSelect(TSimdInt32x4 arg0, TSimdInt32x4 arg1, TSimdInt32x4 mask) { - return { (mask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (mask.v[1] == 0) ? arg0.v[1] : arg1.v[1], (mask.v[2] == 0) ? arg0.v[2] : arg1.v[2], - (mask.v[3] == 0) ? 
arg0.v[3] : arg1.v[3] }; +static inline Tsimd_f32x4_t tfSimdSplat0_f32x4(Tsimd_f32x4_t value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } +static inline Tsimd_f32x4_t tfSimdSplat1_f32x4(Tsimd_f32x4_t value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } +static inline Tsimd_f32x4_t tfSimdSplat2_f32x4(Tsimd_f32x4_t value) { return { value.v[2], value.v[2], value.v[2], value.v[2] }; } +static inline Tsimd_f32x4_t tfSimdSplat3_f32x4(Tsimd_f32x4_t value) { return { value.v[3], value.v[3], value.v[3], value.v[3] }; } + +static inline Tsimd_f32x4_t tfSimdDot_f32x4(Tsimd_f32x4_t a,Tsimd_f32x4_t b) { + const float result = tfSimdDot_f32x4_f32(a, b); + return { result, result, result, result }; } -inline TSimdFloat32x4 tfSimd4fSelect(TSimdFloat32x4 arg0, TSimdFloat32x4 arg1, TSimdFloat32x4 mask) { - TSimdInt32x4 intMask = tfSimd4fToSimd4i(mask); - return { (intMask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (intMask.v[1] == 0) ? arg0.v[1] : arg1.v[1], - (intMask.v[2] == 0) ? arg0.v[2] : arg1.v[2], (intMask.v[3] == 0) ? arg0.v[3] : arg1.v[3] }; + +static inline float tfSimdDot_f32x4_f32(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { + const float result = (a.v[0] * b.v[0]) + (a.v[1] * b.v[1]) + (a.v[2] * b.v[2]) + (a.v[3] * b.v[3]); + return result; } -inline TSimdFloat32x4 tfSimd4fZero() { return { 0, 0, 0, 0 }; } -inline TSimdInt32x4 tfSimd4iZero() { return { 0, 0, 0, 0 }; } +static inline float tfSimdSelect_f32x4(Tsimd_f32x4_t value, int index) {ASSERT(index < 4); return value.v[index];} +static inline float tfSimdSelect0_f32x4(Tsimd_f32x4_t value) { return value.v[0]; } +static inline float tfSimdSelect1_f32x4(Tsimd_f32x4_t value) { return value.v[1]; } +static inline float tfSimdSelect2_f32x4(Tsimd_f32x4_t value) { return value.v[2]; } +static inline float tfSimdSelect3_f32x4(Tsimd_f32x4_t value) { return value.v[3]; } -inline TSimdInt32x4 tfSimd4iNot(TSimdInt32x4 value) { return { ~value.v[0], ~value.v[1], ~value.v[2], ~value.v[3] }; } -inline TSimdInt32x4 tfSimd4iAnd(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1], arg1.v[2] & arg2.v[2], arg1.v[3] & arg2.v[3] }; -} -inline TSimdInt32x4 tfSimd4iAndNot(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1], ~arg1.v[2] & arg2.v[2], ~arg1.v[3] & arg2.v[3] }; -} -inline TSimdInt32x4 tfSimd4iOr(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1], arg1.v[2] | arg2.v[2], arg1.v[3] | arg2.v[3] }; -} -inline TSimdInt32x4 tfSimd4iXor(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1], arg1.v[2] ^ arg2.v[2], arg1.v[3] ^ arg2.v[3] }; +static inline Tsimd_f32x4_t tfSimdAdd_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { return { + a.v[0] + b.v[0], + a.v[1] + b.v[1], + a.v[2] + b.v[2], + a.v[3] + b.v[3] +}; } +static inline Tsimd_f32x4_t tfSimdMul_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { + return { a.v[0] * b.v[0], a.v[1] * b.v[1], a.v[2] * b.v[2], a.v[3] * b.v[3] }; +} +static inline Tsimd_f32x4_t tfSimdDiv_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { + return { a.v[0] / b.v[0], a.v[1] / b.v[1], a.v[2] / b.v[2], a.v[3] / b.v[3] }; +} +static inline Tsimd_f32x4_t tfSimdAbs_f32x4(Tsimd_f32x4_t a) { return { fabsf(a.v[0]), fabsf(a.v[1]), fabsf(a.v[2]), fabsf(a.v[3]) }; } +static inline Tsimd_f32x4_t tfSimdMadd_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b, Tsimd_f32x4_t c) { + return { (a.v[0] * b.v[0]) + c.v[0], (a.v[1] * b.v[1]) + c.v[1], (a.v[2] * b.v[2]) + c.v[2], (a.v[3] * 
b.v[3]) + +c.v[3] }; } -inline TSimdFloat32x4 tfSimd4fNot(TSimdFloat32x4 value) { - TSimdInt32x4 result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]), ~((int32_t)value.v[2]), ~((int32_t)value.v[3]) } }; - return tfSimd4iToSimd4f(result); -} -inline TSimdFloat32x4 tfSimd4fAnd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdInt32x4 result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), - ((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3]) }; - return tfSimd4iToSimd4f(result); +static inline Tsimd_f32x4_t tfSimdNot_f32x4(Tsimd_f32x4_t value) { + return { + (float)(~((int32_t)value.v[0])), + (float)(~((int32_t)value.v[1])), + (float)(~((int32_t)value.v[2])), + (float)(~((int32_t)value.v[3])) }; +} +static inline Tsimd_f32x4_t tfSimdAnd_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return { + (float)(((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0])), + (float)(((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1])), + (float)(((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2])), + (float)(((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3])) }; +} +static inline Tsimd_f32x4_t tfSimdAndNot_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return { + (float)(~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0])), + (float)(~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1])), + (float)(~((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2])), + (float)(~((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3])) }; + } -inline TSimdFloat32x4 tfSimd4fAndNot(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdInt32x4 result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), - ~((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]), ~((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3]) } }; - return tfSimd4iToSimd4f(result); +static inline Tsimd_f32x4_t tfSimdOr_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return { + (float)(((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0])), + (float)(((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1])), + (float)(((int32_t)arg1.v[2]) | ((int32_t)arg2.v[2])), + (float)(((int32_t)arg1.v[3]) | ((int32_t)arg2.v[3])) }; } -inline TSimdFloat32x4 tfSimd4fOr(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdInt32x4 result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]), - ((int32_t)arg1.v[2]) | ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) | ((int32_t)arg2.v[3]) } }; - return tfSimd4iToSimd4f(result); +static inline Tsimd_f32x4_t tfSimdXor_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return { + (float)(((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0])), + (float)(((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1])), + (float)(((int32_t)arg1.v[2]) ^ ((int32_t)arg2.v[2])), + (float)(((int32_t)arg1.v[3]) ^ ((int32_t)arg2.v[3])) }; } -inline TSimdFloat32x4 tfSimd4fXor(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdInt32x4 result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]), - ((int32_t)arg1.v[2]) ^ ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) ^ ((int32_t)arg2.v[3]) } }; - return tfSimd4iToSimd4f(result); + +static inline Tsimd_f32x4_t tfSimdCmpEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return tfSimd_i32x4_To_f32x4({ (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] == arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] == arg2.v[3]) ?
(int32_t)0xFFFFFFFF : 0x00000000 }); +} +static inline Tsimd_f32x4_t tfSimdCmpNeq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return tfSimd_i32x4_To_f32x4({ (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] != arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] != arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }); +} +static inline Tsimd_f32x4_t tfSimdCmpGt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return tfSimd_i32x4_To_f32x4({ + (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] > arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] > arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }); +} +static inline Tsimd_f32x4_t tfSimdCmpGtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return tfSimd_i32x4_To_f32x4({ (arg1.v[0] >= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] >= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] >= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }); +} +static inline Tsimd_f32x4_t tfSimdCmpLt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return tfSimd_i32x4_To_f32x4({ + (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }); +} +static inline Tsimd_f32x4_t tfSimdCmpLtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return tfSimd_i32x4_To_f32x4({ + (arg1.v[0] <= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] <= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] <= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] <= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }); } -inline TSimdFloat32x4 tfSimd4fFloor(TSimdFloat32x4 value) { - return { { floorf(value.v[0]), floorf(value.v[1]), floorf(value.v[2]), floorf(value.v[3]) } }; +static inline bool tfSimdCmpAllEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2){ + return arg1.v[0] == arg2.v[0] && arg1.v[1] == arg2.v[1] && arg1.v[2] == arg2.v[2] && arg1.v[3] == arg2.v[3]; } -inline TSimdFloat32x4 tfSimd4fCeil(TSimdFloat32x4 value) { - return { { ceilf(value.v[0]), ceilf(value.v[1]), ceilf(value.v[2]), ceilf(value.v[3]) } }; +static inline bool tfSimdCmpAllNeq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2){ + return arg1.v[0] != arg2.v[0] && arg1.v[1] != arg2.v[1] && arg1.v[2] != arg2.v[2] && arg1.v[3] != arg2.v[3]; } -inline TSimdFloat32x4 tfSimd4fRound(TSimdFloat32x4 value) { - // While 'roundf' may seem the obvious choice here, it rounds halfway cases - // away from zero regardless of the current rounding mode, but 'rintf' uses - // the current rounding mode which is consistent with other implementations. 
- return { { rintf(value.v[0]), rintf(value.v[1]), rintf(value.v[2]), rintf(value.v[3]) } }; +static inline bool tfSimdCmpAllGt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return arg1.v[0] > arg2.v[0] && arg1.v[1] > arg2.v[1] && arg1.v[2] > arg2.v[2] && arg1.v[3] > arg2.v[3]; } -inline TSimdFloat32x4 tfSimd4fTruncate(TSimdFloat32x4 value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } -inline TSimdFloat32x4 tfSimd4fMin(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { fminf(arg1.v[0], arg2.v[0]), fminf(arg1.v[1], arg2.v[1]), fminf(arg1.v[2], arg2.v[2]), fminf(arg1.v[3], arg2.v[3]) } }; +static inline bool tfSimdCmpAllGtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2){ + return arg1.v[0] >= arg2.v[0] && arg1.v[1] >= arg2.v[1] && arg1.v[2] >= arg2.v[2] && arg1.v[3] >= arg2.v[3]; } -inline TSimdFloat32x4 tfSimd4fMax(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { fmaxf(arg1.v[0], arg2.v[0]), fmaxf(arg1.v[1], arg2.v[1]), fmaxf(arg1.v[2], arg2.v[2]), fmaxf(arg1.v[3], arg2.v[3]) } }; +static inline bool tfSimdCmpAllLt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return arg1.v[0] < arg2.v[0] && arg1.v[1] < arg2.v[1] && arg1.v[2] < arg2.v[2] && arg1.v[3] < arg2.v[3]; } -inline TSimdFloat32x4 tfSimd4fClamp(TSimdFloat32x4 value, TSimdFloat32x4 min, TSimdFloat32x4 max) { - return tfSimd4fMax(min, tfSimd4fMin(value, max)); +static inline bool tfSimdCmpAllLtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2){ + return arg1.v[0] <= arg2.v[0] && arg1.v[1] <= arg2.v[1] && arg1.v[2] <= arg2.v[2] && arg1.v[3] <= arg2.v[3]; } -inline TSimdInt32x4 tfSimd4fToSimd4i(TSimdFloat32x4 value) { - return { (int32_t)value.v[0], (int32_t)value.v[1], (int32_t)value.v[2], (int32_t)value.v[3] }; -} -inline TSimdFloat32x4 tfSimd4iToSimd4f(TSimdInt32x4 value) { - return { (float)value.v[0], (float)value.v[1], (float)value.v[2], (float)value.v[3] }; -} -inline float tfSimd4fSelectIndex0(TSimdFloat32x4 value) { return value.v[0]; } -inline float tfSimd4fSelectIndex1(TSimdFloat32x4 value) { return value.v[1]; } -inline float tfSimd4fSelectIndex2(TSimdFloat32x4 value) { return value.v[2]; } -inline float tfSimd4fSelectIndex3(TSimdFloat32x4 value) { return value.v[3]; } +// Tsimd_i32x4_t +static inline Tsimd_i32x4_t tfSimdLoad_i32x4(int32_t x, int32_t y, int32_t z, int32_t w) { return { x, y, z, w }; } +static inline Tsimd_i32x4_t tfSimdSplat_i32x4(int32_t value) { return { value, value, value, value }; } -inline TSimdFloat32x4 tfSimd4fAdd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { - arg1.v[0] + arg2.v[0], - arg1.v[1] + arg2.v[1], - arg1.v[2] + arg2.v[2], - arg1.v[3] + arg2.v[3], - }; +static inline Tsimd_i32x4_t tfSimdSplat0_i32x4(Tsimd_i32x4_t value){ return { value.v[0], value.v[0], value.v[0], value.v[0] }; } +static inline Tsimd_i32x4_t tfSimdSplat1_i32x4(Tsimd_i32x4_t value){ return { value.v[1], value.v[1], value.v[1], value.v[1] }; } +static inline Tsimd_i32x4_t tfSimdSplat2_i32x4(Tsimd_i32x4_t value){ return { value.v[2], value.v[2], value.v[2], value.v[2] }; } +static inline Tsimd_i32x4_t tfSimdSplat3_i32x4(Tsimd_i32x4_t value){ return { value.v[3], value.v[3], value.v[3], value.v[3] }; } + +static inline int32_t tfSimdSelect_i32x4(Tsimd_i32x4_t value, int index) { + ASSERT(index < 4); + return value.v[index]; +} +static inline int32_t tfSimdSelect0_i32x4(Tsimd_i32x4_t value) { return value.v[0]; } +static inline int32_t tfSimdSelect1_i32x4(Tsimd_i32x4_t value) { return value.v[1]; } +static inline int32_t tfSimdSelect2_i32x4(Tsimd_i32x4_t value) { return value.v[2]; } +static 
inline int32_t tfSimdSelect3_i32x4(Tsimd_i32x4_t value) { return value.v[3]; } + +static inline Tsimd_i32x4_t tfSimdAdd_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b) { + return { a.v[0] + b.v[0], a.v[1] + b.v[1], a.v[2] + b.v[2], a.v[3] + b.v[3] }; } -inline TSimdFloat32x4 tfSimd4fSub(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { - arg1.v[0] - arg2.v[0], - arg1.v[1] - arg2.v[1], - arg1.v[2] - arg2.v[2], - arg1.v[3] - arg2.v[3], - }; +static inline Tsimd_i32x4_t tfSimdMul_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b) { + return { a.v[0] * b.v[0], a.v[1] * b.v[1], a.v[2] * b.v[2], a.v[3] * b.v[3] }; } -inline TSimdFloat32x4 tfSimd4fMul(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { +static inline Tsimd_i32x4_t tfSimdAbs_i32x4(Tsimd_i32x4_t a) { return { - arg1.v[0] * arg2.v[0], - arg1.v[1] * arg2.v[1], - arg1.v[2] * arg2.v[2], - arg1.v[3] * arg2.v[3], + abs(a.v[0]), + abs(a.v[1]), + abs(a.v[2]), + abs(a.v[3]), }; } -inline TSimdFloat32x4 tfSimd4fMadd(TSimdFloat32x4 mul1, TSimdFloat32x4 mul2, TSimdFloat32x4 add) { - return tfSimd4fAdd(tfSimd4fMul(mul1, mul2), add); +static inline Tsimd_i32x4_t tfSimdMadd_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b, Tsimd_i32x4_t c) { + return { (a.v[0] * b.v[0]) + c.v[0], (a.v[1] * b.v[1]) + c.v[1], (a.v[2] * b.v[2]) + c.v[2], (a.v[3] * b.v[3]) + +c.v[3] }; } -inline TSimdFloat32x4 tfSimd4fDiv(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { - arg1.v[0] / arg2.v[0], - arg1.v[1] / arg2.v[1], - arg1.v[2] / arg2.v[2], - arg1.v[3] / arg2.v[3], - }; +static inline Tsimd_i32x4_t tfSimdNot_i32x4(Tsimd_i32x4_t value) { + return { ~(int32_t)value.v[0], ~(int32_t)value.v[1], ~(int32_t)value.v[2], ~(int32_t)value.v[3] }; } - -inline TSimdFloat32x4 tfSimd4fAbs(TSimdFloat32x4 value) { - return { - abs(value.v[0]), - abs(value.v[1]), - abs(value.v[2]), - abs(value.v[3]), - }; +static inline Tsimd_i32x4_t tfSimdAnd_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1], arg1.v[2] & arg2.v[2], arg1.v[3] & arg2.v[3] }; +} +static inline Tsimd_i32x4_t tfSimdAndNot_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1], ~arg1.v[2] & arg2.v[2], ~arg1.v[3] & arg2.v[3] }; +} +static inline Tsimd_i32x4_t tfSimdOr_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1], arg1.v[2] | arg2.v[2], arg1.v[3] | arg2.v[3] }; +} +static inline Tsimd_i32x4_t tfSimdXor_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1], arg1.v[2] ^ arg2.v[2], arg1.v[3] ^ arg2.v[3] }; } -inline TSimdFloat32x4 tfSimdFloat4x32Load(float x, float y, float z, float w) { return { x, y, z, w }; } -inline TSimdInt32x4 tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { return { x, y, z, w }; } - -inline TSimdFloat32x2 tfSimd4fToSimd2f(TSimdFloat32x4 value) { return { value.v[0], value.v[1] }; } -inline TSimdFloat32x3 tfSimd4fToSimd3f(TSimdFloat32x4 value) { return { value.v[0], value.v[1], value.v[2] }; } - -inline TSimdFloat32x4 tfSimd4fSplatIndex0(TSimdFloat32x4 value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } -inline TSimdFloat32x4 tfSimd4fSplatIndex1(TSimdFloat32x4 value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } -inline TSimdFloat32x4 tfSimd4fSplatIndex2(TSimdFloat32x4 value) { return { value.v[2], value.v[2], value.v[2], value.v[2] }; } -inline TSimdFloat32x4 tfSimd4fSplatIndex3(TSimdFloat32x4 value) { return { value.v[3], value.v[3], value.v[3], value.v[3] }; } - 
-inline TSimdInt32x4 tfSimd4iSplat(int32_t value) { return { value, value, value, value }; } -inline TSimdFloat32x4 tfSimd4fSplat(float value) { return { value, value, value, value }; } -inline TSimdFloat32x4 tfSimd4fCmpEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { (arg1.v[0] == arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, - (arg1.v[2] == arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] == arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +static inline Tsimd_i32x4_t tfSimdCmpEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] == arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] == arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }; } -inline TSimdFloat32x4 tfSimd4fCmpNeq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { (arg1.v[0] != arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, - (arg1.v[2] != arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] != arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +static inline Tsimd_i32x4_t tfSimdCmpNeq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] != arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] != arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }; } -inline TSimdFloat32x4 tfSimd4fCmpGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { (arg1.v[0] > arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, - (arg1.v[2] > arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] > arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +static inline Tsimd_i32x4_t tfSimdCmpGt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] > arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] > arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }; } -inline TSimdFloat32x4 tfSimd4fCmpGtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { (arg1.v[0] >= arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, - (arg1.v[2] >= arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] >= arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +static inline Tsimd_i32x4_t tfSimdCmpGtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { (arg1.v[0] >= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] >= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] >= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }; } -inline TSimdFloat32x4 tfSimd4fCmpLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { (arg1.v[0] < arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, - (arg1.v[2] < arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +static inline Tsimd_i32x4_t tfSimdCmpLt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] < arg2.v[2]) ? 
(int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }; } -inline TSimdFloat32x4 tfSimd4fCmpLtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - return { { (arg1.v[0] <= arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] <= arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, - (arg1.v[2] <= arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] <= arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +static inline Tsimd_i32x4_t tfSimdCmpLtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return { (arg1.v[0] <= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] <= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] <= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] <= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 }; } -inline TSimdInt32x4 tfSimd4iCmpEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { { (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, - (arg1.v[2] == arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] == arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +static inline bool tfSimdCmpAllEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return arg1.v[0] == arg2.v[0] && arg1.v[1] == arg2.v[1] && arg1.v[2] == arg2.v[2] && arg1.v[3] == arg2.v[3]; } -inline TSimdInt32x4 tfSimd4iCmpNeq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { { (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, - (arg1.v[2] != arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] != arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +static inline bool tfSimdCmpAllNeq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return arg1.v[0] != arg2.v[0] && arg1.v[1] != arg2.v[1] && arg1.v[2] != arg2.v[2] && arg1.v[3] != arg2.v[3]; } -inline TSimdInt32x4 tfSimd4iCmpGt(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { { (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, - (arg1.v[2] > arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] > arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +static inline bool tfSimdCmpAllGt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return arg1.v[0] > arg2.v[0] && arg1.v[1] > arg2.v[1] && arg1.v[2] > arg2.v[2] && arg1.v[3] > arg2.v[3]; } -inline TSimdInt32x4 tfSimd4iCmpGtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { { (arg1.v[0] >= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, - (arg1.v[2] >= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] >= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +static inline bool tfSimdCmpAllGtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return arg1.v[0] >= arg2.v[0] && arg1.v[1] >= arg2.v[1] && arg1.v[2] >= arg2.v[2] && arg1.v[3] >= arg2.v[3]; } -inline TSimdInt32x4 tfSimd4iCmpLt(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, - (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? 
(int32_t)0xFFFFFFFF : 0x00000000 } }; +static inline bool tfSimdCmpAllLt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return arg1.v[0] < arg2.v[0] && arg1.v[1] < arg2.v[1] && arg1.v[2] < arg2.v[2] && arg1.v[3] < arg2.v[3]; } -inline TSimdInt32x4 tfSimd4iCmpLtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, - (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +static inline bool tfSimdCmpAllLtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return arg1.v[0] <= arg2.v[0] && arg1.v[1] <= arg2.v[1] && arg1.v[2] <= arg2.v[2] && arg1.v[3] <= arg2.v[3]; } -inline bool tfSimd4fCmpAllLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - for (int i = 0; i < 4; i++) { - if (arg1.v[i] >= arg2.v[i]) { - return false; - } - } - return true; +static inline Tsimd_i32x4_t tfSimd_f32x4_To_i32x4(Tsimd_f32x4_t a) { + union int_float { + Tsimd_f32x4_t a; + Tsimd_i32x4_t b; + } conversion; + conversion.a = a; + COMPILE_ASSERT(sizeof(Tsimd_f32x4_t) == sizeof(Tsimd_i32x4_t)); + return conversion.b; +} +static inline Tsimd_f32x4_t tfSimd_i32x4_To_f32x4(Tsimd_i32x4_t a) { + union int_float { + Tsimd_f32x4_t a; + Tsimd_i32x4_t b; + } conversion; + conversion.b = a; + COMPILE_ASSERT(sizeof(Tsimd_f32x4_t) == sizeof(Tsimd_i32x4_t)); + return conversion.a; } -inline bool tfSimd4fCmpAllGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - for (int i = 0; i < 4; i++) { - if (arg1.v[i] <= arg2.v[i]) { - return false; - } - } - return true; -} -inline bool tfSimd4fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - for (int i = 0; i < 4; i++) { - if (arg1.v[i] != arg2.v[i]) { - return false; - } - } - return true; -} - -inline bool tfSimd4iCmpAllEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - for (int i = 0; i < 4; i++) { - if (arg1.v[i] != arg2.v[i]) { - return false; - } - } - return true; -} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex0ByValue(Tsimd_f32x4_t input, float value) { +// return { value, input.v[1], input.v[2], input.v[3] }; +//} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex1ByValue(Tsimd_f32x4_t input, float value) { +// return { input.v[0], value, input.v[2], input.v[3] }; +//} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex2ByValue(Tsimd_f32x4_t input, float value) { +// return { input.v[0], input.v[1], value, input.v[3] }; +//} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex3ByValue(Tsimd_f32x4_t input, float value) { +// return { input.v[0], input.v[1], input.v[2], value }; +//} +// +//inline Tsimd_i32x4_t Tsimd_i32x4_tSelect(Tsimd_i32x4_t arg0, Tsimd_i32x4_t arg1, Tsimd_i32x4_t mask) { +// return { (mask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (mask.v[1] == 0) ? arg0.v[1] : arg1.v[1], (mask.v[2] == 0) ? arg0.v[2] : arg1.v[2], +// (mask.v[3] == 0) ? arg0.v[3] : arg1.v[3] }; +//} +//inline Tsimd_f32x4_t tfS32x4FSelect(Tsimd_f32x4_t arg0, Tsimd_f32x4_t arg1, Tsimd_f32x4_t mask) { +// Tsimd_i32x4_t intMask = tfS32x4FToS32x4I(mask); +// return { (intMask.v[0] == 0) ? arg0.v[0] : arg1.v[0], (intMask.v[1] == 0) ? arg0.v[1] : arg1.v[1], +// (intMask.v[2] == 0) ? arg0.v[2] : arg1.v[2], (intMask.v[3] == 0) ? 
arg0.v[3] : arg1.v[3] }; +//} +// +//inline Tsimd_f32x4_t tfS32x4FZero() { return { 0, 0, 0, 0 }; } +//inline Tsimd_i32x4_t Tsimd_i32x4_tZero() { return { 0, 0, 0, 0 }; } +// +//inline Tsimd_i32x4_t Tsimd_i32x4_tNot(Tsimd_i32x4_t value) { return { ~value.v[0], ~value.v[1], ~value.v[2], ~value.v[3] }; } +//inline Tsimd_i32x4_t Tsimd_i32x4_tAnd(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1], arg1.v[2] & arg2.v[2], arg1.v[3] & arg2.v[3] }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tAndNot(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1], ~arg1.v[2] & arg2.v[2], ~arg1.v[3] & arg2.v[3] }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tOr(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1], arg1.v[2] | arg2.v[2], arg1.v[3] | arg2.v[3] }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tXor(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1], arg1.v[2] ^ arg2.v[2], arg1.v[3] ^ arg2.v[3] }; +//} +// +//inline Tsimd_f32x4_t tfS32x4FNot(Tsimd_f32x4_t value) { +// Tsimd_i32x4_t result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]), ~((int32_t)value.v[2]), ~((int32_t)value.v[3]) } }; +// return Tsimd_i32x4_tToSimd4f(result); +//} +//inline Tsimd_f32x4_t tfS32x4FAnd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_i32x4_t result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), +// ((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3]) }; +// return Tsimd_i32x4_tToSimd4f(result); +//} +//inline Tsimd_f32x4_t tfS32x4FAndNot(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_i32x4_t result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), +// ~((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]), ~((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3]) } }; +// return Tsimd_i32x4_tToSimd4f(result); +//} +//inline Tsimd_f32x4_t tfS32x4FOr(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_i32x4_t result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]), +// ((int32_t)arg1.v[2]) | ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) | ((int32_t)arg2.v[3]) } }; +// return Tsimd_i32x4_tToSimd4f(result); +//} +//inline Tsimd_f32x4_t tfS32x4FXor(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_i32x4_t result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]), +// ((int32_t)arg1.v[2]) ^ ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) ^ ((int32_t)arg2.v[3]) } }; +// return Tsimd_i32x4_tToSimd4f(result); +//} +// +//inline Tsimd_f32x4_t tfS32x4FFloor(Tsimd_f32x4_t value) { +// return { { floorf(value.v[0]), floorf(value.v[1]), floorf(value.v[2]), floorf(value.v[3]) } }; +//} +//inline Tsimd_f32x4_t tfS32x4FCeil(Tsimd_f32x4_t value) { +// return { { ceilf(value.v[0]), ceilf(value.v[1]), ceilf(value.v[2]), ceilf(value.v[3]) } }; +//} +//inline Tsimd_f32x4_t tfS32x4FRound(Tsimd_f32x4_t value) { +// // While 'roundf' may seem the obvious choice here, it rounds halfway cases +// // away from zero regardless of the current rounding mode, but 'rintf' uses +// // the current rounding mode which is consistent with other implementations. 
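+//    // For example, with the default round-to-nearest-even mode, roundf(2.5f) yields 3.0f
+//    // while rintf(2.5f) yields 2.0f, which is what _mm_round_ps(_MM_FROUND_TO_NEAREST_INT)
+//    // and vrndnq_f32 produce on the SSE and NEON paths.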
+// return { { rintf(value.v[0]), rintf(value.v[1]), rintf(value.v[2]), rintf(value.v[3]) } }; +//} +//inline Tsimd_f32x4_t tfS32x4FTruncate(Tsimd_f32x4_t value) { return Tsimd_i32x4_tToSimd4f(tfS32x4FToS32x4I(value)); } +//inline Tsimd_f32x4_t tfS32x4FMin(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { fminf(arg1.v[0], arg2.v[0]), fminf(arg1.v[1], arg2.v[1]), fminf(arg1.v[2], arg2.v[2]), fminf(arg1.v[3], arg2.v[3]) } }; +//} +//inline Tsimd_f32x4_t tfS32x4FMax(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { fmaxf(arg1.v[0], arg2.v[0]), fmaxf(arg1.v[1], arg2.v[1]), fmaxf(arg1.v[2], arg2.v[2]), fmaxf(arg1.v[3], arg2.v[3]) } }; +//} +//inline Tsimd_f32x4_t tfS32x4FClamp(Tsimd_f32x4_t value, Tsimd_f32x4_t min, Tsimd_f32x4_t max) { +// return tfS32x4FMax(min, tfS32x4FMin(value, max)); +//} +// +//inline Tsimd_i32x4_t tfS32x4FToS32x4I(Tsimd_f32x4_t value) { +// return { (int32_t)value.v[0], (int32_t)value.v[1], (int32_t)value.v[2], (int32_t)value.v[3] }; +//} +// +//inline Tsimd_f32x4_t Tsimd_i32x4_tToSimd4f(Tsimd_i32x4_t value) { +// return { (float)value.v[0], (float)value.v[1], (float)value.v[2], (float)value.v[3] }; +//} +// +//inline float tfS32x4FSelectIndex0(Tsimd_f32x4_t value) { return value.v[0]; } +//inline float tfS32x4FSelectIndex1(Tsimd_f32x4_t value) { return value.v[1]; } +//inline float tfS32x4FSelectIndex2(Tsimd_f32x4_t value) { return value.v[2]; } +//inline float tfS32x4FSelectIndex3(Tsimd_f32x4_t value) { return value.v[3]; } +// +//inline Tsimd_f32x4_t tfS32x4FAdd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { +// arg1.v[0] + arg2.v[0], +// arg1.v[1] + arg2.v[1], +// arg1.v[2] + arg2.v[2], +// arg1.v[3] + arg2.v[3], +// }; +//} +//inline Tsimd_f32x4_t tfS32x4FSub(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { +// arg1.v[0] - arg2.v[0], +// arg1.v[1] - arg2.v[1], +// arg1.v[2] - arg2.v[2], +// arg1.v[3] - arg2.v[3], +// }; +//} +//inline Tsimd_f32x4_t tfS32x4FMul(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { +// arg1.v[0] * arg2.v[0], +// arg1.v[1] * arg2.v[1], +// arg1.v[2] * arg2.v[2], +// arg1.v[3] * arg2.v[3], +// }; +//} +//inline Tsimd_f32x4_t tfS32x4FMadd(Tsimd_f32x4_t mul1, Tsimd_f32x4_t mul2, Tsimd_f32x4_t add) { +// return tfS32x4FAdd(tfS32x4FMul(mul1, mul2), add); +//} +// +//inline Tsimd_f32x4_t tfS32x4FDiv(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { +// arg1.v[0] / arg2.v[0], +// arg1.v[1] / arg2.v[1], +// arg1.v[2] / arg2.v[2], +// arg1.v[3] / arg2.v[3], +// }; +//} +// +//inline Tsimd_f32x4_t tfS32x4FAbs(Tsimd_f32x4_t value) { +// return { +// abs(value.v[0]), +// abs(value.v[1]), +// abs(value.v[2]), +// abs(value.v[3]), +// }; +//} +//inline Tsimd_f32x4_t tfS32x4FLoad(float x, float y, float z, float w) { return { x, y, z, w }; } +//inline Tsimd_i32x4_t tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { return { x, y, z, w }; } +// +//inline Tsimd_f32x2_t tfS32x4FToS32x2F(Tsimd_f32x4_t value) { return { value.v[0], value.v[1] }; } +//inline Tsimd_f32x3_t tfS32x4FToS32x3F(Tsimd_f32x4_t value) { return { value.v[0], value.v[1], value.v[2] }; } +// +//inline Tsimd_f32x4_t tfS32x4FSplatIndex0(Tsimd_f32x4_t value) { return { value.v[0], value.v[0], value.v[0], value.v[0] }; } +//inline Tsimd_f32x4_t tfS32x4FSplatIndex1(Tsimd_f32x4_t value) { return { value.v[1], value.v[1], value.v[1], value.v[1] }; } +//inline Tsimd_f32x4_t tfS32x4FSplatIndex2(Tsimd_f32x4_t value) { return { value.v[2], value.v[2], value.v[2], value.v[2] }; } +//inline Tsimd_f32x4_t tfS32x4FSplatIndex3(Tsimd_f32x4_t value) { 
return { value.v[3], value.v[3], value.v[3], value.v[3] }; } +// +//inline Tsimd_i32x4_t Tsimd_i32x4_tSplat(int32_t value) { return { value, value, value, value }; } +//inline Tsimd_f32x4_t tfS32x4FSplat(float value) { return { value, value, value, value }; } +// +//inline Tsimd_f32x4_t tfS32x4FCmpEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { (arg1.v[0] == arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] == arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] == arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_f32x4_t tfS32x4FCmpNeq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { (arg1.v[0] != arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] != arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] != arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_f32x4_t tfS32x4FCmpGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { (arg1.v[0] > arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] > arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] > arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_f32x4_t tfS32x4FCmpGtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { (arg1.v[0] >= arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] >= arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] >= arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_f32x4_t tfS32x4FCmpLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { (arg1.v[0] < arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] < arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_f32x4_t tfS32x4FCmpLtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// return { { (arg1.v[0] <= arg2.v[0]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[1] <= arg2.v[1]) ? (float)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] <= arg2.v[2]) ? (float)0xFFFFFFFF : 0x00000000, (arg1.v[3] <= arg2.v[3]) ? (float)0xFFFFFFFF : 0x00000000 } }; +//} +// +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { { (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] == arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] == arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpNeq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { { (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] != arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] != arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpGt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { { (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] > arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] > arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpGtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { { (arg1.v[0] >= arg2.v[0]) ? 
(int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] >= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] >= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpLt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +//} +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpLtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, +// (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +//} +// +//inline bool tfS32x4FCmpAllLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// for (int i = 0; i < 4; i++) { +// if (arg1.v[i] >= arg2.v[i]) { +// return false; +// } +// } +// return true; +//} +// +//inline bool tfS32x4FCmpAllGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// for (int i = 0; i < 4; i++) { +// if (arg1.v[i] <= arg2.v[i]) { +// return false; +// } +// } +// return true; +//} +// +//inline bool tfS32x4FCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// for (int i = 0; i < 4; i++) { +// if (arg1.v[i] != arg2.v[i]) { +// return false; +// } +// } +// return true; +//} +// +//inline bool Tsimd_i32x4_tCmpAllEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// for (int i = 0; i < 4; i++) { +// if (arg1.v[i] != arg2.v[i]) { +// return false; +// } +// } +// return true; +//} diff --git a/Forge/Math/Internal/TF_Simd32x4_sse.inl b/Forge/Math/Internal/TF_Simd32x4_sse.inl index 674226a58d..2fe489b8d4 100644 --- a/Forge/Math/Internal/TF_Simd32x4_sse.inl +++ b/Forge/Math/Internal/TF_Simd32x4_sse.inl @@ -4,123 +4,291 @@ #include "../TF_Simd32x4.h" #endif -static inline TSimdFloat32x4 tfSimd4fReplaceIndex0ByValue(TSimdFloat32x4 input, float value) { - return _mm_blend_ps(input, tfSimd4fSplat(value), 0b0001); -} -static inline TSimdFloat32x4 tfSimd4fReplaceIndex1ByValue(TSimdFloat32x4 input, float value) { - return _mm_blend_ps(input, tfSimd4fSplat(value), 0b0010); -} -static inline TSimdFloat32x4 tfSimd4fReplaceIndex2ByValue(TSimdFloat32x4 input, float value) { - return _mm_blend_ps(input, tfSimd4fSplat(value), 0b0100); -} -static inline TSimdFloat32x4 tfSimd4fReplaceIndex3ByValue(TSimdFloat32x4 input, float value) { - return _mm_blend_ps(input, tfSimd4fSplat(value), 0b1000); -} +// Tsimd_f32x4_t +static inline Tsimd_f32x4_t tfSimdLoad_f32x4(float x, float y, float z, float w) { return _mm_set_ps(w, z, y, x); } +static inline Tsimd_f32x4_t tfSimdZero_f32x4() { return _mm_setzero_ps(); } +static inline Tsimd_f32x4_t tfSimdSplat_f32x4(float value) { return _mm_set1_ps(value); } -inline TSimdInt32x4 tfSimd4iSelect(TSimdInt32x4 arg0, TSimdInt32x4 arg1, TSimdInt32x4 mask) { return _mm_blendv_epi8(arg0, arg1, mask); } -inline TSimdFloat32x4 tfSimd4fSelect(TSimdFloat32x4 arg0, TSimdFloat32x4 arg1, TSimdFloat32x4 mask) { - return _mm_blendv_ps(arg0, arg1, mask); -} - -inline TSimdFloat32x4 tfSimd4fZero() { return _mm_setzero_ps(); } -inline TSimdInt32x4 tfSimd4iZero() { return _mm_setzero_si128(); } +static inline Tsimd_f32x4_t tfSimdSplat0_f32x4(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } +static inline Tsimd_f32x4_t 
tfSimdSplat1_f32x4(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } +static inline Tsimd_f32x4_t tfSimdSplat2_f32x4(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); }; +static inline Tsimd_f32x4_t tfSimdSplat3_f32x4(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(3, 3, 3, 3)); } -inline TSimdInt32x4 tfSimd4iNot(TSimdInt32x4 value) { return _mm_andnot_si128(value, _mm_set1_epi32(TF_SIMDI_MAX)); } -inline TSimdInt32x4 tfSimd4iAnd(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_and_si128(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iAndNot(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_andnot_si128(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iOr(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_or_si128(arg1, arg2); } -inline TSimdInt32x4 tfSimd4iXor(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_xor_si128(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fNot(TSimdFloat32x4 value) { return _mm_andnot_ps(value, _mm_set1_ps((float)(TF_SIMDF_MAX))); } -inline TSimdFloat32x4 tfSimd4fAnd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_and_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fAndNot(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_andnot_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fOr(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_or_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fXor(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_xor_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdDot_f32x4(Tsimd_f32x4_t a,Tsimd_f32x4_t b) { + Tsimd_f32x4_t x2 = _mm_mul_ps(a, b); + Tsimd_f32x4_t tmp = _mm_hadd_ps(x2, x2); + return _mm_hadd_ps(tmp, tmp); +} -inline TSimdFloat32x4 tfSimd4fFloor(TSimdFloat32x4 value) { return _mm_floor_ps(value); } -inline TSimdFloat32x4 tfSimd4fCeil(TSimdFloat32x4 value) { return _mm_ceil_ps(value); } -inline TSimdFloat32x4 tfSimd4fRound(TSimdFloat32x4 value) { return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } -inline TSimdFloat32x4 tfSimd4fTruncate(TSimdFloat32x4 value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } -inline TSimdFloat32x4 tfSimd4fMin(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_min_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fMax(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_max_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fClamp(TSimdFloat32x4 value, TSimdFloat32x4 min, TSimdFloat32x4 max) { - return tfSimd4fMax(min, tfSimd4fMin(value, max)); +static inline float tfSimdDot_f32x4_f32(Tsimd_f32x4_t a,Tsimd_f32x4_t b) { + return _mm_cvtss_f32(tfSimdDot_f32x4(a,b)); } -inline TSimdInt32x4 tfSimd4fToSimd4i(TSimdFloat32x4 value) { return _mm_castps_si128(value); } -inline TSimdFloat32x4 tfSimd4iToSimd4f(TSimdInt32x4 value) { return _mm_castsi128_ps(value); } +static inline float tfSimdSelect_f32x4(Tsimd_f32x4_t value, int index) { + ASSERT(index < 4); + switch (index) { + case 0: return tfSimdSelect0_f32x4(value); + case 1: return tfSimdSelect1_f32x4(value); + case 2: return tfSimdSelect2_f32x4(value); + case 3: return tfSimdSelect3_f32x4(value); + } + return {}; +} +static inline float tfSimdSelect0_f32x4(Tsimd_f32x4_t value) { return _mm_cvtss_f32(value); } +static inline float tfSimdSelect1_f32x4(Tsimd_f32x4_t value) { return _mm_cvtss_f32(tfSimdSplat1_f32x4(value)); } +static inline float tfSimdSelect2_f32x4(Tsimd_f32x4_t value) { return _mm_cvtss_f32(tfSimdSplat2_f32x4(value)); } +static inline float tfSimdSelect3_f32x4(Tsimd_f32x4_t value) { return 
_mm_cvtss_f32(tfSimdSplat3_f32x4(value)); } -inline float tfSimd4fSelectIndex0(TSimdFloat32x4 value) { return _mm_cvtss_f32(value); } -inline float tfSimd4fSelectIndex1(TSimdFloat32x4 value) { return tfSimd4fSelectIndex0(tfSimd4fSplatIndex1(value)); } -inline float tfSimd4fSelectIndex2(TSimdFloat32x4 value) { return tfSimd4fSelectIndex0(tfSimd4fSplatIndex2(value)); } -inline float tfSimd4fSelectIndex3(TSimdFloat32x4 value) { return tfSimd4fSelectIndex0(tfSimd4fSplatIndex3(value)); } +static inline Tsimd_f32x4_t tfSimdAdd_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { return _mm_add_ps(a, b); } +static inline Tsimd_f32x4_t tfSimdMul_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { return _mm_mul_ps(a, b); } +static inline Tsimd_f32x4_t tfSimdDiv_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) { return _mm_div_ps(a, b); } +static inline Tsimd_f32x4_t tfSimdAbs_f32x4(Tsimd_f32x4_t a) { + const __m128 signMask = tfSimd_i32x4_To_f32x4(tfSimdSplat_i32x4(0x7FFFFFFF)); + return tfSimdAnd_f32x4(a, signMask); +} +static inline Tsimd_f32x4_t tfSimdMadd_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b, Tsimd_f32x4_t c) { + return tfSimdAdd_f32x4(tfSimdMul_f32x4(a, b), c); +} -inline TSimdFloat32x4 tfSimd4fAdd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_add_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fSub(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_sub_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fMul(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_mul_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fMadd(TSimdFloat32x4 mul1, TSimdFloat32x4 mul2, TSimdFloat32x4 add) { -#if 0 - return _mm_fmadd_ps(mul1, mul2, add); // Requires FMA CPUID -#else - return tfSimd4fAdd(tfSimd4fMul(mul1, mul2), add); -#endif +static inline Tsimd_f32x4_t tfSimdNot_f32x4(Tsimd_f32x4_t value) { + return _mm_andnot_ps(value, tfSimd_i32x4_To_f32x4(tfSimdSplat_i32x4((int32_t)(0xFFFFFFFF)))); } +static inline Tsimd_f32x4_t tfSimdAnd_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_and_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdAndNot_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_andnot_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdOr_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_or_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdXor_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_xor_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimdFloat4x32Div(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_div_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdCmpEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpeq_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdCmpNeq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpneq_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdCmpGt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpgt_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdCmpGtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpge_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdCmpLt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmplt_ps(arg1, arg2); } +static inline Tsimd_f32x4_t tfSimdCmpLtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmple_ps(arg1, arg2); } -inline TSimdFloat32x4 tfSimd4fAbs(TSimdFloat32x4 value) { - const TSimdFloat32x4 signMask = tfSimd4iToSimd4f(tfSimd4iSplat(0x7FFFFFFF)); - return _mm_and_ps(value, signMask); +static inline bool tfSimdCmpAllEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return (_mm_movemask_ps(tfSimdCmpEq_f32x4(arg1, 
arg2)) & 0xf) == 0xf; +} +static inline bool tfSimdCmpAllNeq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return (_mm_movemask_ps(tfSimdCmpNeq_f32x4(arg1, arg2)) & 0xf) == 0xf; +} +static inline bool tfSimdCmpAllGt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return (_mm_movemask_ps(tfSimdCmpGt_f32x4(arg1, arg2)) & 0xf) == 0xf; +} +static inline bool tfSimdCmpAllGtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return (_mm_movemask_ps(tfSimdCmpGtEq_f32x4(arg1, arg2)) & 0xf) == 0xf; +} +static inline bool tfSimdCmpAllLt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return (_mm_movemask_ps(tfSimdCmpLt_f32x4(arg1, arg2)) & 0xf) == 0xf; +} +static inline bool tfSimdCmpAllLtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return (_mm_movemask_ps(tfSimdCmpLtEq_f32x4(arg1, arg2)) & 0xf) == 0xf; } -inline TSimdFloat32x4 tfSimdFloat4x32Load(float x, float y, float z, float w) { return _mm_set_ps(w, z, y, x); } -inline TSimdInt32x4 tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { return _mm_set_epi32(w, z, y, x); } -inline TSimdFloat32x2 tfSimd4fToSimd2f(TSimdFloat32x4 value) { return value; } -inline TSimdFloat32x3 tfSimd4fToSimd3f(TSimdFloat32x4 value) { return value; } +// Tsimd_i32x4_t +static inline Tsimd_i32x4_t tfSimdLoad_i32x4(int32_t x, int32_t y, int32_t z, int32_t w) { return _mm_set_epi32(w, z, y, x); } +static inline Tsimd_i32x4_t tfSimdSplat_i32x4(int32_t value) { return _mm_set1_epi32(value); } +static inline Tsimd_i32x4_t tfSimdSplat0_i32x4(Tsimd_i32x4_t value) { return _mm_shuffle_epi32(value, _MM_SHUFFLE(0, 0, 0, 0)); } +static inline Tsimd_i32x4_t tfSimdSplat1_i32x4(Tsimd_i32x4_t value) { return _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)); } +static inline Tsimd_i32x4_t tfSimdSplat2_i32x4(Tsimd_i32x4_t value) { return _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 2, 2, 2)); } +static inline Tsimd_i32x4_t tfSimdSplat3_i32x4(Tsimd_i32x4_t value) { return _mm_shuffle_epi32(value, _MM_SHUFFLE(3, 3, 3, 3)); } -inline TSimdFloat32x4 tfSimd4fSplatIndex0(TSimdFloat32x4 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } -inline TSimdFloat32x4 tfSimd4fSplatIndex1(TSimdFloat32x4 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } -inline TSimdFloat32x4 tfSimd4fSplatIndex2(TSimdFloat32x4 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); } -inline TSimdFloat32x4 tfSimd4fSplatIndex3(TSimdFloat32x4 value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(3, 3, 3, 3)); } +static inline int32_t tfSimdSelect_i32x4(Tsimd_i32x4_t value, int index) { + ASSERT(index < 4); + switch(index) { + case 0: return tfSimdSelect0_i32x4(value); + case 1: return tfSimdSelect1_i32x4(value); + case 2: return tfSimdSelect2_i32x4(value); + case 3: return tfSimdSelect3_i32x4(value); + } + return {}; -inline TSimdInt32x4 tfSimd4iSplat(int32_t value) { return _mm_set1_epi32(value); } -inline TSimdFloat32x4 tfSimd4fSplat(float value) { return _mm_set1_ps(value); } +} +static inline int32_t tfSimdSelect0_i32x4(Tsimd_i32x4_t value) { return _mm_cvtsi128_si32(value); } +static inline int32_t tfSimdSelect1_i32x4(Tsimd_i32x4_t value) { return _mm_cvtsi128_si32(tfSimdSplat1_i32x4(value)); }; +static inline int32_t tfSimdSelect2_i32x4(Tsimd_i32x4_t value) { return _mm_cvtsi128_si32(tfSimdSplat2_i32x4(value)); }; +static inline int32_t tfSimdSelect3_i32x4(Tsimd_i32x4_t value) { return _mm_cvtsi128_si32(tfSimdSplat3_i32x4(value)); }; -inline TSimdFloat32x4 tfSimd4fCmpEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return 
_mm_cmpeq_ps(arg1, arg2); }
-inline TSimdFloat32x4 tfSimd4fCmpNeq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_cmpneq_ps(arg1, arg2); }
-inline TSimdFloat32x4 tfSimd4fCmpGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_cmpgt_ps(arg1, arg2); }
-inline TSimdFloat32x4 tfSimd4fCmpGtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_cmpge_ps(arg1, arg2); }
-inline TSimdFloat32x4 tfSimd4fCmpLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_cmplt_ps(arg1, arg2); }
-inline TSimdFloat32x4 tfSimd4fCmpLtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { return _mm_cmple_ps(arg1, arg2); }
+static inline Tsimd_i32x4_t tfSimd_f32x4_To_i32x4(Tsimd_f32x4_t a) { return _mm_castps_si128(a); }
+static inline Tsimd_f32x4_t tfSimd_i32x4_To_f32x4(Tsimd_i32x4_t a) { return _mm_castsi128_ps(a); }
-inline TSimdInt32x4 tfSimd4iCmpEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_cmpeq_epi32(arg1, arg2); }
-inline TSimdInt32x4 tfSimd4iCmpNeq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) {
-    return _mm_xor_si128(_mm_cmpeq_epi32(arg1, arg2), _mm_set1_epi32((int32_t)0xFFFFFFFF));
+static inline Tsimd_i32x4_t tfSimdAdd_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b) { return _mm_add_epi32(a, b); }
+// _mm_mullo_epi32 (SSE4.1) is the lane-wise 32-bit multiply that matches the scalar and NEON
+// implementations; _mm_mul_epi32 would widen only the even lanes to 64-bit products.
+static inline Tsimd_i32x4_t tfSimdMul_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b) { return _mm_mullo_epi32(a, b); }
+static inline Tsimd_i32x4_t tfSimdAbs_i32x4(Tsimd_i32x4_t a) { return _mm_abs_epi32(a); }
+static inline Tsimd_i32x4_t tfSimdMadd_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b, Tsimd_i32x4_t c) {
+    return tfSimdAdd_i32x4(tfSimdMul_i32x4(a, b), c);
 }
-inline TSimdInt32x4 tfSimd4iCmpGt(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_cmpgt_epi32(arg1, arg2); }
-inline TSimdInt32x4 tfSimd4iCmpGtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) {
+static inline Tsimd_i32x4_t tfSimdNot_i32x4(Tsimd_i32x4_t value) { return _mm_andnot_si128(value, tfSimdSplat_i32x4((int32_t)0xFFFFFFFF)); }
+static inline Tsimd_i32x4_t tfSimdAnd_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_and_si128(arg1, arg2); }
+static inline Tsimd_i32x4_t tfSimdAndNot_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_andnot_si128(arg1, arg2); }
+static inline Tsimd_i32x4_t tfSimdOr_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_or_si128(arg1, arg2); }
+static inline Tsimd_i32x4_t tfSimdXor_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_xor_si128(arg1, arg2); }
+
+static inline Tsimd_i32x4_t tfSimdCmpEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_cmpeq_epi32(arg1, arg2); }
+static inline Tsimd_i32x4_t tfSimdCmpNeq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) {
+    return tfSimdNot_i32x4(tfSimdCmpEq_i32x4(arg1, arg2));
+}
+static inline Tsimd_i32x4_t tfSimdCmpGt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_cmpgt_epi32(arg1, arg2); }
+
+static inline Tsimd_i32x4_t tfSimdCmpGtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) {
     return _mm_or_si128(_mm_cmpgt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2));
 }
-inline TSimdInt32x4 tfSimd4iCmpLt(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { return _mm_cmplt_epi32(arg1, arg2); }
-inline TSimdInt32x4 tfSimd4iCmpLtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) {
+static inline Tsimd_i32x4_t tfSimdCmpLt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_cmplt_epi32(arg1, arg2); }
+static inline Tsimd_i32x4_t tfSimdCmpLtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) {
     return _mm_or_si128(_mm_cmplt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2));
 }
-inline bool tfSimd4fCmpAllLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) {
-    TSimdFloat32x4 compare = 
tfSimd4fCmpLt(arg1, arg2); - return (_mm_movemask_ps(compare) & 0xf) == 0xf; -} -inline bool tfSimd4fCmpAllGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdFloat32x4 compare = tfSimd4fCmpGt(arg1, arg2); - return (_mm_movemask_ps(compare) & 0xf) == 0xf; +static inline bool tfSimdCmpAllEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return (_mm_movemask_epi8(tfSimdCmpEq_i32x4(arg1, arg2)) & 0xFFFF) == 0xFFFF; } - -inline bool tfSimd4fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) { - TSimdFloat32x4 compare = tfSimd4fCmpEq(arg1, arg2); - return (_mm_movemask_ps(compare) & 0xf) == 0xf; +static inline bool tfSimdCmpAllNeq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return (_mm_movemask_epi8(tfSimdCmpNeq_i32x4(arg1, arg2)) & 0xFFFF) == 0xFFFF; } - -inline bool tfSimd4iCmpAllEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) { - const TSimdInt32x4 compare = tfSimd4iCmpEq(arg1, arg2); - return (_mm_movemask_epi8(compare) & 0xf) == 0xf; +static inline bool tfSimdCmpAllGt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return (_mm_movemask_epi8(tfSimdCmpGt_i32x4(arg1, arg2))& 0xFFFF) == 0xFFFF; +} +static inline bool tfSimdCmpAllGtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return (_mm_movemask_epi8(tfSimdCmpGtEq_i32x4(arg1, arg2))& 0xFFFF) == 0xFFFF; } +static inline bool tfSimdCmpAllLt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return (_mm_movemask_epi8(tfSimdCmpLt_i32x4(arg1, arg2)) & 0xFFFF) == 0xFFFF; +} +static inline bool tfSimdCmpAllLtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return (_mm_movemask_epi8(tfSimdCmpLtEq_i32x4(arg1, arg2))& 0xFFFF) == 0xFFFF; +} + +// DELETE everything below +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex0(Tsimd_f32x4_t input, Tsimd_f32x4_t value) { +// return _mm_blend_ps(input, value, 0b0001); +//} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex1(Tsimd_f32x4_t input, Tsimd_f32x4_t value){ +// return _mm_blend_ps(input, value, 0b0010); +//} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex2(Tsimd_f32x4_t input, Tsimd_f32x4_t value){ +// return _mm_blend_ps(input, value, 0b0100); +//} +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex3(Tsimd_f32x4_t input, Tsimd_f32x4_t value){ +// return _mm_blend_ps(input, value, 0b1000); +//} +// +// +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex0ByValue(Tsimd_f32x4_t input, float value) { +// return _mm_blend_ps(input, tfS32x4FSplat(value), 0b0001); +//} +//static inline Tsimd_f32x4_t tfSimdFloat4ReplaceIndex1ByValue(Tsimd_f32x4_t input, float value) { +// return _mm_blend_ps(input, tfS32x4FSplat(value), 0b0010); +//} +//static inline Tsimd_f32x4_t tfSimd4fReplaceIndex2ByValue(Tsimd_f32x4_t input, float value) { +// return _mm_blend_ps(input, tfS32x4FSplat(value), 0b0100); +//} +//static inline Tsimd_f32x4_t tfSimd4fReplaceIndex3ByValue(Tsimd_f32x4_t input, float value) { +// return _mm_blend_ps(input, tfS32x4FSplat(value), 0b1000); +//} +// +//inline Tsimd_i32x4_t tfSimd4iSelect(Tsimd_i32x4_t arg0, Tsimd_i32x4_t arg1, Tsimd_i32x4_t mask) { return _mm_blendv_epi8(arg0, arg1, mask); } +//inline Tsimd_f32x4_t tfSimd4fSelect(Tsimd_f32x4_t arg0, Tsimd_f32x4_t arg1, Tsimd_f32x4_t mask) { +// return _mm_blendv_ps(arg0, arg1, mask); +//} +// +//inline Tsimd_f32x4_t tfSimd4fZero() { return _mm_setzero_ps(); } +//inline Tsimd_i32x4_t tfSimd4iZero() { return _mm_setzero_si128(); } +// +//inline Tsimd_i32x4_t tfSimd4iNot(Tsimd_i32x4_t value) { return _mm_andnot_si128(value, _mm_set1_epi32(TF_SIMDI_MAX)); } +//inline Tsimd_i32x4_t tfSimd4iAnd(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { 
return _mm_and_si128(arg1, arg2); } +//inline Tsimd_i32x4_t tfSimd4iAndNot(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_andnot_si128(arg1, arg2); } +//inline Tsimd_i32x4_t tfSimd4iOr(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_or_si128(arg1, arg2); } +//inline Tsimd_i32x4_t tfSimd4iXor(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_xor_si128(arg1, arg2); } +// +//inline Tsimd_f32x4_t tfSimd4fNot(Tsimd_f32x4_t value) { return _mm_andnot_ps(value, _mm_set1_ps((float)(TF_SIMDF_MAX))); } +//inline Tsimd_f32x4_t tfSimd4fAnd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_and_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fAndNot(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_andnot_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fOr(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_or_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fXor(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_xor_ps(arg1, arg2); } +// +//inline Tsimd_f32x4_t tfSimd4fFloor(Tsimd_f32x4_t value) { return _mm_floor_ps(value); } +//inline Tsimd_f32x4_t tfSimd4fCeil(Tsimd_f32x4_t value) { return _mm_ceil_ps(value); } +//inline Tsimd_f32x4_t tfSimd4fRound(Tsimd_f32x4_t value) { return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } +//inline Tsimd_f32x4_t tfSimd4fTruncate(Tsimd_f32x4_t value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } +//inline Tsimd_f32x4_t tfSimd4fMin(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_min_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fMax(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_max_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fClamp(Tsimd_f32x4_t value, Tsimd_f32x4_t min, Tsimd_f32x4_t max) { +// return tfSimd4fMax(min, tfSimd4fMin(value, max)); +//} +// +//inline Tsimd_i32x4_t tfSimd4fToSimd4i(Tsimd_f32x4_t value) { return _mm_castps_si128(value); } +//inline Tsimd_f32x4_t tfSimd4iToSimd4f(Tsimd_i32x4_t value) { return _mm_castsi128_ps(value); } +// +//inline float tfSimd4fSelectIndex0(Tsimd_f32x4_t value) { return _mm_cvtss_f32(value); } +//inline float tfSimd4fSelectIndex1(Tsimd_f32x4_t value) { return tfSimd4fSelectIndex0(tfSimd4fSplatIndex1(value)); } +//inline float tfSimd4fSelectIndex2(Tsimd_f32x4_t value) { return tfSimd4fSelectIndex0(tfSimd4fSplatIndex2(value)); } +//inline float tfSimd4fSelectIndex3(Tsimd_f32x4_t value) { return tfSimd4fSelectIndex0(tfSimd4fSplatIndex3(value)); } +// +//inline Tsimd_f32x4_t tfSimd4fAdd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_add_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fSub(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_sub_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fMul(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_mul_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fMadd(Tsimd_f32x4_t mul1, Tsimd_f32x4_t mul2, Tsimd_f32x4_t add) { +//#if 0 +// return _mm_fmadd_ps(mul1, mul2, add); // Requires FMA CPUID +//#else +// return tfSimd4fAdd(tfSimd4fMul(mul1, mul2), add); +//#endif +//} +// +//inline Tsimd_f32x4_t tfSimdFloat4x32Div(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_div_ps(arg1, arg2); } +// +//inline Tsimd_f32x4_t tfSimd4fAbs(Tsimd_f32x4_t value) { +// const Tsimd_f32x4_t signMask = tfSimd4iToSimd4f(tfSimd4iSplat(0x7FFFFFFF)); +// return _mm_and_ps(value, signMask); +//} +//inline Tsimd_f32x4_t tfSimdFloat4x32Load(float x, float y, float z, float w) { return _mm_set_ps(w, z, y, x); } +//inline Tsimd_i32x4_t tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { return _mm_set_epi32(w, 
z, y, x); } +// +//inline Tsimd_f32x2_t tfSimd4fToSimd2f(Tsimd_f32x4_t value) { return value; } +//inline Tsimd_f32x3_t tfSimd4fToSimd3f(Tsimd_f32x4_t value) { return value; } +// +//inline Tsimd_f32x4_t tfSimd4fSplatIndex0(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); } +//inline Tsimd_f32x4_t tfSimd4fSplatIndex1(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); } +//inline Tsimd_f32x4_t tfSimd4fSplatIndex2(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); } +//inline Tsimd_f32x4_t tfSimd4fSplatIndex3(Tsimd_f32x4_t value) { return _mm_shuffle_ps(value, value, _MM_SHUFFLE(3, 3, 3, 3)); } +// +//inline Tsimd_i32x4_t tfSimd4iSplat(int32_t value) { return _mm_set1_epi32(value); } +//inline Tsimd_f32x4_t tfSimd4fSplat(float value) { return _mm_set1_ps(value); } +// +//inline Tsimd_f32x4_t tfSimd4fCmpEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpeq_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fCmpNeq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpneq_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fCmpGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpgt_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fCmpGtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmpge_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fCmpLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmplt_ps(arg1, arg2); } +//inline Tsimd_f32x4_t tfSimd4fCmpLtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return _mm_cmple_ps(arg1, arg2); } +// +//inline Tsimd_i32x4_t tfSimd4iCmpEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_cmpeq_epi32(arg1, arg2); } +//inline Tsimd_i32x4_t tfSimd4iCmpNeq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return _mm_xor_si128(_mm_cmpeq_epi32(arg1, arg2), _mm_set1_epi32((int32_t)0xFFFFFFFF)); +//} +//inline Tsimd_i32x4_t tfSimd4iCmpGt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_cmpgt_epi32(arg1, arg2); } +//inline Tsimd_i32x4_t tfSimd4iCmpGtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return _mm_or_si128(_mm_cmpgt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2)); +//} +//inline Tsimd_i32x4_t tfSimd4iCmpLt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return _mm_cmplt_epi32(arg1, arg2); } +//inline Tsimd_i32x4_t tfSimd4iCmpLtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// return _mm_or_si128(_mm_cmplt_epi32(arg1, arg2), _mm_cmpeq_epi32(arg1, arg2)); +//} +//inline bool tfSimd4fCmpAllLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_f32x4_t compare = tfSimd4fCmpLt(arg1, arg2); +// return (_mm_movemask_ps(compare) & 0xf) == 0xf; +//} +// +//inline bool tfSimd4fCmpAllGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_f32x4_t compare = tfSimd4fCmpGt(arg1, arg2); +// return (_mm_movemask_ps(compare) & 0xf) == 0xf; +//} +// +//inline bool tfSimd4fCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { +// Tsimd_f32x4_t compare = tfSimd4fCmpEq(arg1, arg2); +// return (_mm_movemask_ps(compare) & 0xf) == 0xf; +//} +// +//inline bool tfSimd4iCmpAllEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { +// const Tsimd_i32x4_t compare = tfSimd4iCmpEq(arg1, arg2); +// return (_mm_movemask_epi8(compare) & 0xf) == 0xf; +//} diff --git a/Forge/Math/Internal/TF_Simd4x32_neon.inl b/Forge/Math/Internal/TF_Simd4x32_neon.inl new file mode 100644 index 0000000000..22a4c10ed2 --- /dev/null +++ b/Forge/Math/Internal/TF_Simd4x32_neon.inl @@ -0,0 +1,121 @@ +#if defined(__CLANGD__) +#define TF_FEATURE_CPU_NEON +#include "Forge/TF_Config.h" +#include 
"../TF_Simd32x4.h" +#endif + +static inline Tsimd_f32x4_t tfSimd4fReplaceIndex0ByValue(Tsimd_f32x4_t input, float value) { return vsetq_lane_f32(value, input, 0); } +static inline Tsimd_f32x4_t tfSimd4fReplaceIndex1ByValue(Tsimd_f32x4_t input, float value) { return vsetq_lane_f32(value, input, 1); } +static inline Tsimd_f32x4_t tfSimd4fReplaceIndex2ByValue(Tsimd_f32x4_t input, float value) { return vsetq_lane_f32(value, input, 2); } +static inline Tsimd_f32x4_t tfSimd4fReplaceIndex3ByValue(Tsimd_f32x4_t input, float value) { return vsetq_lane_f32(value, input, 3); } + +inline Tsimd_i32x4_t tfSimd4iSelect(Tsimd_i32x4_t arg0, Tsimd_i32x4_t arg1, Tsimd_i32x4_t mask) { return vbslq_s32(mask, arg1, arg1); } +inline Tsimd_f32x4_t tfSimd4fSelect(Tsimd_f32x4_t arg0, Tsimd_f32x4_t arg1, Tsimd_f32x4_t mask) { return vbslq_f32(mask, arg1, arg1); } + +inline Tsimd_f32x4_t tfSimd4fZero() { return vmovq_n_f32(0.0f); } +inline Tsimd_i32x4_t tfSimd4iZero() { return vmovq_n_s32(0); } + +inline Tsimd_i32x4_t tfSimd4iNot(Tsimd_i32x4_t value) { return vmvnq_s32(value); } +inline Tsimd_i32x4_t tfSimd4iAnd(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vandq_s32(arg1, arg2); } +inline Tsimd_i32x4_t tfSimd4iAndNot(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vandq_s32(vmvnq_s32(arg1), arg2); } +inline Tsimd_i32x4_t tfSimd4iOr(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vorrq_s32(arg1, arg2); } +inline Tsimd_i32x4_t tfSimd4iXor(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return veorq_s32(arg1, arg2); } + +inline Tsimd_f32x4_t tfSimd4fNot(Tsimd_f32x4_t value) { return vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(value))); } +inline Tsimd_f32x4_t tfSimd4fAnd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vreinterpretq_f32_s32(vandq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); +} +inline Tsimd_f32x4_t tfSimd4fAndNot(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vreinterpretq_f32_s32(vandq_s32(vmvnq_s32(vreinterpretq_s32_f32(arg1)), vreinterpretq_s32_f32(arg2))); +} +inline Tsimd_f32x4_t tfSimd4fOr(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vreinterpretq_f32_s32(vorrq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); +} +inline Tsimd_f32x4_t tfSimd4fXor(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(arg1), vreinterpretq_s32_f32(arg2))); +} + +inline Tsimd_f32x4_t tfSimd4fFloor(Tsimd_f32x4_t value) { return vrndmq_f32(value); } +inline Tsimd_f32x4_t tfSimd4fCeil(Tsimd_f32x4_t value) { return vrndpq_f32(value); } +inline Tsimd_f32x4_t tfSimd4fRound(Tsimd_f32x4_t value) { return vrndnq_f32(value); } +inline Tsimd_f32x4_t tfSimd4fTruncate(Tsimd_f32x4_t value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } +inline Tsimd_f32x4_t tfSimd4fMin(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vminq_f32(arg1, arg2); } +inline Tsimd_f32x4_t tfSimd4fMax(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vmaxq_f32(arg1, arg2); } +inline Tsimd_f32x4_t tfSimd4fClamp(Tsimd_f32x4_t value, Tsimd_f32x4_t min, Tsimd_f32x4_t max) { + return tfSimd4fMax(min, tfSimd4fMin(value, max)); +} + +inline Tsimd_i32x4_t tfSimd4fToSimd4i(Tsimd_f32x4_t value) { return vreinterpretq_f32_s32(value); } + +inline Tsimd_f32x4_t tfSimd4iToSimd4f(Tsimd_i32x4_t value) { return vreinterpretq_s32_f32(value); } + +inline float tfS32x4FSelectIndex0(Tsimd_f32x4_t value) { return vgetq_lane_f32(value, 0); } +inline float tfS32x4FSelectIndex1(Tsimd_f32x4_t value) { return vgetq_lane_f32(value, 1); } +inline float 
tfS32x4FSelectIndex2(Tsimd_f32x4_t value) { return vgetq_lane_f32(value, 2); } +inline float tfS32x4FSelectIndex3(Tsimd_f32x4_t value) { return vgetq_lane_f32(value, 3); } + +inline Tsimd_f32x4_t tfSimd4fAdd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vaddq_f32(arg1, arg2); } +inline Tsimd_f32x4_t tfSimd4fSub(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vsubq_f32(arg1, arg2); } +inline Tsimd_f32x4_t tfSimd4fMul(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vmulq_f32(arg1, arg2); } +inline Tsimd_f32x4_t tfSimd4fMadd(Tsimd_f32x4_t mul1, Tsimd_f32x4_t mul2, Tsimd_f32x4_t add) { return vmlaq_f32(add, mul1, mul2); } + +inline Tsimd_f32x4_t tfSimd4fDiv(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vdivq_f32(arg1, arg2); } + +inline Tsimd_f32x4_t tfSimd4fAbs(Tsimd_f32x4_t value) { return vabsq_f32(value); } +inline Tsimd_f32x4_t tfSimdFloat4x32Load(float x, float y, float z, float w) { + const float values[4] = { x, y, z, w }; + return vld1q_f32(values); +} + +inline Tsimd_i32x4_t tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w) { + const int32_t values[4] = { x, y, z, w }; + return vld1q_s32(values); +} + +inline Tsimd_f32x2_t tfSimd4fToSimd2f(Tsimd_f32x4_t value) { return vget_low_f32(value); } + +inline Tsimd_f32x3_t tfSimd4fToSimd3f(Tsimd_f32x4_t value) { return value; } + +inline Tsimd_f32x4_t tfSimd4fSplatIndex0(Tsimd_f32x4_t value) { return vdupq_laneq_f32(value, 0); } + +inline Tsimd_f32x4_t tfSimd4fSplatIndex1(Tsimd_f32x4_t value) { return vdupq_laneq_f32(value, 1); } + +inline Tsimd_f32x4_t tfSimd4fSplatIndex2(Tsimd_f32x4_t value) { return vdupq_laneq_f32(value, 2); } + +inline Tsimd_f32x4_t tfSimd4fSplatIndex3(Tsimd_f32x4_t value) { return vdupq_laneq_f32(value, 3); } + +inline Tsimd_i32x4_t tfSimd4iSplat(int32_t value) { return vdupq_n_s32(value); } + +inline Tsimd_f32x4_t tfSimd4fSplat(float value) { return vdupq_n_f32(value); } + +inline Tsimd_f32x4_t tfSimd4fCmpEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vreinterpretq_f32_s32(vceqq_f32(arg1, arg2)); } +inline Tsimd_f32x4_t tfSimd4fCmpNeq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vreinterpretq_f32_s32(vmvnq_s32(vceqq_f32(arg1, arg2))); +} +inline Tsimd_f32x4_t tfSimd4fCmpGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vreinterpretq_f32_s32(vcgtq_f32(arg1, arg2)); } +inline Tsimd_f32x4_t tfSimd4fCmpGtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vreinterpretq_f32_s32(vcgeq_f32(arg1, arg2)); } +inline Tsimd_f32x4_t tfSimd4fCmpLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vreinterpretq_f32_s32(vcltq_f32(arg1, arg2)); } +inline Tsimd_f32x4_t tfSimd4fCmpLtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { return vreinterpretq_f32_s32(vcleq_f32(arg1, arg2)); } + +inline Tsimd_i32x4_t tfSimd4iCmpEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vceqq_s32(arg1, arg2); } +inline Tsimd_i32x4_t tfSimd4iCmpNeq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vmvnq_s32(vceqq_s32(arg1, arg2)); } +inline Tsimd_i32x4_t tfSimd4iCmpGt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vcgtq_s32(arg1, arg2); } +inline Tsimd_i32x4_t tfSimd4iCmpGtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vcgeq_s32(arg1, arg2); } +inline Tsimd_i32x4_t tfSimd4iCmpLt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vcltq_s32(arg1, arg2); } +inline Tsimd_i32x4_t tfSimd4iCmpLtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { return vcleq_s32(arg1, arg2); } + +inline bool tfSimd4fCmpAllLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vminv_u32(vcltq_f32(arg1, arg2)) != 0; +} + +inline bool 
tfSimd4fCmpAllGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vminv_u32(vcgtq_f32(arg1, arg2)) != 0; +} + +inline bool tfSimd4fCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2) { + return vminv_u32(vceqq_f32(arg1, arg2)) != 0; +} + +inline bool tfSimd4iCmpAllEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2) { + return vminv_u32(vceqq_s32(arg1, arg2)) != 0; +} diff --git a/Forge/Math/Internal/TF_SimdFloat.inl b/Forge/Math/Internal/TF_SimdFloat.inl index d1f0f38f48..8c8973863f 100644 --- a/Forge/Math/Internal/TF_SimdFloat.inl +++ b/Forge/Math/Internal/TF_SimdFloat.inl @@ -17,19 +17,19 @@ static inline TSimdFloat4 tfVectorMul3x4F(const TSimdFloat4x3 a0, const TSimdFloat3 a1) { - TSimdFloat32x4 xxxx = tfSimdFloat3To4Splat0(a1.mRow); - TSimdFloat32x4 yyyy = tfSimdFloat3To4Splat1(a1.mRow); - TSimdFloat32x4 zzzz = tfSimdFloat3To4Splat2(a1.mRow); - TSimdFloat32x4 res = tfSimd4fMul(a0.mCol0, xxxx); + Tsimd_f32x4_t xxxx = tfS32x3FTo32x4FSplat0(a1.mRow); + Tsimd_f32x4_t yyyy = tfS32x3FTo32x4FSplat1(a1.mRow); + Tsimd_f32x4_t zzzz = tfS32x3FTo32x4FSplat2(a1.mRow); + Tsimd_f32x4_t res = tfSimd4fMul(a0.mCol0, xxxx); res = tfSimd4fMadd(a0.mCol1, yyyy, res); res = tfSimd4fMadd(a0.mCol2, zzzz, res); return { res }; } static inline TSimdFloat4 tfVectorMul2x4F(const TSimdFloat4x2 a0, const TSimdFloat2 a1) { - TSimdFloat32x4 xxxx = tfSimdFloat2To4Splat0(a1.mRow); - TSimdFloat32x4 yyyy = tfSimdFloat2To4Splat1(a1.mRow); - TSimdFloat32x4 res = tfSimd4fMul(a0.mCol0, xxxx); + Tsimd_f32x4_t xxxx = tfSimdFloat2To4Splat0(a1.mRow); + Tsimd_f32x4_t yyyy = tfSimdFloat2To4Splat1(a1.mRow); + Tsimd_f32x4_t res = tfSimd4fMul(a0.mCol0, xxxx); res = tfSimd4fMadd(a0.mCol1, yyyy, res); return { res }; } @@ -38,12 +38,12 @@ static inline TSimdFloat4 tfVectorMul2x4F(const TSimdFloat4x2 a0, const TSimdFlo * Multiplication of a 3x3 matrix and a 4 element vector **/ static inline TSimdFloat3 tfVectorMul3x3F(const TSimdFloat3x3 a0, const TSimdFloat3 a1) { - TSimdFloat32x3 xxx = tfSimd3fSplatIndex0(a1.mRow); - TSimdFloat32x3 yyy = tfSimd3fSplatIndex1(a1.mRow); - TSimdFloat32x3 zzz = tfSimd3fSplatIndex2(a1.mRow); - TSimdFloat32x3 res = tfSimd3fMul(a0.mCol0, xxx); - res = tfSimd3fMadd(a0.mCol1, yyy, res); - res = tfSimd3fMadd(a0.mCol2, zzz, res); + Tsimd_f32x3_t xxx = tfS32x3FSplatIndex0(a1.mRow); + Tsimd_f32x3_t yyy = tfS32x3FSplatIndex1(a1.mRow); + Tsimd_f32x3_t zzz = tfS32x3FSplatIndex2(a1.mRow); + Tsimd_f32x3_t res = tfS32x3FMul(a0.mCol0, xxx); + res = tfS32x3FMadd(a0.mCol1, yyy, res); + res = tfS32x3FMadd(a0.mCol2, zzz, res); return { res }; } @@ -52,19 +52,19 @@ static inline TSimdFloat3 tfVectorMul3x3F(const TSimdFloat3x3 a0, const TSimdFlo static inline TSimdFloat3 tfGetRowSimd3x4F(TSimdFloat4x3 input, int row) { ASSERT(row >= 0 && row < 4); switch(row) { - case 0: return {tfSimdFloat3x32Load( + case 0: return {tfSimd3x32FLoad( tfSimd4fSelectIndex0(input.mCol0), tfSimd4fSelectIndex0(input.mCol1), tfSimd4fSelectIndex0(input.mCol2))}; - case 1: return {tfSimdFloat3x32Load( + case 1: return {tfSimd3x32FLoad( tfSimd4fSelectIndex1(input.mCol0), tfSimd4fSelectIndex1(input.mCol1), tfSimd4fSelectIndex1(input.mCol2))}; - case 2: return {tfSimdFloat3x32Load( + case 2: return {tfSimd3x32FLoad( tfSimd4fSelectIndex2(input.mCol0), tfSimd4fSelectIndex2(input.mCol1), tfSimd4fSelectIndex2(input.mCol2))}; - case 3: return {tfSimdFloat3x32Load( + case 3: return {tfSimd3x32FLoad( tfSimd4fSelectIndex3(input.mCol0), tfSimd4fSelectIndex3(input.mCol1), tfSimd4fSelectIndex3(input.mCol2))}; @@ -73,34 +73,6 @@ static inline TSimdFloat3 
tfGetRowSimd3x4F(TSimdFloat4x3 input, int row) { } -static inline TSimdFloat4 tfGetRowSimd4x4F(TSimdFloat4x4 input, int row) { - ASSERT(row >= 0 && row < 4); - switch(row) { - case 0: return {tfSimdFloat4x32Load( - tfSimd4fSelectIndex0(input.mCol0), - tfSimd4fSelectIndex0(input.mCol1), - tfSimd4fSelectIndex0(input.mCol2), - tfSimd4fSelectIndex0(input.mCol3) - )}; - case 1: return {tfSimdFloat4x32Load( - tfSimd4fSelectIndex1(input.mCol0), - tfSimd4fSelectIndex1(input.mCol1), - tfSimd4fSelectIndex1(input.mCol2), - tfSimd4fSelectIndex1(input.mCol3))}; - case 2: return {tfSimdFloat4x32Load( - tfSimd4fSelectIndex2(input.mCol0), - tfSimd4fSelectIndex2(input.mCol1), - tfSimd4fSelectIndex2(input.mCol2), - tfSimd4fSelectIndex2(input.mCol3))}; - case 3: return {tfSimdFloat4x32Load( - tfSimd4fSelectIndex3(input.mCol0), - tfSimd4fSelectIndex3(input.mCol1), - tfSimd4fSelectIndex3(input.mCol2), - tfSimd4fSelectIndex3(input.mCol3))}; - } - return {}; - -} static inline TSimdFloat2 tfGetRowSimd2x4F(TSimdFloat4x2 input, int row) { @@ -134,35 +106,13 @@ static inline float tfGetRowSimd1x4F(TSimdFloat4x1 input, int row) { } -static inline void tfSetElemSimd4x4F(TSimdFloat4x4* input, int col, int row, float value){ - ASSERT(col >= 0 && col < 4); - ASSERT(row >= 0 && row < 4); - switch (row) - { - case 0: input->mCol[col] = tfSimd4fReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimd4fReplaceIndex1ByValue(input->mCol[col], value); break; - case 2: input->mCol[col] = tfSimd4fReplaceIndex2ByValue(input->mCol[col], value); break; - case 3: input->mCol[col] = tfSimd4fReplaceIndex3ByValue(input->mCol[col], value); break; - } -} -static inline void tfSetElemSimd3x4F(TSimdFloat4x3* input, int col, int row, float value){ - ASSERT(col >= 0 && col < 3); - ASSERT(row >= 0 && row < 4); - switch (row) - { - case 0: input->mCol[col] = tfSimd4fReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimd4fReplaceIndex1ByValue(input->mCol[col], value); break; - case 2: input->mCol[col] = tfSimd4fReplaceIndex2ByValue(input->mCol[col], value); break; - case 3: input->mCol[col] = tfSimd4fReplaceIndex3ByValue(input->mCol[col], value); break; - } -} static inline void tfSetElemSimd2x4F(TSimdFloat4x2* input, int col, int row, float value){ ASSERT(col >= 0 && col < 2); ASSERT(row >= 0 && row < 4); switch (row) { - case 0: input->mCol[col] = tfSimd4fReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimd4fReplaceIndex1ByValue(input->mCol[col], value); break; + case 0: input->mCol[col] = tfSimdFloat4ReplaceIndex0ByValue(input->mCol[col], value); break; + case 1: input->mCol[col] = tfSimdFloat4ReplaceIndex1ByValue(input->mCol[col], value); break; case 2: input->mCol[col] = tfSimd4fReplaceIndex2ByValue(input->mCol[col], value); break; case 3: input->mCol[col] = tfSimd4fReplaceIndex3ByValue(input->mCol[col], value); break; } @@ -172,8 +122,8 @@ static inline void tfSetElemSimd1x4F(TSimdFloat4x1* input, int col, int row, flo ASSERT(row >= 0 && row < 4); switch (row) { - case 0: input->mCol[col] = tfSimd4fReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimd4fReplaceIndex1ByValue(input->mCol[col], value); break; + case 0: input->mCol[col] = tfSimdFloat4ReplaceIndex0ByValue(input->mCol[col], value); break; + case 1: input->mCol[col] = tfSimdFloat4ReplaceIndex1ByValue(input->mCol[col], value); break; case 2: input->mCol[col] = tfSimd4fReplaceIndex2ByValue(input->mCol[col], value); break; case 3: 
input->mCol[col] = tfSimd4fReplaceIndex3ByValue(input->mCol[col], value); break; } @@ -261,9 +211,9 @@ static inline TSimdFloat3x3 tfLoadSimd3x3F( float m20, float m21, float m22 ) { TSimdFloat3x3 res; - res.mCol0 = tfSimdFloat3x32Load(m00, m10, m20); - res.mCol1 = tfSimdFloat3x32Load(m01, m11, m21); - res.mCol2 = tfSimdFloat3x32Load(m02, m12, m22); + res.mCol0 = tfSimd3x32FLoad(m00, m10, m20); + res.mCol1 = tfSimd3x32FLoad(m01, m11, m21); + res.mCol2 = tfSimd3x32FLoad(m02, m12, m22); return res; } static inline TSimdFloat3x2 tfLoadSimd2x3F( @@ -272,8 +222,8 @@ static inline TSimdFloat3x2 tfLoadSimd2x3F( float m20, float m21 ) { TSimdFloat3x2 res; - res.mCol0 = tfSimdFloat3x32Load(m00, m10, m20); - res.mCol1 = tfSimdFloat3x32Load(m01, m11, m21); + res.mCol0 = tfSimd3x32FLoad(m00, m10, m20); + res.mCol1 = tfSimd3x32FLoad(m01, m11, m21); return res; } static inline TSimdFloat3x1 tfLoadSimd1x3F( @@ -282,7 +232,7 @@ static inline TSimdFloat3x1 tfLoadSimd1x3F( float m20 ) { TSimdFloat3x1 res; - res.mCol0 = tfSimdFloat3x32Load(m00, m10, m20); + res.mCol0 = tfSimd3x32FLoad(m00, m10, m20); return res; } @@ -291,9 +241,9 @@ static inline void tfSetElemSimd3x3F(TSimdFloat3x3* input, int col, int row, flo ASSERT(row >= 0 && row < 3); switch (row) { - case 0: input->mCol[col] = tfSimdFloat3x32ReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimdFloat3x32ReplaceIndex1ByValue(input->mCol[col], value); break; - case 2: input->mCol[col] = tfSimdFloat3x32ReplaceIndex2ByValue(input->mCol[col], value); break; + case 0: input->mCol[col] = tfSimd3x32FReplaceIndex0ByValue(input->mCol[col], value); break; + case 1: input->mCol[col] = tfSimd3x32FReplaceIndex1ByValue(input->mCol[col], value); break; + case 2: input->mCol[col] = tfSimd3x32FReplaceIndex2ByValue(input->mCol[col], value); break; } } @@ -303,9 +253,9 @@ static inline void tfSetElemSimd2x3F(TSimdFloat3x2* input, int col, int row, flo ASSERT(row >= 0 && row < 3); switch (row) { - case 0: input->mCol[col] = tfSimdFloat3x32ReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimdFloat3x32ReplaceIndex1ByValue(input->mCol[col], value); break; - case 2: input->mCol[col] = tfSimdFloat3x32ReplaceIndex2ByValue(input->mCol[col], value); break; + case 0: input->mCol[col] = tfSimd3x32FReplaceIndex0ByValue(input->mCol[col], value); break; + case 1: input->mCol[col] = tfSimd3x32FReplaceIndex1ByValue(input->mCol[col], value); break; + case 2: input->mCol[col] = tfSimd3x32FReplaceIndex2ByValue(input->mCol[col], value); break; } } @@ -314,9 +264,9 @@ static inline void tfSetElemSimd1x3F(TSimdFloat3x1* input, int col, int row, flo ASSERT(row >= 0 && row < 3); switch (row) { - case 0: input->mCol[col] = tfSimdFloat3x32ReplaceIndex0ByValue(input->mCol[col], value); break; - case 1: input->mCol[col] = tfSimdFloat3x32ReplaceIndex1ByValue(input->mCol[col], value); break; - case 2: input->mCol[col] = tfSimdFloat3x32ReplaceIndex2ByValue(input->mCol[col], value); break; + case 0: input->mCol[col] = tfSimd3x32FReplaceIndex0ByValue(input->mCol[col], value); break; + case 1: input->mCol[col] = tfSimd3x32FReplaceIndex1ByValue(input->mCol[col], value); break; + case 2: input->mCol[col] = tfSimd3x32FReplaceIndex2ByValue(input->mCol[col], value); break; } } @@ -324,27 +274,27 @@ static inline float tfGetElemSimd3F(TSimdFloat3 a, int elem) { ASSERT(elem >= 0 && elem < 3); switch (elem) { case 0: - return tfSimd3fSelectIndex0(a.mRow); + return tfS32x3FSelectIndex0(a.mRow); case 1: - return tfSimd3fSelectIndex1(a.mRow); + 
return tfS32x3FSelectIndex1(a.mRow); case 2: - return tfSimd3fSelectIndex2(a.mRow); + return tfS32x3FSelectIndex2(a.mRow); } return 0; } -static inline float tfGetXSimd3F(TSimdFloat3 a) { return tfSimd3fSelectIndex0(a.mRow); } -static inline float tfGetYSimd3F(TSimdFloat3 a) { return tfSimd3fSelectIndex1(a.mRow); } -static inline float tfGetZSimd3F(TSimdFloat3 a) { return tfSimd3fSelectIndex2(a.mRow); } +static inline float tfGetXSimd3F(TSimdFloat3 a) { return tfS32x3FSelectIndex0(a.mRow); } +static inline float tfGetYSimd3F(TSimdFloat3 a) { return tfS32x3FSelectIndex1(a.mRow); } +static inline float tfGetZSimd3F(TSimdFloat3 a) { return tfS32x3FSelectIndex2(a.mRow); } static inline bool tfIsCloseSimd3F(TSimdFloat3 a, TSimdFloat3 b, float epsilon) { - return tfSimdFloat32x3CmpAllLt(tfSimd3fAbs(tfSimd3fSub(a.mRow, b.mRow)), tfSimd3fSplat(epsilon)); + return tfSimdFloat32x3CmpAllLt(tfS32x3FAbs(tfS32x3FSub(a.mRow, b.mRow)), tfS32x3FSplat(epsilon)); } static inline float tfVectorDot3F(TSimdFloat3 a0, TSimdFloat3 a1) { - TSimdFloat32x3 x2 = tfSimd3fMul(a0.mRow, a1.mRow); - TSimdFloat32x3 xy = tfSimd3fAdd(tfSimd3fSplatIndex1(x2), x2); - TSimdFloat32x3 xyz = tfSimd3fAdd(tfSimd3fSplatIndex2(x2), xy); - return tfSimd3fSelectIndex0(xyz); + Tsimd_f32x3_t x2 = tfS32x3FMul(a0.mRow, a1.mRow); + Tsimd_f32x3_t xy = tfS32x3FAdd(tfS32x3FSplatIndex1(x2), x2); + Tsimd_f32x3_t xyz = tfS32x3FAdd(tfS32x3FSplatIndex2(x2), xy); + return tfS32x3FSelectIndex0(xyz); } @@ -385,10 +335,10 @@ static inline TSimdFloat2 tfLoadSimd2F(float x, float y) { } static inline TSimdFloat2 tfVectorMul2x2F(const TSimdFloat2x2 a0, const TSimdFloat2 a1) { - TSimdFloat32x2 xx = tfSimd2fSplatIndex0(a1.mRow); - TSimdFloat32x2 yy = tfSimd2fSplatIndex1(a1.mRow); - TSimdFloat32x2 res = tfSimd2fMul(a0.mCol0, xx); - res = tfSimd2fMadd(a0.mCol1, yy, res); + Tsimd_f32x2_t xx = tfS32x2FSplatIndex0(a1.mRow); + Tsimd_f32x2_t yy = tfS32x2FSplatIndex1(a1.mRow); + Tsimd_f32x2_t res = tfS32x2FMul(a0.mCol0, xx); + res = tfS32x2FMadd(a0.mCol1, yy, res); return { res }; } @@ -396,24 +346,24 @@ static inline float tfGetElemSimd2F(TSimdFloat2 a, int elem) { ASSERT(elem >= 0 && elem < 2); switch (elem) { case 0: - return tfSimd2fSelectIndex0(a.mRow); + return tfS32x2FSelectIndex0(a.mRow); case 1: - return tfSimd2fSelectIndex1(a.mRow); + return tfS32x2FSelectIndex1(a.mRow); } return 0; } -static inline float tfGetXSimd2F(TSimdFloat2 a) { return tfSimd2fSelectIndex0(a.mRow); } -static inline float tfGetYSimd2F(TSimdFloat2 a) { return tfSimd2fSelectIndex1(a.mRow); } +static inline float tfGetXSimd2F(TSimdFloat2 a) { return tfS32x2FSelectIndex0(a.mRow); } +static inline float tfGetYSimd2F(TSimdFloat2 a) { return tfS32x2FSelectIndex1(a.mRow); } -static inline TSimdFloat3 tfVectorEleDiv3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfSimd3fDiv(a0.mRow, a1.mRow) }; } -static inline TSimdFloat2 tfVectorEleDiv2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfSimd2fDiv(a0.mRow, a1.mRow) }; } +static inline TSimdFloat3 tfVectorEleDiv3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfS32x3FDiv(a0.mRow, a1.mRow) }; } +static inline TSimdFloat2 tfVectorEleDiv2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfS32x2FDiv(a0.mRow, a1.mRow) }; } -static inline TSimdFloat3 tfVectorEleSub3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfSimd3fSub(a0.mRow, a1.mRow) }; } -static inline TSimdFloat2 tfVectorEleSub2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfSimd2fSub(a0.mRow, a1.mRow) }; } +static inline TSimdFloat3 tfVectorEleSub3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { 
tfS32x3FSub(a0.mRow, a1.mRow) }; }
+static inline TSimdFloat2 tfVectorEleSub2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfS32x2FSub(a0.mRow, a1.mRow) }; }
-static inline TSimdFloat3 tfVectorEleAdd3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfSimd3fAdd(a0.mRow, a1.mRow) }; }
-static inline TSimdFloat2 tfVectorEleAdd2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfSimd2fAdd(a0.mRow, a1.mRow) }; }
+static inline TSimdFloat3 tfVectorEleAdd3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfS32x3FAdd(a0.mRow, a1.mRow) }; }
+static inline TSimdFloat2 tfVectorEleAdd2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfS32x2FAdd(a0.mRow, a1.mRow) }; }
-static inline TSimdFloat3 tfVectorEleMul3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfSimd3fMul(a0.mRow, a1.mRow) }; }
-static inline TSimdFloat2 tfVectorEleMul2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfSimd2fMul(a0.mRow, a1.mRow) }; }
+static inline TSimdFloat3 tfVectorEleMul3F(TSimdFloat3 a0, TSimdFloat3 a1) { return { tfS32x3FMul(a0.mRow, a1.mRow) }; }
+static inline TSimdFloat2 tfVectorEleMul2F(TSimdFloat2 a0, TSimdFloat2 a1) { return { tfS32x2FMul(a0.mRow, a1.mRow) }; }
diff --git a/Forge/Math/Internal/TF_SimdFloat3.inl b/Forge/Math/Internal/TF_SimdFloat3.inl
index 8aec56c142..6bcb4ba6df 100644
--- a/Forge/Math/Internal/TF_SimdFloat3.inl
+++ b/Forge/Math/Internal/TF_SimdFloat3.inl
@@ -4,14 +4,14 @@
 #include "Forge/Math/TF_Simd32x3.h"
-static inline TSimdFloat3 tfLoadZeroSimd3x3F() {
-    TSimdFloat3 res;
-    res.mRow = tfSimd3fZero();
+static inline TSimdFloat3x3 tfLoadZeroSimd3x3F() {
+    TSimdFloat3x3 res;
+    res.mCol0 = tfS32x3FZero();
+    res.mCol1 = tfS32x3FZero();
+    res.mCol2 = tfS32x3FZero();
     return res;
 }
 static inline TSimdFloat3 tfLoadSimd3F(float x, float y, float z) {
     TSimdFloat3 res;
-    res.mRow = tfSimdFloat3x32Load(x, y, z);
+    res.mRow = tfSimd3x32FLoad(x, y, z);
     return res;
 }
diff --git a/Forge/Math/Internal/TF_SimdFloat4.inl b/Forge/Math/Internal/TF_SimdFloat4.inl
index 6f5aecc9a0..3f249dd684 100644
--- a/Forge/Math/Internal/TF_SimdFloat4.inl
+++ b/Forge/Math/Internal/TF_SimdFloat4.inl
@@ -14,17 +14,17 @@ static inline float tfGetElemSimd4F(TSimdFloat4 a, int elem) {
     ASSERT(elem >= 0 && elem < 4);
     switch (elem) {
         case 0:
-            return tfSimd4fSelectIndex0(a.mRow);
+            return tfS32x4FSelectIndex0(a.mRow);
         case 1:
-            return tfSimd4fSelectIndex1(a.mRow);
+            return tfS32x4FSelectIndex1(a.mRow);
         case 2:
-            return tfSimd4fSelectIndex2(a.mRow);
+            return tfS32x4FSelectIndex2(a.mRow);
         case 3:
-            return tfSimd4fSelectIndex3(a.mRow);
+            return tfS32x4FSelectIndex3(a.mRow);
     }
     return 0;
 }
-static inline float tfGetXSimd4F(TSimdFloat4 a) { return tfSimd4fSelectIndex0(a.mRow); }
-static inline float tfGetYSimd4F(TSimdFloat4 a) { return tfSimd4fSelectIndex1(a.mRow); }
-static inline float tfGetZSimd4F(TSimdFloat4 a) { return tfSimd4fSelectIndex2(a.mRow); }
-static inline float tfGetWSimd4F(TSimdFloat4 a) { return tfSimd4fSelectIndex3(a.mRow); }
+static inline float tfGetXSimd4F(TSimdFloat4 a) { return tfS32x4FSelectIndex0(a.mRow); }
+static inline float tfGetYSimd4F(TSimdFloat4 a) { return tfS32x4FSelectIndex1(a.mRow); }
+static inline float tfGetZSimd4F(TSimdFloat4 a) { return tfS32x4FSelectIndex2(a.mRow); }
+static inline float tfGetWSimd4F(TSimdFloat4 a) { return tfS32x4FSelectIndex3(a.mRow); }
diff --git a/Forge/Math/Internal/TF_SimdFloat4x4.inl b/Forge/Math/Internal/TF_SimdFloat4x4.inl
index 0aa2f9c727..53cab22b42 100644
--- a/Forge/Math/Internal/TF_SimdFloat4x4.inl
+++ b/Forge/Math/Internal/TF_SimdFloat4x4.inl
@@ -5,12 +5,66 @@
 #include "Forge/Math/TF_Simd32x4.h"
-static inline TSimdFloat4 tfVectorMul4x4F(const TSimdFloat4x4 a0, const TSimdFloat4 a1) {
-    TSimdFloat32x4 xxxx = tfSimd4fSplatIndex0(a1.mRow);
-    TSimdFloat32x4 yyyy = tfSimd4fSplatIndex1(a1.mRow);
-    TSimdFloat32x4 zzzz = tfSimd4fSplatIndex2(a1.mRow);
-    TSimdFloat32x4 wwww = tfSimd4fSplatIndex3(a1.mRow);
-    TSimdFloat32x4 res = tfSimd4fMul(a0.mCol0, xxxx);
+static inline TSimdFloat4 tfGetRowSimd4x4F(TSimdFloat4x4 input, int row) {
+    ASSERT(row >= 0 && row < 4);
+    switch(row) {
+        case 0: return {tfS32x4FLoad(
+            tfS32x4FSelectIndex0(input.mCol0),
+            tfS32x4FSelectIndex0(input.mCol1),
+            tfS32x4FSelectIndex0(input.mCol2),
+            tfS32x4FSelectIndex0(input.mCol3)
+        )};
+        case 1: return {tfS32x4FLoad(
+            tfS32x4FSelectIndex1(input.mCol0),
+            tfS32x4FSelectIndex1(input.mCol1),
+            tfS32x4FSelectIndex1(input.mCol2),
+            tfS32x4FSelectIndex1(input.mCol3))};
+        case 2: return {tfS32x4FLoad(
+            tfS32x4FSelectIndex2(input.mCol0),
+            tfS32x4FSelectIndex2(input.mCol1),
+            tfS32x4FSelectIndex2(input.mCol2),
+            tfS32x4FSelectIndex2(input.mCol3))};
+        case 3: return {tfS32x4FLoad(
+            tfS32x4FSelectIndex3(input.mCol0),
+            tfS32x4FSelectIndex3(input.mCol1),
+            tfS32x4FSelectIndex3(input.mCol2),
+            tfS32x4FSelectIndex3(input.mCol3))};
+    }
+    return {};
+}
+
+static inline void tfSetRowSimd4x4F(TSimdFloat4x4* input, int row, TSimdFloat4 value) {
+    // ASSERT(row >= 0 && row < 4);
+    // switch (row) {
+    //     case 0:
+    //         input->mCol0 = tfS32x4FReplaceIndex0(input->mCol0, tfS32x4FSplatIndex0(value));
+    // }
+}
+static inline TSimdFloat4 tfGetColumnSimd4x4F(int column);
+static inline void tfSetColumnSimd4x4F(TSimdFloat4x4* input, int row);
+static inline void tfSetElemSimd4x4F(TSimdFloat4x4* input, int col, int row, float value);
+
+
+//static inline void tfSetElemSimd3x4F(TSimdFloat4x3* input, int col, int row, float value){
+//    ASSERT(col >= 0 && col < 3);
+//    ASSERT(row >= 0 && row < 4);
+//    switch (row)
+//    {
+//        case 0: input->mCol[col] = tfSimd4fReplaceIndex0ByValue(input->mCol[col], value); break;
+//        case 1: input->mCol[col] = tfSimd4fReplaceIndex1ByValue(input->mCol[col], value); break;
+//        case 2: input->mCol[col] = tfSimd4fReplaceIndex2ByValue(input->mCol[col], value); break;
+//        case 3: input->mCol[col] = tfSimd4fReplaceIndex3ByValue(input->mCol[col], value); break;
+//    }
+//}
+
+
+static inline TSimdFloat4 tfVectorMulSimd4x4F(const TSimdFloat4x4 a0, const TSimdFloat4 a1) {
+    Tsimd_f32x4_t xxxx = tfSimd4fSplatIndex0(a1.mRow);
+    Tsimd_f32x4_t yyyy = tfSimd4fSplatIndex1(a1.mRow);
+    Tsimd_f32x4_t zzzz = tfSimd4fSplatIndex2(a1.mRow);
+    Tsimd_f32x4_t wwww = tfSimd4fSplatIndex3(a1.mRow);
+    Tsimd_f32x4_t res = tfSimd4fMul(a0.mCol0, xxxx);
     res = tfSimd4fMadd(a0.mCol1, yyyy, res);
     res = tfSimd4fMadd(a0.mCol2, zzzz, res);
     res = tfSimd4fMadd(a0.mCol3, wwww, res);
@@ -37,30 +91,30 @@ static inline TSimdFloat4x4 tfLoadIdentitySimd4x4F()
     return value;
 }
-static inline TSimdFloat4x4 tfMatMul4x4F_4x4F(TSimdFloat4x4 a0, TSimdFloat4x4 a1) {
+static inline TSimdFloat4x4 tfMulSimd4x4F_4x4F(TSimdFloat4x4 a0, TSimdFloat4x4 a1) {
     TSimdFloat4x4 res;
-    res.mCol0 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow;
-    res.mCol1 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol1 }).mRow;
-    res.mCol2 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol2 }).mRow;
-    res.mCol3 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol3 }).mRow;
+    res.mCol0 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow;
+    res.mCol1 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol1 }).mRow;
+    res.mCol2 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol2 }).mRow;
+    res.mCol3 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol3 }).mRow;
     return
res; } -static inline TSimdFloat4x3 tfMatMul4x4F_3x4F(TSimdFloat4x4 a0, TSimdFloat4x3 a1) { +static inline TSimdFloat4x3 tfMulSimd4x4F_3x4F(TSimdFloat4x4 a0, TSimdFloat4x3 a1) { TSimdFloat4x3 res; - res.mCol0 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow; - res.mCol1 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol1 }).mRow; - res.mCol2 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol2 }).mRow; + res.mCol0 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow; + res.mCol1 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol1 }).mRow; + res.mCol2 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol2 }).mRow; return res; } -static inline TSimdFloat4x2 tfMatMul4x4F_2x4F(TSimdFloat4x4 a0, TSimdFloat4x2 a1) { +static inline TSimdFloat4x2 tfMulSimd4x4F_2x4F(TSimdFloat4x4 a0, TSimdFloat4x2 a1) { TSimdFloat4x2 res; - res.mCol0 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow; - res.mCol1 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol1 }).mRow; + res.mCol0 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow; + res.mCol1 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol1 }).mRow; return res; } -static inline TSimdFloat4x1 tfMatMul4x4F_1x4F(TSimdFloat4x4 a0, TSimdFloat4x1 a1) { +static inline TSimdFloat4x1 tfMulSimd4x4F_1x4F(TSimdFloat4x4 a0, TSimdFloat4x1 a1) { TSimdFloat4x1 res; - res.mCol0 = tfVectorMul4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow; + res.mCol0 = tfVectorMulSimd4x4F(a0, TSimdFloat4{ a1.mCol0 }).mRow; return res; } diff --git a/Forge/Math/Internal/TF_SimdFloat4x4_neon.inl b/Forge/Math/Internal/TF_SimdFloat4x4_neon.inl index f177452c3b..40cefac482 100644 --- a/Forge/Math/Internal/TF_SimdFloat4x4_neon.inl +++ b/Forge/Math/Internal/TF_SimdFloat4x4_neon.inl @@ -4,15 +4,15 @@ #include "../TF_SimdFloat4x4.h" #endif -static inline TSimdFloat4x4 tfMatTranpose4x4F(TSimdFloat4x4 a0) { +static inline TSimdFloat4x4 tfTransposeSimd4x4F(TSimdFloat4x4 a0) { // abcd aecg aeim // efgh -> bfdh -> bfjn // ijkl imko cgko // mnop jnlp dhlp - const TSimdFloat32x4 tmp0 = vtrn1q_f32(a0.mCol0, a0.mCol1); - const TSimdFloat32x4 tmp1 = vtrn2q_f32(a0.mCol0, a0.mCol1); - const TSimdFloat32x4 tmp2 = vtrn1q_f32(a0.mCol2, a0.mCol3); - const TSimdFloat32x4 tmp3 = vtrn2q_f32(a0.mCol2, a0.mCol3); + const TSimd32Fx4 tmp0 = vtrn1q_f32(a0.mCol0, a0.mCol1); + const TSimd32Fx4 tmp1 = vtrn2q_f32(a0.mCol0, a0.mCol1); + const TSimd32Fx4 tmp2 = vtrn1q_f32(a0.mCol2, a0.mCol3); + const TSimd32Fx4 tmp3 = vtrn2q_f32(a0.mCol2, a0.mCol3); TSimdFloat4x4 result; result.mCol0 = vtrn1q_f64(tmp0, tmp2); result.mCol1 = vtrn1q_f64(tmp1, tmp3); diff --git a/Forge/Math/Internal/TF_SimdFloat4x4_scalar.inl b/Forge/Math/Internal/TF_SimdFloat4x4_scalar.inl index b52a058436..4d29a1b84b 100644 --- a/Forge/Math/Internal/TF_SimdFloat4x4_scalar.inl +++ b/Forge/Math/Internal/TF_SimdFloat4x4_scalar.inl @@ -4,11 +4,11 @@ #include "../TF_SimdFloat4x4.h" #endif -static inline TSimdFloat4x4 tfMatTranpose4x4F(TSimdFloat4x4 a0) { - TSimdFloat32x4 cols0 = { { a0.mCol0.v[0], a0.mCol1.v[0], a0.mCol2.v[0], a0.mCol3.v[0] } }; - TSimdFloat32x4 cols1 = { { a0.mCol0.v[1], a0.mCol1.v[1], a0.mCol2.v[1], a0.mCol3.v[1] } }; - TSimdFloat32x4 cols2 = { { a0.mCol0.v[2], a0.mCol1.v[2], a0.mCol2.v[2], a0.mCol3.v[2] } }; - TSimdFloat32x4 cols3 = { { a0.mCol0.v[3], a0.mCol1.v[3], a0.mCol2.v[3], a0.mCol3.v[3] } }; +static inline TSimdFloat4x4 tfTransposeSimd4x4F(TSimdFloat4x4 a0) { + Tsimd_f32x4_t cols0 = { { a0.mCol0.v[0], a0.mCol1.v[0], a0.mCol2.v[0], a0.mCol3.v[0] } }; + Tsimd_f32x4_t cols1 = { { a0.mCol0.v[1], a0.mCol1.v[1], a0.mCol2.v[1], a0.mCol3.v[1] } }; + Tsimd_f32x4_t cols2 = { { 
a0.mCol0.v[2], a0.mCol1.v[2], a0.mCol2.v[2], a0.mCol3.v[2] } }; + Tsimd_f32x4_t cols3 = { { a0.mCol0.v[3], a0.mCol1.v[3], a0.mCol2.v[3], a0.mCol3.v[3] } }; TSimdFloat4x4 result; result.mCol0 = cols0; result.mCol1 = cols1; diff --git a/Forge/Math/Internal/TF_SimdFloat4x4_sse.inl b/Forge/Math/Internal/TF_SimdFloat4x4_sse.inl index 5a178a0914..751cde3813 100644 --- a/Forge/Math/Internal/TF_SimdFloat4x4_sse.inl +++ b/Forge/Math/Internal/TF_SimdFloat4x4_sse.inl @@ -4,11 +4,11 @@ #include "../TF_SimdFloat4x4.h" #endif -static inline TSimdFloat4x4 tfMatTranpose4x4F(TSimdFloat4x4 a0) { - TSimdFloat32x4 tmp0 = _mm_shuffle_ps(a0.mCol0, a0.mCol1, 0x44); - TSimdFloat32x4 tmp2 = _mm_shuffle_ps(a0.mCol0, a0.mCol1, 0xEE); - TSimdFloat32x4 tmp1 = _mm_shuffle_ps(a0.mCol2, a0.mCol3, 0x44); - TSimdFloat32x4 tmp3 = _mm_shuffle_ps(a0.mCol2, a0.mCol3, 0xEE); +static inline TSimdFloat4x4 tfTransposeSimd4x4F(TSimdFloat4x4 a0) { + Tsimd_f32x4_t tmp0 = _mm_shuffle_ps(a0.mCol0, a0.mCol1, 0x44); + Tsimd_f32x4_t tmp2 = _mm_shuffle_ps(a0.mCol0, a0.mCol1, 0xEE); + Tsimd_f32x4_t tmp1 = _mm_shuffle_ps(a0.mCol2, a0.mCol3, 0x44); + Tsimd_f32x4_t tmp3 = _mm_shuffle_ps(a0.mCol2, a0.mCol3, 0xEE); TSimdFloat4x4 result; result.mCol0 = _mm_shuffle_ps(tmp0, tmp1, 0x88); result.mCol1 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); diff --git a/Forge/Math/Internal/TF_SimdFloat_sse.inl b/Forge/Math/Internal/TF_SimdFloat_sse.inl index d559c9b678..ce6e055ca2 100644 --- a/Forge/Math/Internal/TF_SimdFloat_sse.inl +++ b/Forge/Math/Internal/TF_SimdFloat_sse.inl @@ -5,9 +5,9 @@ #endif -static inline TSimdFloat32x4 ___Simd4x324Dot4(TSimdFloat4 a0, TSimdFloat4 a1) { - TSimdFloat32x4 x2 = _mm_mul_ps(a0.mRow, a1.mRow); - TSimdFloat32x4 tmp = _mm_add_ps(x2, _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1))); +static inline Tsimd_f32x4_t ___Simd4x324Dot4(TSimdFloat4 a0, TSimdFloat4 a1) { + Tsimd_f32x4_t x2 = _mm_mul_ps(a0.mRow, a1.mRow); + Tsimd_f32x4_t tmp = _mm_add_ps(x2, _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1))); return _mm_add_ps(tmp, _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 0, 2, 3))); } diff --git a/Forge/Math/TF_Simd32x2.h b/Forge/Math/TF_Simd32x2.h index 571186ae2b..58921c93f4 100644 --- a/Forge/Math/TF_Simd32x2.h +++ b/Forge/Math/TF_Simd32x2.h @@ -12,75 +12,75 @@ #include "Forge/Math/Internal/SimdTypes.h" -inline TSimdFloat32x2 tfSimd2fSplat(float value); -inline TSimdInt32x2 tfSimd2iSplat(int32_t value); - -inline TSimdFloat32x2 tfSimd2fZero(); -inline TSimdInt32x2 tfSimd2iZero(); - -inline TSimdInt32x2 tfSimd2fToSimd2i(TSimdFloat32x2 value); -inline TSimdFloat32x2 tfSimd2iToSimd2f(TSimdInt32x2 value); - -inline TSimdFloat32x2 tfSimd2fSplatIndex0(TSimdFloat32x2 value); -inline TSimdFloat32x2 tfSimd2fSplatIndex1(TSimdFloat32x2 value); - -static inline TSimdFloat32x4 tfSimdFloat2To4Splat0(TSimdFloat32x2 value); -static inline TSimdFloat32x4 tfSimdFloat2To4Splat1(TSimdFloat32x2 value); - -inline TSimdFloat32x2 tfSimd2fSelect(TSimdFloat32x2 arg0, TSimdFloat32x2 arg1, TSimdFloat32x2 mask); -inline TSimdInt32x2 tfSimd2iSelect(TSimdInt32x2 arg0, TSimdInt32x2 arg1, TSimdInt32x2 mask); - -inline float tfSimd2fSelectIndex0(TSimdFloat32x2 value); -inline float tfSimd2fSelectIndex1(TSimdFloat32x2 value); - -inline TSimdFloat32x2 tfSimdFloat2x32Load(float x, float y); -inline TSimdInt32x2 tfSimdInt2x32Load(int32_t x, int32_t y); - -inline TSimdFloat32x2 tfSimd2fAdd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fSub(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fMul(TSimdFloat32x2 arg1, TSimdFloat32x2 
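/*
 * [Editor's note, illustration only] The SSE tfTransposeSimd4x4F above is the standard 4x4
 * shuffle ladder: masks 0x44 / 0xEE pair up the low / high halves of two columns, and
 * 0x88 / 0xDD then gather the even / odd lanes of those pairs. A scalar reference with a
 * hypothetical column-major float[4][4] layout, for comparison:
 */
static inline void refTranspose4x4(const float m[4][4], float t[4][4]) {
    for (int c = 0; c < 4; ++c)
        for (int r = 0; r < 4; ++r)
            t[c][r] = m[r][c]; // column c, row r of the result takes column r, row c of the input
}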
arg2); -inline TSimdFloat32x2 tfSimd2fMadd(TSimdFloat32x2 mul1, TSimdFloat32x2 mul2, TSimdFloat32x2 add); -inline TSimdFloat32x2 tfSimd2fDiv(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fAbs(TSimdFloat32x2 value); - -inline TSimdFloat32x2 tfSimd2fNot(TSimdFloat32x2 value); -inline TSimdFloat32x2 tfSimd2fAnd(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fAndNot(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fOr(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fXor(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); - -inline TSimdInt32x2 tfSimd2iNot(TSimdInt32x2 value); -inline TSimdInt32x2 tfSimd2iAnd(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iAndNot(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iOr(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iXor(TSimdInt32x2 arg1, TSimdInt32x2 arg2); - -inline TSimdFloat32x2 tfSimd2fFloor(TSimdFloat32x2 value); -inline TSimdFloat32x2 tfSimd2fCeil(TSimdFloat32x2 value); -inline TSimdFloat32x2 tfSimd2fRound(TSimdFloat32x2 value); // Ties to even (banker's rounding) -inline TSimdFloat32x2 tfSimd2fTruncate(TSimdFloat32x2 value); -inline TSimdFloat32x2 tfSimd2fMin(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fMax(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fClamp(TSimdFloat32x2 value, TSimdFloat32x2 min, TSimdFloat32x2 max); - -inline TSimdInt32x2 tfSimd2iCmpEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iCmpNeq(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iCmpGt(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iCmpGtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iCmpLt(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline TSimdInt32x2 tfSimd2iCmpLtEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2); - -inline TSimdFloat32x2 tfSimd2fCmpEq(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fCmpNeq(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fCmpGt(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fCmpGtEq(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fCmpLt(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); -inline TSimdFloat32x2 tfSimd2fCmpLtEq(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); - -inline bool tfSimd2iCmpAllEq(TSimdInt32x2 arg1, TSimdInt32x2 arg2); -inline bool tfSimd2fCmpAllEq(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2); - -static inline bool tfSimdFloat32x2CmpAllLt(TSimdFloat32x2 a, TSimdFloat32x2 b); +inline Tsimd_f32x2_t tfS32x2FSplat(float value); +inline Tsimd_i32x2_t tfS32x2ISplat(int32_t value); + +inline Tsimd_f32x2_t tfS32x2FZero(); +inline Tsimd_i32x2_t tfS32x2IZero(); + +inline Tsimd_i32x2_t tfS32x2FToSimd2i(Tsimd_f32x2_t value); +inline Tsimd_f32x2_t tfS32x2IToSimd2f(Tsimd_i32x2_t value); + +inline Tsimd_f32x2_t tfS32x2FSplatIndex0(Tsimd_f32x2_t value); +inline Tsimd_f32x2_t tfS32x2FSplatIndex1(Tsimd_f32x2_t value); + +static inline Tsimd_f32x4_t tfSimdFloat2To4Splat0(Tsimd_f32x2_t value); +static inline Tsimd_f32x4_t tfSimdFloat2To4Splat1(Tsimd_f32x2_t value); + +inline Tsimd_f32x2_t tfS32x2FSelect(Tsimd_f32x2_t arg0, Tsimd_f32x2_t arg1, Tsimd_f32x2_t mask); +inline Tsimd_i32x2_t tfS32x2ISelect(Tsimd_i32x2_t arg0, Tsimd_i32x2_t arg1, Tsimd_i32x2_t mask); + +inline float tfS32x2FSelectIndex0(Tsimd_f32x2_t value); +inline float tfS32x2FSelectIndex1(Tsimd_f32x2_t 
value); + +inline Tsimd_f32x2_t tfSimdFloat2x32Load(float x, float y); +inline Tsimd_i32x2_t tfSimdInt2x32Load(int32_t x, int32_t y); + +inline Tsimd_f32x2_t tfS32x2FAdd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FSub(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FMul(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FMadd(Tsimd_f32x2_t mul1, Tsimd_f32x2_t mul2, Tsimd_f32x2_t add); +inline Tsimd_f32x2_t tfS32x2FDiv(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FAbs(Tsimd_f32x2_t value); + +inline Tsimd_f32x2_t tfS32x2FNot(Tsimd_f32x2_t value); +inline Tsimd_f32x2_t tfS32x2FAnd(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FAndNot(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FOr(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FXor(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); + +inline Tsimd_i32x2_t tfS32x2INot(Tsimd_i32x2_t value); +inline Tsimd_i32x2_t tfS32x2IAnd(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2IAndNot(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2IOr(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2IXor(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); + +inline Tsimd_f32x2_t tfS32x2FFloor(Tsimd_f32x2_t value); +inline Tsimd_f32x2_t tfS32x2FCeil(Tsimd_f32x2_t value); +inline Tsimd_f32x2_t tfS32x2FRound(Tsimd_f32x2_t value); // Ties to even (banker's rounding) +inline Tsimd_f32x2_t tfS32x2FTruncate(Tsimd_f32x2_t value); +inline Tsimd_f32x2_t tfS32x2FMin(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FMax(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FClamp(Tsimd_f32x2_t value, Tsimd_f32x2_t min, Tsimd_f32x2_t max); + +inline Tsimd_i32x2_t tfS32x2ICmpEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2ICmpNeq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2ICmpGt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2ICmpGtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2ICmpLt(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline Tsimd_i32x2_t tfS32x2ICmpLtEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); + +inline Tsimd_f32x2_t tfS32x2FCmpEq(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FCmpNeq(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FCmpGt(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FCmpGtEq(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FCmpLt(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); +inline Tsimd_f32x2_t tfS32x2FCmpLtEq(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); + +inline bool tfS32x2ICmpAllEq(Tsimd_i32x2_t arg1, Tsimd_i32x2_t arg2); +inline bool tfS32x2FCmpAllEq(Tsimd_f32x2_t arg1, Tsimd_f32x2_t arg2); + +static inline bool tfS32x2FCmpAllLt(Tsimd_f32x2_t a, Tsimd_f32x2_t b); #if defined(TF_FEATURE_CPU_SSE) #include "Internal/TF_Simd32x2_sse.inl" diff --git a/Forge/Math/TF_Simd32x3.h b/Forge/Math/TF_Simd32x3.h index c21048a654..f5f5eedd7b 100644 --- a/Forge/Math/TF_Simd32x3.h +++ b/Forge/Math/TF_Simd32x3.h @@ -11,83 +11,83 @@ #include "Forge/Math/Internal/SimdTypes.h" -inline TSimdFloat32x3 tfSimd3fSplat(float value); -inline TSimdInt32x3 tfSimd3iSplat(int32_t value); - -inline TSimdFloat32x3 tfSimd3fZero(); -inline TSimdInt32x3 tfSimd3iZero(); - -inline TSimdFloat32x2 tfSimd3fToSimd2f(TSimdFloat32x3 value); -inline TSimdInt32x3 
tfSimd3fToSimd3i(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3iToSimd3f(TSimdInt32x3 value); - -inline TSimdFloat32x3 tfSimd3fSplatIndex0(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3fSplatIndex1(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3fSplatIndex2(TSimdFloat32x3 value); - -static inline TSimdFloat32x4 tfSimdFloat3To4Splat0(TSimdFloat32x3 value); -static inline TSimdFloat32x4 tfSimdFloat3To4Splat1(TSimdFloat32x3 value); -static inline TSimdFloat32x4 tfSimdFloat3To4Splat2(TSimdFloat32x3 value); - -inline TSimdFloat32x3 tfSimd3fSelect(TSimdFloat32x3 arg0, TSimdFloat32x3 arg1, TSimdFloat32x3 mask); -inline TSimdInt32x3 tfSimd3iSelect(TSimdInt32x3 arg0, TSimdInt32x3 arg1, TSimdInt32x3 mask); - -inline float tfSimd3fSelectIndex0(TSimdFloat32x3 value); -inline float tfSimd3fSelectIndex1(TSimdFloat32x3 value); -inline float tfSimd3fSelectIndex2(TSimdFloat32x3 value); - -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex0ByValue(TSimdFloat32x3 input, float value); -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex1ByValue(TSimdFloat32x3 input, float value); -static inline TSimdFloat32x3 tfSimdFloat3x32ReplaceIndex2ByValue(TSimdFloat32x3 input, float value); - -inline TSimdFloat32x3 tfSimdFloat3x32Load(float x, float y, float z); -inline TSimdInt32x3 tfSimdInt3x32Load(int32_t x, int32_t y, int32_t z); - -inline TSimdFloat32x3 tfSimd3fAdd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fSub(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fMul(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fMadd(TSimdFloat32x3 mul1, TSimdFloat32x3 mul2, TSimdFloat32x3 add); -inline TSimdFloat32x3 tfSimd3fDiv(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fAbs(TSimdFloat32x3 value); - -inline TSimdFloat32x3 tfSimd3fNot(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3fAnd(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fAndNot(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fOr(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fXor(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); - -inline TSimdInt32x3 tfSimd3iNot(TSimdInt32x3 value); -inline TSimdInt32x3 tfSimd3iAnd(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iAndNot(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iOr(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iXor(TSimdInt32x3 arg1, TSimdInt32x3 arg2); - -inline TSimdFloat32x3 tfSimd3fFloor(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3fCeil(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3fRound(TSimdFloat32x3 value); // Ties to even (banker's rounding) -inline TSimdFloat32x3 tfSimd3fTruncate(TSimdFloat32x3 value); -inline TSimdFloat32x3 tfSimd3fMin(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fMax(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fClamp(TSimdFloat32x3 value, TSimdFloat32x3 min, TSimdFloat32x3 max); - -inline TSimdInt32x3 tfSimd3iCmpEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iCmpNeq(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iCmpGt(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iCmpGtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iCmpLt(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline TSimdInt32x3 tfSimd3iCmpLtEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2); - -inline 
TSimdFloat32x3 tfSimd3fCmpEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fCmpNeq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fCmpGt(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fCmpGtEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fCmpLt(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); -inline TSimdFloat32x3 tfSimd3fCmpLtEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); - -inline bool tfSimd3iCmpAllEq(TSimdInt32x3 arg1, TSimdInt32x3 arg2); -inline bool tfSimd3fCmpAllEq(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2); - -static inline bool tfSimdFloat32x3CmpAllLt(TSimdFloat32x3 a, TSimdFloat32x3 b); +inline Tsimd_f32x3_t tfS32x3FSplat(float value); +inline Tsimd_i32x3_t tfS32x3iSplat(int32_t value); + +inline Tsimd_f32x3_t tfS32x3FZero(); +inline Tsimd_i32x3_t tfS32x3iZero(); + +inline Tsimd_f32x2_t tfS32x3FToSimd2f(Tsimd_f32x3_t value); +inline Tsimd_i32x3_t tfS32x3FToSimd3i(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t tfS32x3iToSimd3f(Tsimd_i32x3_t value); + +inline Tsimd_f32x3_t tfS32x3FSplatIndex0(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t tfS32x3FSplatIndex1(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t tfS32x3FSplatIndex2(Tsimd_f32x3_t value); + +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat0(Tsimd_f32x3_t value); +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat1(Tsimd_f32x3_t value); +static inline Tsimd_f32x4_t tfS32x3FTo32x4FSplat2(Tsimd_f32x3_t value); + +inline Tsimd_f32x3_t tfS32x3FSelect(Tsimd_f32x3_t arg0, Tsimd_f32x3_t arg1, Tsimd_f32x3_t mask); +inline Tsimd_i32x3_t tfS32x3iSelect(Tsimd_i32x3_t arg0, Tsimd_i32x3_t arg1, Tsimd_i32x3_t mask); + +inline float tfS32x3FSelectIndex0(Tsimd_f32x3_t value); +inline float tfS32x3FSelectIndex1(Tsimd_f32x3_t value); +inline float tfS32x3FSelectIndex2(Tsimd_f32x3_t value); + +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex0ByValue(Tsimd_f32x3_t input, float value); +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex1ByValue(Tsimd_f32x3_t input, float value); +static inline Tsimd_f32x3_t tfSimd3x32FReplaceIndex2ByValue(Tsimd_f32x3_t input, float value); + +inline Tsimd_f32x3_t tfSimd3x32FLoad(float x, float y, float z); +inline Tsimd_i32x3_t tfSimd3x32ILoad(int32_t x, int32_t y, int32_t z); + +inline Tsimd_f32x3_t tfS32x3FAdd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FSub(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FMul(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FMadd(Tsimd_f32x3_t mul1, Tsimd_f32x3_t mul2, Tsimd_f32x3_t add); +inline Tsimd_f32x3_t tfS32x3FDiv(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FAbs(Tsimd_f32x3_t value); + +inline Tsimd_f32x3_t tfS32x3FNot(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t tfS32x3FAnd(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FAndNot(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FOr(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FXor(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); + +inline Tsimd_i32x3_t tfS32x3iNot(Tsimd_i32x3_t value); +inline Tsimd_i32x3_t tfS32x3iAnd(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iAndNot(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iOr(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iXor(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); + +inline Tsimd_f32x3_t tfS32x3FFloor(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t 
tfS32x3FCeil(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t tfS32x3FRound(Tsimd_f32x3_t value); // Ties to even (banker's rounding) +inline Tsimd_f32x3_t tfS32x3FTruncate(Tsimd_f32x3_t value); +inline Tsimd_f32x3_t tfS32x3FMin(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FMax(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FClamp(Tsimd_f32x3_t value, Tsimd_f32x3_t min, Tsimd_f32x3_t max); + +inline Tsimd_i32x3_t tfS32x3iCmpEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iCmpNeq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iCmpGt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iCmpGtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iCmpLt(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline Tsimd_i32x3_t tfS32x3iCmpLtEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); + +inline Tsimd_f32x3_t tfS32x3FCmpEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FCmpNeq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FCmpGt(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FCmpGtEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FCmpLt(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); +inline Tsimd_f32x3_t tfS32x3FCmpLtEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); + +inline bool tfS32x3iCmpAllEq(Tsimd_i32x3_t arg1, Tsimd_i32x3_t arg2); +inline bool tfS32x3FCmpAllEq(Tsimd_f32x3_t arg1, Tsimd_f32x3_t arg2); + +static inline bool tfSimdFloat32x3CmpAllLt(Tsimd_f32x3_t a, Tsimd_f32x3_t b); #if defined(TF_FEATURE_CPU_SSE) #include "Internal/TF_Simd32x3_sse.inl" diff --git a/Forge/Math/TF_Simd32x4.h b/Forge/Math/TF_Simd32x4.h index e55e6d7d74..09c5648a16 100644 --- a/Forge/Math/TF_Simd32x4.h +++ b/Forge/Math/TF_Simd32x4.h @@ -12,84 +12,182 @@ #include "Forge/Math/Internal/SimdTypes.h" #include "Forge/TF_Log.h" -inline TSimdFloat32x4 tfSimd4fSplat(float value); -inline TSimdInt32x4 tfSimd4iSplat(int32_t value); - -inline TSimdFloat32x4 tfSimd4fZero(); -inline TSimdInt32x4 tfSimd4iZero(); - -inline TSimdFloat32x2 tfSimd4fToSimd2f(TSimdFloat32x4 value); -inline TSimdFloat32x3 tfSimd4fToSimd3f(TSimdFloat32x4 value); -inline TSimdInt32x4 tfSimd4fToSimd4i(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4iToSimd4f(TSimdInt32x4 value); - -inline TSimdFloat32x4 tfSimd4fSplatIndex0(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fSplatIndex1(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fSplatIndex2(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fSplatIndex3(TSimdFloat32x4 value); - -inline TSimdFloat32x4 tfSimd4fSelect(TSimdFloat32x4 arg0, TSimdFloat32x4 arg1, TSimdFloat32x4 mask); -inline TSimdInt32x4 tfSimd4iSelect(TSimdInt32x4 arg0, TSimdInt32x4 arg1, TSimdInt32x4 mask); - -inline float tfSimd4fSelectIndex0(TSimdFloat32x4 value); -inline float tfSimd4fSelectIndex1(TSimdFloat32x4 value); -inline float tfSimd4fSelectIndex2(TSimdFloat32x4 value); -inline float tfSimd4fSelectIndex3(TSimdFloat32x4 value); - -static inline TSimdFloat32x4 tfSimd4fReplaceIndex0ByValue(TSimdFloat32x4 input, float value); -static inline TSimdFloat32x4 tfSimd4fReplaceIndex1ByValue(TSimdFloat32x4 input, float value); -static inline TSimdFloat32x4 tfSimd4fReplaceIndex2ByValue(TSimdFloat32x4 input, float value); -static inline TSimdFloat32x4 tfSimd4fReplaceIndex3ByValue(TSimdFloat32x4 input, float value); - -inline TSimdFloat32x4 tfSimdFloat4x32Load(float x, float y, float z, float w); -inline TSimdInt32x4 
tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w); - -inline TSimdFloat32x4 tfSimd4fAdd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fSub(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fMul(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fMadd(TSimdFloat32x4 mul1, TSimdFloat32x4 mul2, TSimdFloat32x4 add); -inline TSimdFloat32x4 tfSimdFloat4x32Div(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fAbs(TSimdFloat32x4 value); - -inline TSimdFloat32x4 tfSimd4fNot(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fAnd(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fAndNot(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fOr(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fXor(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); - -inline TSimdInt32x4 tfSimd4iNot(TSimdInt32x4 value); -inline TSimdInt32x4 tfSimd4iAnd(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iAndNot(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iOr(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iXor(TSimdInt32x4 arg1, TSimdInt32x4 arg2); - -inline TSimdFloat32x4 tfSimd4fFloor(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fCeil(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fRound(TSimdFloat32x4 value); // Ties to even (banker's rounding) -inline TSimdFloat32x4 tfSimd4fTruncate(TSimdFloat32x4 value); -inline TSimdFloat32x4 tfSimd4fMin(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fMax(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fClamp(TSimdFloat32x4 value, TSimdFloat32x4 min, TSimdFloat32x4 max); - -inline TSimdInt32x4 tfSimd4iCmpEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iCmpNeq(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iCmpGt(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iCmpGtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iCmpLt(TSimdInt32x4 arg1, TSimdInt32x4 arg2); -inline TSimdInt32x4 tfSimd4iCmpLtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2); - -inline TSimdFloat32x4 tfSimd4fCmpEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fCmpNeq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fCmpGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fCmpGtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fCmpLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline TSimdFloat32x4 tfSimd4fCmpLtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); - -inline bool tfSimd4iCmpAllEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2); - -inline bool tfSimd4fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline bool tfSimd4fCmpAllLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); -inline bool tfSimd4fCmpAllGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2); +// Tsimd_f32x4_t +static inline Tsimd_f32x4_t tfSimdLoad_f32x4(float x, float y, float z, float w); +static inline Tsimd_f32x4_t tfSimdZero_f32x4(); + +static inline Tsimd_f32x4_t tfSimdSplat_f32x4(float value); +static inline Tsimd_f32x4_t tfSimdSplat0_f32x4(Tsimd_f32x4_t value); +static inline Tsimd_f32x4_t tfSimdSplat1_f32x4(Tsimd_f32x4_t value); +static inline Tsimd_f32x4_t tfSimdSplat2_f32x4(Tsimd_f32x4_t value); +static inline Tsimd_f32x4_t tfSimdSplat3_f32x4(Tsimd_f32x4_t value); + +static inline Tsimd_f32x4_t 
tfSimdDot_f32x4(Tsimd_f32x4_t a,Tsimd_f32x4_t b); +static inline float tfSimdDot_f32x4_f32(Tsimd_f32x4_t a,Tsimd_f32x4_t b); + +static inline float tfSimdSelect_f32x4(Tsimd_f32x4_t value, int index); +static inline float tfSimdSelect0_f32x4(Tsimd_f32x4_t value); +static inline float tfSimdSelect1_f32x4(Tsimd_f32x4_t value); +static inline float tfSimdSelect2_f32x4(Tsimd_f32x4_t value); +static inline float tfSimdSelect3_f32x4(Tsimd_f32x4_t value); + +static inline Tsimd_f32x4_t tfSimdAdd_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b); +static inline Tsimd_f32x4_t tfSimdMul_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b); +static inline Tsimd_f32x4_t tfSimdDiv_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b); +static inline Tsimd_f32x4_t tfSimdAbs_f32x4(Tsimd_f32x4_t a); +static inline Tsimd_f32x4_t tfSimdMadd_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b, Tsimd_f32x4_t c); + +static inline Tsimd_f32x4_t tfSimdNot_f32x4(Tsimd_f32x4_t value); +static inline Tsimd_f32x4_t tfSimdAnd_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdAndNot_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdOr_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdXor_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); + +static inline Tsimd_f32x4_t tfSimdCmpEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdCmpNeq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdCmpGt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdCmpGtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdCmpLt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline Tsimd_f32x4_t tfSimdCmpLtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); + +static inline bool tfSimdCmpAllEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline bool tfSimdCmpAllNeq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline bool tfSimdCmpAllGt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline bool tfSimdCmpAllGtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline bool tfSimdCmpAllLt_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +static inline bool tfSimdCmpAllLtEq_f32x4(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); + +// Tsimd_i32x4_t +static inline Tsimd_i32x4_t tfSimdLoad_i32x4(int32_t x, int32_t y, int32_t z, int32_t w); + + +static inline Tsimd_i32x4_t tfSimdSplat_i32x4(int32_t value); +static inline Tsimd_i32x4_t tfSimdSplat0_i32x4(Tsimd_i32x4_t value); +static inline Tsimd_i32x4_t tfSimdSplat1_i32x4(Tsimd_i32x4_t value); +static inline Tsimd_i32x4_t tfSimdSplat2_i32x4(Tsimd_i32x4_t value); +static inline Tsimd_i32x4_t tfSimdSplat3_i32x4(Tsimd_i32x4_t value); + +static inline int32_t tfSimdSelect_i32x4(Tsimd_i32x4_t value, int index); +static inline int32_t tfSimdSelect0_i32x4(Tsimd_i32x4_t value); +static inline int32_t tfSimdSelect1_i32x4(Tsimd_i32x4_t value); +static inline int32_t tfSimdSelect2_i32x4(Tsimd_i32x4_t value); +static inline int32_t tfSimdSelect3_i32x4(Tsimd_i32x4_t value); + +static inline Tsimd_i32x4_t tfSimdAdd_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b); +static inline Tsimd_i32x4_t tfSimdMul_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b); +static inline Tsimd_i32x4_t tfSimdAbs_i32x4(Tsimd_i32x4_t a); +static inline Tsimd_i32x4_t tfSimdMadd_i32x4(Tsimd_i32x4_t a, Tsimd_i32x4_t b, Tsimd_i32x4_t c); + +static inline Tsimd_i32x4_t tfSimdNot_i32x4(Tsimd_i32x4_t value); +static inline Tsimd_i32x4_t tfSimdAnd_i32x4(Tsimd_i32x4_t 
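/*
 * [Editor's sketch, not part of the patch] Example of how the renamed f32x4 API above is
 * meant to compose; it only uses functions declared in this header and assumes the per-ISA
 * .inl files provide the definitions. A production caller would prefer tfSimdDot_f32x4_f32.
 */
static inline float exampleDot_f32x4(Tsimd_f32x4_t a, Tsimd_f32x4_t b) {
    Tsimd_f32x4_t p = tfSimdMul_f32x4(a, b);                  // lane-wise products
    return tfSimdSelect0_f32x4(p) + tfSimdSelect1_f32x4(p)    // horizontal sum by lane extraction
         + tfSimdSelect2_f32x4(p) + tfSimdSelect3_f32x4(p);
}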
arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdAndNot_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdOr_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdXor_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); + +static inline Tsimd_i32x4_t tfSimdCmpEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdCmpNeq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdCmpGt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdCmpGtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdCmpLt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline Tsimd_i32x4_t tfSimdCmpLtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); + +static inline bool tfSimdCmpAllEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline bool tfSimdCmpAllNeq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline bool tfSimdCmpAllGt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline bool tfSimdCmpAllGtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline bool tfSimdCmpAllLt_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +static inline bool tfSimdCmpAllLtEq_i32x4(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); + +// -------------------------------------------- + +static inline Tsimd_i32x4_t tfSimd_f32x4_To_i32x4(Tsimd_f32x4_t a); +static inline Tsimd_f32x4_t tfSimd_i32x4_To_f32x4(Tsimd_i32x4_t a); + + + +//inline Tsimd_f32x4_t tfS32x4FSplat(float value); +//inline Tsimd_i32x4_t Tsimd_i32x4_tSplat(int32_t value); +// +//inline Tsimd_f32x4_t tfS32x4FLoad(float x, float y, float z, float w); +//inline Tsimd_i32x4_t tfSimdInt4x32Load(int32_t x, int32_t y, int32_t z, int32_t w); +// +//inline Tsimd_f32x4_t tfS32x4FZero(); +//inline Tsimd_i32x4_t Tsimd_i32x4_tZero(); +// +//inline Tsimd_f32x2_t tfS32x4FToS32x2F(Tsimd_f32x4_t value); +//inline Tsimd_f32x3_t tfS32x4FToS32x3F(Tsimd_f32x4_t value); +//inline Tsimd_i32x4_t tfS32x4FToS32x4I(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t Tsimd_i32x4_tToSimd4f(Tsimd_i32x4_t value); +// +//inline Tsimd_f32x4_t tfS32x4FSplatIndex0(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FSplatIndex1(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FSplatIndex2(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FSplatIndex3(Tsimd_f32x4_t value); +// +//inline Tsimd_f32x4_t tfS32x4FSelect(Tsimd_f32x4_t arg0, Tsimd_f32x4_t arg1, Tsimd_f32x4_t mask); +//inline Tsimd_i32x4_t Tsimd_i32x4_tSelect(Tsimd_i32x4_t arg0, Tsimd_i32x4_t arg1, Tsimd_i32x4_t mask); +// +//inline float tfS32x4FSelectIndex0(Tsimd_f32x4_t value); +//inline float tfS32x4FSelectIndex1(Tsimd_f32x4_t value); +//inline float tfS32x4FSelectIndex2(Tsimd_f32x4_t value); +//inline float tfS32x4FSelectIndex3(Tsimd_f32x4_t value); +// +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex0ByValue(Tsimd_f32x4_t input, float value); +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex1ByValue(Tsimd_f32x4_t input, float value); +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex2ByValue(Tsimd_f32x4_t input, float value); +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex3ByValue(Tsimd_f32x4_t input, float value); +// +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex0(Tsimd_f32x4_t input, Tsimd_f32x4_t value); +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex1(Tsimd_f32x4_t input, Tsimd_f32x4_t value); +//static inline Tsimd_f32x4_t tfS32x4FReplaceIndex2(Tsimd_f32x4_t input, Tsimd_f32x4_t value); +//static inline 
Tsimd_f32x4_t tfS32x4FReplaceIndex3(Tsimd_f32x4_t input, Tsimd_f32x4_t value); +// +//inline Tsimd_f32x4_t tfS32x4FAdd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FSub(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FMul(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FMadd(Tsimd_f32x4_t mul1, Tsimd_f32x4_t mul2, Tsimd_f32x4_t add); +//inline Tsimd_f32x4_t tfS32x4FDiv(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FAbs(Tsimd_f32x4_t value); +// +//inline Tsimd_f32x4_t tfS32x4FNot(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FAnd(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FAndNot(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FOr(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FXor(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +// +//inline Tsimd_i32x4_t Tsimd_i32x4_tNot(Tsimd_i32x4_t value); +//inline Tsimd_i32x4_t Tsimd_i32x4_tAnd(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tAndNot(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tOr(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tXor(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +// +//inline Tsimd_f32x4_t tfS32x4FFloor(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FCeil(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FRound(Tsimd_f32x4_t value); // Ties to even (banker's rounding) +//inline Tsimd_f32x4_t tfS32x4FTruncate(Tsimd_f32x4_t value); +//inline Tsimd_f32x4_t tfS32x4FMin(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FMax(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FClamp(Tsimd_f32x4_t value, Tsimd_f32x4_t min, Tsimd_f32x4_t max); +// +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpNeq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpGt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpGtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpLt(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +//inline Tsimd_i32x4_t Tsimd_i32x4_tCmpLtEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +// +//inline Tsimd_f32x4_t tfS32x4FCmpEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FCmpNeq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FCmpGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FCmpGtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FCmpLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline Tsimd_f32x4_t tfS32x4FCmpLtEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +// +//inline bool Tsimd_i32x4_tCmpAllEq(Tsimd_i32x4_t arg1, Tsimd_i32x4_t arg2); +// +//inline bool tfS32x4FCmpAllEq(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline bool tfS32x4FCmpAllLt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); +//inline bool tfS32x4FCmpAllGt(Tsimd_f32x4_t arg1, Tsimd_f32x4_t arg2); #if defined(TF_FEATURE_CPU_SSE) #include "Internal/TF_Simd32x4_sse.inl" diff --git a/Forge/Math/TF_Simd32x4x4.h b/Forge/Math/TF_Simd32x4x4.h new file mode 100644 index 0000000000..44274b8abe --- /dev/null +++ b/Forge/Math/TF_Simd32x4x4.h @@ -0,0 +1,45 @@ +#pragma once +#ifndef TF_MATH_SIMD_FLOAT4x4_H +#define TF_MATH_SIMD_FLOAT4x4_H + +#include "Internal/SimdTypes.h" + +//static inline TSimdFloat4x4 
tfLoadZeroSimd4x4F(); +//static inline TSimdFloat4x4 tfLoadIdentitySimd4x4F(); +//static inline TSimdFloat4x4 tfLoadSimd4x4F( +// float m00, float m01, float m02, float m03, +// float m10, float m11, float m12, float m13, +// float m20, float m21, float m22, float m23, +// float m30, float m31, float m32, float m33); +// +//static inline TSimdFloat4x4 tfAddPerElemSimd4x4(TSimdFloat4x4 a0, TSimdFloat4x4 a1); +//static inline TSimdFloat4x4 tfMulPerElemSimd4x4(TSimdFloat4x4 a0, TSimdFloat4x4 a1); +//static inline TSimdFloat4x4 tfDivPerElemSimd4x4(TSimdFloat4x4 a0, TSimdFloat4x4 a1); +// +//static inline TSimdFloat4 tfVectorMulSimd4x4F(const TSimdFloat4x4 a0, const TSimdFloat4 a1); +// +//static inline TSimdFloat4x4 tfMulSimd4x4F_4x4F(TSimdFloat4x4 a0, TSimdFloat4x4 a1); +//static inline TSimdFloat4x3 tfMulSimd4x4F_3x4F(TSimdFloat4x4 a0, TSimdFloat4x3 a1); +//static inline TSimdFloat4x2 tfMulSimd4x4F_2x4F(TSimdFloat4x4 a0, TSimdFloat4x2 a1); +//static inline TSimdFloat4x1 tfMulSimd4x4F_1x4F(TSimdFloat4x4 a0, TSimdFloat4x1 a1); +//static inline TSimdFloat4x4 tfTransposeSimd4x4F(TSimdFloat4x4 a0); +//static inline TSimdFloat4x1 tfInverseFullSimd4x4F(TSimdFloat4x4 a0); +// +//static inline TSimdFloat4 tfGetRowSimd4x4F(TSimdFloat4x4 input, int row); +//static inline void tfSetRowSimd4x4F(TSimdFloat4x4* input, int row, TSimdFloat4 value); +//static inline TSimdFloat4 tfGetColumnSimd4x4F(int column); +//static inline void tfSetColumnSimd4x4F(TSimdFloat4x4* input, int row); +//static inline void tfSetElemSimd4x4F(TSimdFloat4x4* input, int col, int row, float value); +// +//static inline bool tfIsCloseSimd4x4F(TSimdFloat4x4 a, TSimdFloat4x4 b, float epsilon); + + +#include "Internal/TF_SimdFloat4x4.inl" +#if defined(TF_FEATURE_CPU_SSE) +#include "Internal/TF_SimdFloat4x4_sse.inl" +#elif defined(TF_FEATURE_CPU_NEON) +#include "Internal/TF_SimdFloat4x4_neon.inl" +#else +#include "Internal/TF_SimdFloat4x4_scalar.inl" +#endif +#endif diff --git a/Forge/Math/TF_SimdFloat.h b/Forge/Math/TF_SimdFloat.h index 174a8840f8..ead63701f4 100644 --- a/Forge/Math/TF_SimdFloat.h +++ b/Forge/Math/TF_SimdFloat.h @@ -28,28 +28,13 @@ static inline float tfGetYSimd2F(TSimdFloat2 a); static inline bool tfIsCloseSimd2F(TSimdFloat2 a, TSimdFloat2 b, float epsilon); -// ----------------------------------------------------------- -// TSimdFloat3 -// ----------------------------------------------------------- - - -// ----------------------------------------------------------- -// TSimdFloat4 -// ----------------------------------------------------------- #include "TF_SimdFloat3.h" #include "TF_SimdFloat4.h" +#include "TF_SimdFloat3x3.h" -// ----------------------------------------------------------- -// TSimdFloat3x3 -// ----------------------------------------------------------- -static inline TSimdFloat3x3 tfLoadIdentitySimd3x4F(); -static inline void tfSetElemSimd3x3F(TSimdFloat3x3* input, int col, int row, float value); - -// ----------------------------------------------------------- -// TSimdFloat3x4 -// ----------------------------------------------------------- +static inline TSimdFloat4x3 tfLoadIdentitySimd4x3F(); -static inline TSimdFloat4x3 tfLoadSimd3x4F(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, +static inline TSimdFloat4x3 tfLoadSimd4x3F(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22, float m30, float m31, float m32); // ----------------------------------------------------------- @@ -60,46 +45,27 @@ static inline 
TSimdFloat4x3 tfLoadSimd3x4F(float m00, float m01, float m02, flo static inline TSimdFloat4x2 tfLoadSimd4x2F(float m00, float m01, float m10, float m11, float m20, float m21, float m30, float m31); static inline TSimdFloat4x1 tfLoadSimd4x1F(float m00, float m10, float m20, float m30); -static inline TSimdFloat3x3 tfLoadSimd3x3F(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, - float m22); static inline TSimdFloat3x2 tfLoadSimd2x3F(float m00, float m01, float m10, float m11, float m20, float m21); static inline TSimdFloat3x1 tfLoadSimd1x3F(float m00, float m10, float m20); - static inline void tfSetElemSimd2x3F(TSimdFloat3x2* input, int col, int row, float value); static inline void tfSetElemSimd1x3F(TSimdFloat3x1* input, int col, int row, float value); static inline TSimdFloat2x2 tfLoadSimd2x2F(float m00, float m01, float m10, float m11); static inline TSimdFloat2x1 tfLoadSimd2x1F(float m00, float m10); -static inline TSimdFloat4x4 tfTransposeSimd4x4F(TSimdFloat4x4 input); - static inline bool tfIsCloseSimd3x4F(TSimdFloat4x3 a, TSimdFloat4x3 b, float epsilon); static inline bool tfIsCloseSimd2x4F(TSimdFloat4x2 a, TSimdFloat4x2 b, float epsilon); static inline bool tfIsCloseSimd1x4F(TSimdFloat4x1 a, TSimdFloat4x1 b, float epsilon); -static inline bool tfIsCloseSimd4F(TSimdFloat4 a, TSimdFloat4 b, float epsilon); - static inline void tfSetElemSimd3x4F(TSimdFloat4x3* input, int col, int row, float value); static inline void tfSetElemSimd2x4F(TSimdFloat4x2* input, int col, int row, float value); static inline void tfSetElemSimd1x4F(TSimdFloat4x1* input, int col, int row, float value); -static inline TSimdFloat4 tfGetRowSimd4x4F(TSimdFloat4x4 input, int row); static inline TSimdFloat3 tfGetRowSimd3x4F(TSimdFloat4x3 input, int row); static inline TSimdFloat2 tfGetRowSimd2x4F(TSimdFloat4x2 input, int row); static inline float tfGetRowSimd1x4F(TSimdFloat4x1 input, int row); -static inline float tfVectorDot4F(TSimdFloat4 a0, TSimdFloat4 a1); -static inline float tfVectorLengthSq4F(TSimdFloat4 a0, TSimdFloat4 a1); -static inline float tfVectorLength4F(TSimdFloat4 a0, TSimdFloat4 a1); - -static inline TSimdFloat4 tfVectorEleDiv4F(TSimdFloat4 a0, TSimdFloat4 a1); -static inline TSimdFloat4 tfVectorEleAdd4F(TSimdFloat4 a0, TSimdFloat4 a1); -static inline TSimdFloat4 tfVectorEleSub4F(TSimdFloat4 a0, TSimdFloat4 a1); -static inline TSimdFloat4 tfVectorEleMul4F(TSimdFloat4 a0, TSimdFloat4 a1); - -static inline bool tfIsCloseSimd4x4F(TSimdFloat4x4 a, TSimdFloat4x4 b, float epsilon); -static inline void tfSetElemSimd4x4F(TSimdFloat4x4* input, int col, int row, float value); // conviences if cpp is avaliable diff --git a/Forge/Math/TF_SimdFloat3.h b/Forge/Math/TF_SimdFloat3.h index 258e73d2ac..0937349dce 100644 --- a/Forge/Math/TF_SimdFloat3.h +++ b/Forge/Math/TF_SimdFloat3.h @@ -4,7 +4,6 @@ #include "Internal/SimdTypes.h" -static inline TSimdFloat3 tfLoadZeroSimd3x3F(); static inline TSimdFloat3 tfLoadSimd3F(float x, float y, float z); static inline float tfVectorDot3F(TSimdFloat3 a0, TSimdFloat3 a1); diff --git a/Forge/Math/TF_SimdFloat3x3.h b/Forge/Math/TF_SimdFloat3x3.h new file mode 100644 index 0000000000..cd943881ca --- /dev/null +++ b/Forge/Math/TF_SimdFloat3x3.h @@ -0,0 +1,21 @@ +#pragma once +#ifndef TF_MATH_SIMD_FLOAT3x3_H +#define TF_MATH_SIMD_FLOAT3x3_H + +#include "Internal/SimdTypes.h" + + +static inline TSimdFloat3x3 tfLoadZeroSimd3x3F(); +static inline TSimdFloat3x3 tfLoadIdentitySimd3x3F(); +static inline TSimdFloat3x3 tfLoadSimd3x3F( + float m00, 
float m01, float m02, + float m10, float m11, float m12, + float m20, float m21, float m22); + + +static inline TSimdFloat4x3 tfMatMul3x3F_3x3F(TSimdFloat3x3 a0, TSimdFloat3x3 a1); +static inline TSimdFloat4x2 tfMatMul3x3F_3x2F(TSimdFloat3x3 a0, TSimdFloat3x2 a1); + +static inline void tfSetElemSimd3x3F(TSimdFloat3x3* input, int col, int row, float value); + +#endif diff --git a/Forge/Math/TF_SimdFloat4.h b/Forge/Math/TF_SimdFloat4.h index 8b4bcaeac8..46362b2652 100644 --- a/Forge/Math/TF_SimdFloat4.h +++ b/Forge/Math/TF_SimdFloat4.h @@ -12,6 +12,17 @@ static inline float tfGetYSimd4F(TSimdFloat4 a); static inline float tfGetZSimd4F(TSimdFloat4 a); static inline float tfGetWSimd4F(TSimdFloat4 a); +static inline bool tfIsCloseSimd4F(TSimdFloat4 a, TSimdFloat4 b, float epsilon); + +static inline float tfVectorDot4F(TSimdFloat4 a0, TSimdFloat4 a1); +static inline float tfVectorLengthSq4F(TSimdFloat4 a0, TSimdFloat4 a1); +static inline float tfVectorLength4F(TSimdFloat4 a0, TSimdFloat4 a1); + +static inline TSimdFloat4 tfVectorEleDiv4F(TSimdFloat4 a0, TSimdFloat4 a1); +static inline TSimdFloat4 tfVectorEleAdd4F(TSimdFloat4 a0, TSimdFloat4 a1); +static inline TSimdFloat4 tfVectorEleSub4F(TSimdFloat4 a0, TSimdFloat4 a1); +static inline TSimdFloat4 tfVectorEleMul4F(TSimdFloat4 a0, TSimdFloat4 a1); + #include "Internal/TF_SimdFloat4.inl" #endif diff --git a/Forge/Math/TF_SimdFloat4x2.h b/Forge/Math/TF_SimdFloat4x2.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Forge/Math/TF_SimdFloat4x3.h b/Forge/Math/TF_SimdFloat4x3.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Forge/Math/TF_SimdFloat4x4.h b/Forge/Math/TF_SimdFloat4x4.h deleted file mode 100644 index 25a5bcc8fd..0000000000 --- a/Forge/Math/TF_SimdFloat4x4.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once -#ifndef TF_MATH_SIMD_FLOAT4x4_H -#define TF_MATH_SIMD_FLOAT4x4_H - -#include "Internal/SimdTypes.h" - -static inline TSimdFloat4x4 tfLoadZeroSimd4x4F(); -static inline TSimdFloat4x4 tfLoadIdentitySimd4x4F(); -static inline TSimdFloat4x4 tfLoadSimd4x4F( - float m00, float m01, float m02, float m03, - float m10, float m11, float m12, float m13, - float m20, float m21, float m22, float m23, - float m30, float m31, float m32, float m33); - -static inline TSimdFloat4 tfVectorMul4x4F(const TSimdFloat4x4 a0, const TSimdFloat4 a1); -static inline TSimdFloat4x4 tfMatMul4x4F_4x4F(TSimdFloat4x4 a0, TSimdFloat4x4 a1); -static inline TSimdFloat4x3 tfMatMul4x4F_3x4F(TSimdFloat4x4 a0, TSimdFloat4x3 a1); -static inline TSimdFloat4x2 tfMatMul4x4F_2x4F(TSimdFloat4x4 a0, TSimdFloat4x2 a1); -static inline TSimdFloat4x1 tfMatMul4x4F_1x4F(TSimdFloat4x4 a0, TSimdFloat4x1 a1); -static inline TSimdFloat4x4 tfMatTranpose4x4F(TSimdFloat4x4 a0); - -static inline TSimdFloat4x1 tfMatInverseFull4x4F(TSimdFloat4x4 a0); - -#include "Internal/TF_SimdFloat4x4.inl" -#if defined(TF_FEATURE_CPU_SSE) -#include "Internal/TF_SimdFloat4x4_sse.inl" -#elif defined(TF_FEATURE_CPU_NEON) -#include "Internal/TF_SimdFloat4x4_neon.inl" -#else -#include "Internal/TF_SimdFloat4x4_scalar.inl" -#endif -#endif diff --git a/Forge/Math/TF_SimdMath4.h b/Forge/Math/TF_SimdMath4.h new file mode 100644 index 0000000000..197fc57ea2 --- /dev/null +++ b/Forge/Math/TF_SimdMath4.h @@ -0,0 +1,566 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ +#include "Forge/Math/Internal/Types.h" + +inline TSimd32fx4 tfSimd4fSplat(float value); +inline TSimdi32x4 tfSimd4iSplat(int32_t value); + +inline TSimd32fx4 tfSimd4fZero(); +inline TSimdi32x4 tfSimd4iZero(); + +inline Simd_FloatType tfSimd4fToSimd1f(TSimd32fx4 value); +inline TSimdf32x2 tfSimd4fToSimd2f(TSimd32fx4 value); +inline TSimdf32x3 tfSimd4fToSimd3f(TSimd32fx4 value); +inline TSimdi32x4 tfSimd4fToSimd4i(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4iToSimd4f(TSimdi32x4 value); + +inline TSimd32fx4 tfSimd4fSplatIndex0(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fSplatIndex1(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fSplatIndex2(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fSplatIndex3(TSimd32fx4 value); + +inline TSimd32fx4 tfSimd4fSelect(TSimd32fx4 arg0, TSimd32fx4 arg1, TSimd32fx4 mask); +inline TSimdi32x4 tfSimd4iSelect(TSimdi32x4 arg0, TSimdi32x4 arg1, TSimdi32x4 mask); + +inline float tfS32x4FSelectIndex0(TSimd32fx4 value); +inline float tfS32x4FSelectIndex1(TSimd32fx4 value); +inline float tfS32x4FSelectIndex2(TSimd32fx4 value); +inline float tfS32x4FSelectIndex3(TSimd32fx4 value); + +inline TSimd32fx4 tfSimdFloat4Load(float x, float y, float z, float w); +inline TSimdi32x4 tfSimdInt4Load(int32_t x, int32_t y, int32_t z, int32_t w); + +inline TSimd32fx4 tfSimd4fAdd(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fSub(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fMul(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fMadd(TSimd32fx4 mul1, TSimd32fx4 mul2, TSimd32fx4 add); +inline TSimd32fx4 tfSimd4fDiv(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fAbs(TSimd32fx4 value); + +inline TSimd32fx4 tfSimd4fNot(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fAnd(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fAndNot(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fOr(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fXor(TSimd32fx4 arg1, TSimd32fx4 arg2); + +inline TSimdi32x4 tfSimd4iNot(TSimdi32x4 value); +inline TSimdi32x4 tfSimd4iAnd(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iAndNot(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iOr(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iXor(TSimdi32x4 arg1, TSimdi32x4 arg2); + +inline TSimd32fx4 tfSimd4fFloor(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fCeil(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fRound(TSimd32fx4 value); // Ties to even (banker's rounding) +inline TSimd32fx4 tfSimd4fTruncate(TSimd32fx4 value); +inline TSimd32fx4 tfSimd4fMin(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fMax(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fClamp(TSimd32fx4 value, TSimd32fx4 min, TSimd32fx4 max); + +inline TSimdi32x4 tfSimd4iCmpEq(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iCmpNeq(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iCmpGt(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iCmpGtEq(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iCmpLt(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline TSimdi32x4 tfSimd4iCmpLtEq(TSimdi32x4 arg1, TSimdi32x4 arg2); + +inline TSimd32fx4 tfSimd4fCmpEq(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fCmpNeq(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fCmpGt(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fCmpGtEq(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 
tfSimd4fCmpLt(TSimd32fx4 arg1, TSimd32fx4 arg2); +inline TSimd32fx4 tfSimd4fCmpLtEq(TSimd32fx4 arg1, TSimd32fx4 arg2); + +inline bool tfSimd4iCmpAllEq(TSimdi32x4 arg1, TSimdi32x4 arg2); +inline bool tfSimd4fCmpAllEq(TSimd32fx4 arg1, TSimd32fx4 arg2); + +// ---------------------------------------------------------------- +// --------------------- Implementaion ---------------------------- +// ---------------------------------------------------------------- +inline TSimdi32x4 tfSimd4iSelect(TSimdi32x4 arg0, TSimdi32x4 arg1, TSimdi32x4 mask) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_blendv_epi8(arg0, arg1, mask); +#else + return {(mask.v[0] == 0) ? arg0.v[0] : arg1.v[0] + , (mask.v[1] == 0) ? arg0.v[1] : arg1.v[1] + , (mask.v[2] == 0) ? arg0.v[2] : arg1.v[2] + , (mask.v[3] == 0) ? arg0.v[3] : arg1.v[3] }; +#endif +} +inline TSimd32fx4 tfSimd4fSelect(TSimd32fx4 arg0, TSimd32fx4 arg1, TSimd32fx4 mask) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_blendv_ps(arg0, arg1, mask); +#else + TSimdi32x4 intMask = tfSimd4fToSimd4i(mask); + return { (intMask.v[0] == 0) ? arg0.v[0] : arg1.v[0] + , (intMask.v[1] == 0) ? arg0.v[1] : arg1.v[1] + , (intMask.v[2] == 0) ? arg0.v[2] : arg1.v[2] + , (intMask.v[3] == 0) ? arg0.v[3] : arg1.v[3] }; +#endif +} + +inline TSimd32fx4 tfSimd4fZero() { return tfSimd4iToSimd4f(tfSimd4iZero()); } +inline TSimdi32x4 tfSimd4iZero() +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_setzero_si128(); +#else + return { 0, 0, 0, 0 }; +#endif +} + +inline TSimdi32x4 tfSimd4iNot(TSimdi32x4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + const TSimdi32x4 invert = tfSimd4iSplat(TF_SIMDI_MAX); + return _mm_andnot_si128(value, invert); +#else + return { ~value.v[0], ~value.v[1], ~value.v[2], ~value.v[3] }; +#endif +} +inline TSimdi32x4 tfSimd4iAnd(TSimdi32x4 arg1, TSimdi32x4 arg2){ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_and_si128(arg1, arg2); +#else + return { arg1.v[0] & arg2.v[0], arg1.v[1] & arg2.v[1], arg1.v[2] & arg2.v[2], arg1.v[3] & arg2.v[3] }; +#endif + +} +inline TSimdi32x4 tfSimd4iAndNot(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_andnot_si128(arg1, arg2); +#else + return { ~arg1.v[0] & arg2.v[0], ~arg1.v[1] & arg2.v[1], ~arg1.v[2] & arg2.v[2], ~arg1.v[3] & arg2.v[3] }; +#endif +} +inline TSimdi32x4 tfSimd4iOr(TSimdi32x4 arg1, TSimdi32x4 arg2){ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_or_si128(arg1, arg2); +#else + return { arg1.v[0] | arg2.v[0], arg1.v[1] | arg2.v[1], arg1.v[2] | arg2.v[2], arg1.v[3] | arg2.v[3] }; +#endif +} +inline TSimdi32x4 tfSimd4iXor(TSimdi32x4 arg1, TSimdi32x4 arg2){ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_xor_si128(arg1, arg2); +#else + return { arg1.v[0] ^ arg2.v[0], arg1.v[1] ^ arg2.v[1], arg1.v[2] ^ arg2.v[2], arg1.v[3] ^ arg2.v[3] }; +#endif +} + +inline TSimd32fx4 tfSimd4fNot(TSimd32fx4 value) { +#if defined(TF_FEATURE_CPU_SSE) + const TSimd32fx4 invert = tfSimd4fSplat((float)(0xFFFFFFFF)); + return _mm_andnot_ps(value, invert); +#else + TSimdi32x4 result = { { ~((int32_t)value.v[0]), ~((int32_t)value.v[1]), + ~((int32_t)value.v[2]), ~((int32_t)value.v[3]) } }; + return tfSimd4iToSimd4f(result); +#endif +} +inline TSimd32fx4 tfSimd4fAnd(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_and_ps(arg1, arg2); +#else + TSimdi32x4 result = { ((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), ((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), + ((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]), ((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3]) }; + return tfSimd4iToSimd4f(result); 
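+    // Note: unlike the _mm_and_ps path above, this scalar fallback operates on
+    // value-converted integers ((int32_t) casts to and from float) rather than on
+    // the raw IEEE-754 bit patterns, so the SSE and scalar paths can disagree when
+    // these float overloads are used for mask manipulation.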
+#endif +} +inline TSimd32fx4 tfSimd4fAndNot(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_andnot_ps(arg1, arg2); +#else + TSimdi32x4 result = { { ~((int32_t)arg1.v[0]) & ((int32_t)arg2.v[0]), + ~((int32_t)arg1.v[1]) & ((int32_t)arg2.v[1]), + ~((int32_t)arg1.v[2]) & ((int32_t)arg2.v[2]), + ~((int32_t)arg1.v[3]) & ((int32_t)arg2.v[3]) } }; + return tfSimd4iToSimd4f(result); +#endif +} +inline TSimd32fx4 tfSimd4fOr(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_or_ps(arg1, arg2); +#else + TSimdi32x4 result = { { ((int32_t)arg1.v[0]) | ((int32_t)arg2.v[0]), + ((int32_t)arg1.v[1]) | ((int32_t)arg2.v[1]), + ((int32_t)arg1.v[2]) | ((int32_t)arg2.v[2]), + ((int32_t)arg1.v[3]) | ((int32_t)arg2.v[3]) } }; + return tfSimd4iToSimd4f(result); +#endif +} +inline TSimd32fx4 tfSimd4fXor(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_xor_ps(arg1, arg2); +#else + TSimdi32x4 result = { { ((int32_t)arg1.v[0]) ^ ((int32_t)arg2.v[0]), + ((int32_t)arg1.v[1]) ^ ((int32_t)arg2.v[1]), + ((int32_t)arg1.v[2]) ^ ((int32_t)arg2.v[2]), + ((int32_t)arg1.v[3]) ^ ((int32_t)arg2.v[3]) } }; + return tfSimd4iToSimd4f(result); +#endif +} + +inline TSimd32fx4 tfSimd4fFloor(TSimd32fx4 value) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_floor_ps(value); +#else + return { { floorf(value.v[0]), floorf(value.v[1]), floorf(value.v[2]), floorf(value.v[3]) } }; +#endif +} +inline TSimd32fx4 tfSimd4fCeil(TSimd32fx4 value) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_ceil_ps(value); +#else + return { { ceilf(value.v[0]), ceilf(value.v[1]), ceilf(value.v[2]), ceilf(value.v[3]) } }; +#endif +} +inline TSimd32fx4 tfSimd4fRound(TSimd32fx4 value) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_round_ps(value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +#else + // While 'roundf' may seem the obvious choice here, it rounds halfway cases + // away from zero regardless of the current rounding mode, but 'rintf' uses + // the current rounding mode which is consistent with other implementations. 
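+    // For example, under the default round-to-nearest-even mode:
+    //   rintf(2.5f) == 2.0f and rintf(3.5f) == 4.0f, whereas roundf(2.5f) == 3.0f.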
+ return { { rintf(value.v[0]), rintf(value.v[1]), rintf(value.v[2]), rintf(value.v[3]) } }; +#endif +} +inline TSimd32fx4 tfSimd4fTruncate(TSimd32fx4 value) { return tfSimd4iToSimd4f(tfSimd4fToSimd4i(value)); } +inline TSimd32fx4 tfSimd4fMin(TSimd32fx4 arg1, TSimd32fx4 arg2) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_min_ps(arg1, arg2); +#else + return { { fminf(arg1.v[0], arg2.v[0]), fminf(arg1.v[1], arg2.v[1]), fminf(arg1.v[2], arg2.v[2]), fminf(arg1.v[3], arg2.v[3]) } }; +#endif +} +inline TSimd32fx4 tfSimd4fMax(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_max_ps(arg1, arg2); +#else + return { { fmaxf(arg1.v[0], arg2.v[0]), fmaxf(arg1.v[1], arg2.v[1]), fmaxf(arg1.v[2], arg2.v[2]), fmaxf(arg1.v[3], arg2.v[3]) } }; +#endif +} +inline TSimd32fx4 tfSimd4fClamp(TSimd32fx4 value, TSimd32fx4 min, TSimd32fx4 max) +{ + return tfSimd4fMax(min, tfSimd4fMin(value, max)); +} + +inline TSimdi32x4 tfSimd4fToSimd4i(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_castps_si128(value); +#elif defined(TF_FEATURE_CPU_SCALAR) + return { (int32_t)value.v[0], (int32_t)value.v[1], (int32_t)value.v[2], (int32_t)value.v[3] }; +#endif +} + +inline TSimd32fx4 tfSimd4iToSimd4f(TSimdi32x4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_castsi128_ps(value); +#elif defined(TF_FEATURE_CPU_SCALAR) + return { (float)value.v[0], (float)value.v[1], (float)value.v[2], (float)value.v[3] }; +#endif +} + +inline float tfS32x4FSelectIndex0(TSimd32fx4 value) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_cvtss_f32(value); +#elif defined(TF_FEATURE_CPU_SCALAR) + return value.v[0]; +#endif +} +inline float tfS32x4FSelectIndex1(TSimd32fx4 value) { return tfS32x4FSelectIndex0(tfSimd4fSplatIndex1(value)); } +inline float tfS32x4FSelectIndex2(TSimd32fx4 value) { return tfS32x4FSelectIndex0(tfSimd4fSplatIndex2(value)); } +inline float tfS32x4FSelectIndex3(TSimd32fx4 value) { return tfS32x4FSelectIndex0(tfSimd4fSplatIndex3(value)); } + +inline TSimd32fx4 tfSimd4fAdd(TSimd32fx4 arg1, TSimd32fx4 arg2) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_add_ps(arg1, arg2); +#else + return { + arg1.v[0] + arg2.v[0], + arg1.v[1] + arg2.v[1], + arg1.v[2] + arg2.v[2], + arg1.v[3] + arg2.v[3], + }; +#endif +} +inline TSimd32fx4 tfSimd4fSub(TSimd32fx4 arg1, TSimd32fx4 arg2) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_sub_ps(arg1, arg2); +#else + return { + arg1.v[0] - arg2.v[0], + arg1.v[1] - arg2.v[1], + arg1.v[2] - arg2.v[2], + arg1.v[3] - arg2.v[3], + }; +#endif +} +inline TSimd32fx4 tfSimd4fMul(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_mul_ps(arg1, arg2); +#else + return { + arg1.v[0] * arg2.v[0], + arg1.v[1] * arg2.v[1], + arg1.v[2] * arg2.v[2], + arg1.v[3] * arg2.v[3], + }; +#endif + +} +inline TSimd32fx4 tfSimd4fMadd(TSimd32fx4 mul1, TSimd32fx4 mul2, TSimd32fx4 add) +{ +#if 0 + return _mm_fmadd_ps(mul1, mul2, add); // Requires FMA CPUID +#else + return tfSimd4fAdd(tfSimd4fMul(mul1, mul2), add); +#endif +} +inline TSimd32fx4 tfSimd4fDiv(TSimd32fx4 arg1, TSimd32fx4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_div_ps(arg1, arg2); +#else + return { + arg1.v[0] / arg2.v[0], + arg1.v[1] / arg2.v[1], + arg1.v[2] / arg2.v[2], + arg1.v[3] / arg2.v[3], + }; +#endif +} + +inline TSimd32fx4 tfSimd4fAbs(TSimd32fx4 value) { +#if defined(TF_FEATURE_CPU_SSE) + return value; + //return _mm_abs_epi32(value); +#else + return { + abs(value.v[0]), + abs(value.v[1]), + abs(value.v[2]), + abs(value.v[3]), + }; +#endif +} +inline TSimd32fx4 
tfSimdFloat4Load(float x, float y, float z, float w) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_set_ps(w, z, y, x); +#else + return { x, y, z, w }; +#endif +} + + +inline TSimdi32x4 tfSimdInt4Load(int32_t x, int32_t y, int32_t z, int32_t w) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_set_epi32(w, z, y, x); +#else + return { x, y, z, w }; +#endif +} + +inline Simd_FloatType tfSimd4fToSimd1f(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return value; +#else + return value.v[0]; +#endif +} + +inline TSimdf32x2 tfSimd4fToSimd2f(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return value; +#else + return {value.v[0], value.v[1]}; +#endif +} + +inline TSimdf32x3 tfSimd4fToSimd3f(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return value; +#else + return {value.v[0], value.v[1], value.v[2]}; +#endif +} + +inline TSimd32fx4 tfSimd4fSplatIndex0(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_shuffle_ps(value, value, _MM_SHUFFLE(0, 0, 0, 0)); +#else + return {value.v[0],value.v[0],value.v[0],value.v[0]}; +#endif +} + +inline TSimd32fx4 tfSimd4fSplatIndex1(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_shuffle_ps(value, value, _MM_SHUFFLE(1, 1, 1, 1)); +#else + return {value.v[1],value.v[1],value.v[1],value.v[1]}; +#endif +} + +inline TSimd32fx4 tfSimd4fSplatIndex2(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_shuffle_ps(value, value, _MM_SHUFFLE(2, 2, 2, 2)); +#else + return {value.v[2],value.v[2],value.v[2],value.v[2]}; +#endif +} + +inline TSimd32fx4 tfSimd4fSplatIndex3(TSimd32fx4 value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_shuffle_ps(value, value, _MM_SHUFFLE(3, 3, 3, 3)); +#else + return {value.v[3],value.v[3],value.v[3],value.v[3]}; +#endif +} + +inline TSimdi32x4 tfSimd4iSplat(int32_t value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_set1_epi32(value); +#else + return { value, value, value, value }; +#endif +} + +inline TSimd32fx4 tfSimd4fSplat(float value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_set1_ps(value); +#else + return { value, value, value, value }; +#endif +} + +inline TSimdi32x4 tfSimdSplat4i(int32_t value) +{ +#if defined(TF_FEATURE_CPU_SSE) + return _mm_set1_epi32(value); +#else + return { value, value, value, value }; +#endif +} + + +inline TSimdi32x4 tfSimd4iCmpEq(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_cmpeq_epi32(arg1, arg2); +#else + return { { (arg1.v[0] == arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] == arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] == arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] == arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +#endif +} +inline TSimdi32x4 tfSimd4iCmpNeq(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_xor_si128( + _mm_cmpeq_epi32(arg1, arg2), + _mm_set1_epi32((int32_t)0xFFFFFFFF)); +#else + return { { (arg1.v[0] != arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] != arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] != arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] != arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +#endif +} +inline TSimdi32x4 tfSimd4iCmpGt(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_cmpgt_epi32(arg1, arg2); +#else + return { { (arg1.v[0] > arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] > arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] > arg2.v[2]) ? 
(int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] > arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +#endif +} +inline TSimdi32x4 tfSimd4iCmpGtEq(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_or_si128( + _mm_cmpgt_epi32(arg1, arg2), + _mm_cmpeq_epi32(arg1, arg2)); +#else + return { { (arg1.v[0] >= arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] >= arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] >= arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] >= arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +#endif +} +inline TSimdi32x4 tfSimd4iCmpLt(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_cmplt_epi32(arg1, arg2); +#else + return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +#endif +} +inline TSimdi32x4 tfSimd4iCmpLtEq(TSimdi32x4 arg1, TSimdi32x4 arg2) { +#if defined(TF_FEATURE_CPU_SSE) + return _mm_or_si128( + _mm_cmplt_epi32(arg1, arg2), + _mm_cmpeq_epi32(arg1, arg2)); +#else + return { { (arg1.v[0] < arg2.v[0]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[1] < arg2.v[1]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[2] < arg2.v[2]) ? (int32_t)0xFFFFFFFF : 0x00000000, + (arg1.v[3] < arg2.v[3]) ? (int32_t)0xFFFFFFFF : 0x00000000 } }; +#endif + +} + + + + +//inline bool tfSimd4fCmpAllEq(TSimd32fx4 arg1, TSimd32fx4 arg2) { +//#if defined(TF_FEATURE_CPU_SSE) +// const TSimd32fx4 compare = tfSimd4fCmpAllEq(arg1, arg2); +// return (_mm_movemask_epi8(compare) & 0xf) == 0xf; +//#else +// for(int i = 0; i < 4; i++) { +// if (arg1.v[i] != arg2.v[i]) +// { +// return false; +// } +// } +// return true; +//#endif +// +//} + +inline bool tfSimd4iCmpAllEq(TSimdi32x4 arg1, TSimdi32x4 arg2) +{ +#if defined(TF_FEATURE_CPU_SSE) + const TSimdi32x4 compare = tfSimd4iCmpEq(arg1, arg2); + return (_mm_movemask_epi8(compare) & 0xf) == 0xf; +#else + for(int i = 0; i < 4; i++) { + if (arg1.v[i] != arg2.v[i]) + { + return false; + } + } + return true; +#endif +} diff --git a/Forge/Math/TF_SimdQuat32x4.h b/Forge/Math/TF_SimdQuat32x4.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Forge/tests/Math/BUCK b/Forge/tests/Math/BUCK index 2b5ea6503b..41ca447827 100644 --- a/Forge/tests/Math/BUCK +++ b/Forge/tests/Math/BUCK @@ -2,12 +2,11 @@ load(":defs.bzl", "cxx_math_simd_test") math_utils_header = ["TF_MathUtils.h"] -cxx_math_simd_test(name = "TF_SimdFloat2x32Test",srcs = ["TF_SimdFloat2x32Test.cpp"]) -cxx_math_simd_test(name = "TF_SimdFloat3x32Test",srcs = ["TF_SimdFloat3x32Test.cpp"]) -cxx_math_simd_test(name = "TF_SimdFloat4x32Test",srcs = ["TF_SimdFloat4x32Test.cpp"]) +# cxx_math_simd_test(name = "TF_SimdFloat2x32Test",srcs = ["TF_SimdFloat2x32Test.cpp"]) +# cxx_math_simd_test(name = "TF_SimdFloat3x32Test",srcs = ["TF_SimdFloat3x32Test.cpp"]) +# cxx_math_simd_test(name = "TF_SimdFloat4x32Test",srcs = ["TF_SimdFloat4x32Test.cpp"]) +# cxx_math_simd_test(name = "TF_SimdFloat3Test",srcs = ["TF_SimdFloat3Test.cpp"]) +# cxx_math_simd_test(name = "TF_SimdFloat4x4Test", srcs = ["TF_SimdFloat4x4Test.cpp"]) -cxx_math_simd_test(name = "TF_SimdFloat4Test",srcs = ["TF_SimdFloat4Test.cpp"]) -cxx_math_simd_test(name = "TF_SimdFloat3Test",srcs = ["TF_SimdFloat3Test.cpp"]) - -cxx_math_simd_test(name = "TF_SimdFloat4x4Test", srcs = ["TF_SimdFloat4x4Test.cpp"]) +cxx_math_simd_test(name = "TF_Simdf32x4Test", 
srcs = ["TF_Simd32x4Test.cpp"]) diff --git a/Forge/tests/Math/TF_MathUtils.h b/Forge/tests/Math/TF_MathUtils.h index e0456f82d2..b63f42f440 100644 --- a/Forge/tests/Math/TF_MathUtils.h +++ b/Forge/tests/Math/TF_MathUtils.h @@ -19,35 +19,47 @@ EXPECT_EQ(__a.getW(), __b.getW()); \ } while(false); +#define LOG_FORMAT_SIMD_32x4f(input) \ + "%.3f, %.3f, %.3f, %.3f", tfSimdSelect0_f32x4(input), tfSimdSelect1_f32x4(input), tfSimdSelect2_f32x4(input), tfSimdSelect3_f32x4(input) +#define LOG_FORMAT_SIMD_32x4i(input) \ + "%d, %d, %d, %d", tfSimdSelect0_i32x4(input), tfSimdSelect1_i32x4(input), tfSimdSelect2_i32x4(input), tfSimdSelect3_i32x4(input) -static inline void debugPrintSimd4F(TSimdFloat4 input) { - DLOGF(LogLevel::eDEBUG, "%.3f, %.3f, %.3f, %.3f", - tfSimd4fSelectIndex0(input.mRow), - tfSimd4fSelectIndex1(input.mRow), - tfSimd4fSelectIndex2(input.mRow), - tfSimd4fSelectIndex3(input.mRow)); +#define LOG_SIMD_32x4x4f(input, LOG, ...) \ + LOG(__VA_ARGS__, "%.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f", tfSimdSelectIndex0_f32x4(input.mCol0), \ + tfSimdSelectIndex1_f32x4(input.mCol1), tfSimdSelectIndex2_f32x4(input.mCol2), tfSimdSelectIndex3_f32x4(input.mCol3), \ + tfSimdSelectIndex0_f32x4(input.mCol0), tfSimdSelectIndex1_f32x4(input.mCol1), tfSimdSelectIndex2_f32x4(input.mCol2), \ + tfSimdSelectIndex3_f32x4(input.mCol3), tfSimdSelectIndex0_f32x4(input.mCol0), tfSimdSelectIndex1_f32x4(input.mCol1), \ + tfSimdSelectIndex2_f32x4(input.mCol2), tfSimdSelectIndex3_f32x4(input.mCol3), tfSimdSelectIndex0_f32x4(input.mCol0), \ + tfSimdSelectIndex1_f32x4(input.mCol1), tfSimdSelectIndex2_f32x4(input.mCol2), tfSimdSelectIndex3_f32x4(input.mCol3)) + +static inline void debugPrintSimd4F(Tsimd_f32x4_t input) { + // DLOGF(LogLevel::eDEBUG, "%.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f", + // tfSimdSelectIndex0_f32x4(input), + // tfSimdSelectIndex1_f32x4(input), + // tfSimdSelectIndex2_f32x4(input), + // tfSimdSelectIndex3_f32x4(input)); } -static inline void debugPrintSimd4x4F(TSimdFloat4x4 input) { - DLOGF(LogLevel::eDEBUG,"%.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f", - tfSimd4fSelectIndex0(input.mCol0), - tfSimd4fSelectIndex0(input.mCol1), - tfSimd4fSelectIndex0(input.mCol2), - tfSimd4fSelectIndex0(input.mCol3), +static inline void debugPrintSimd4x4F(struct Tsimd_f32x4x4_s input) { + // DLOGF(LogLevel::eDEBUG,"%.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f", + // tfSimdSelectIndex0_f32x4(input.mCol0), + // tfSimdSelectIndex1_f32x4(input.mCol1), + // tfSimdSelectIndex2_f32x4(input.mCol2), + // tfSimdSelectIndex3_f32x4(input.mCol3), - tfSimd4fSelectIndex1(input.mCol0), - tfSimd4fSelectIndex1(input.mCol1), - tfSimd4fSelectIndex1(input.mCol2), - tfSimd4fSelectIndex1(input.mCol3), - - tfSimd4fSelectIndex2(input.mCol0), - tfSimd4fSelectIndex2(input.mCol1), - tfSimd4fSelectIndex2(input.mCol2), - tfSimd4fSelectIndex2(input.mCol3), - - tfSimd4fSelectIndex3(input.mCol0), - tfSimd4fSelectIndex3(input.mCol1), - tfSimd4fSelectIndex3(input.mCol2), - tfSimd4fSelectIndex3(input.mCol3) - ); + // tfSimdSelectIndex0_f32x4(input.mCol0), + // tfSimdSelectIndex1_f32x4(input.mCol1), + // tfSimdSelectIndex2_f32x4(input.mCol2), + // tfSimdSelectIndex3_f32x4(input.mCol3), + // + // tfSimdSelectIndex0_f32x4(input.mCol0), + // tfSimdSelectIndex1_f32x4(input.mCol1), + // tfSimdSelectIndex2_f32x4(input.mCol2), + // tfSimdSelectIndex3_f32x4(input.mCol3), + // + // tfSimdSelectIndex0_f32x4(input.mCol0), + // 
tfSimdSelectIndex1_f32x4(input.mCol1), + // tfSimdSelectIndex2_f32x4(input.mCol2), + // tfSimdSelectIndex3_f32x4(input.mCol3) + // ); } diff --git a/Forge/tests/Math/TF_Simd2Test.cpp b/Forge/tests/Math/TF_Simd2Test.cpp new file mode 100644 index 0000000000..5f59d0bf63 --- /dev/null +++ b/Forge/tests/Math/TF_Simd2Test.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ +#include "TF_TestMain.h" +#include "utest.h" + +#include "Forge/Math/TF_Simd32x2.h" +#include "TF_MathUtils.h" + + +UTEST(TF_Simd2, tfS32x2ICmpGt) +{ + struct { + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t test; + } tests[] = { + // ... existing test cases ... + // Edge cases: + {tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // Mixed values: + {tfS32x2ILoadImmediate(0, -1), tfS32x2ILoadImmediate(1, 0), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // All elements equal: + {tfS32x2ILoadImmediate(42, 42), tfS32x2ILoadImmediate(42, 42), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // Different element types: + {tfS32x2ILoadImmediate(0, 1), tfS32x2ILoadImmediate(3, 2), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ICmpGt(tests[i].a, tests[i].b), tests[i].test)); + } +} + +UTEST(TF_Simd2, tfS32x2ICmpLt) +{ + struct { + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t test; + } tests[] = { + // Less than + {tfS32x2ILoadImmediate(12, 13), tfS32x2ILoadImmediate(16, 17), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // Mixed less than and equal + {tfS32x2ILoadImmediate(125, -12), tfS32x2ILoadImmediate(125, 13), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_TRUE)}, + // All elements greater than or equal + {tfS32x2ILoadImmediate(1, 2), tfS32x2ILoadImmediate(-1, 0), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // Edge cases: + {tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ICmpLt(tests[i].a, tests[i].b), tests[i].test)); + } +} + + + +UTEST(TF_Simd2, tfS32x2ICmpEq) +{ + struct { + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t test; + } tests[] = { + // Equal elements + {tfS32x2ILoadImmediate(12, 13), tfS32x2ILoadImmediate(12, 13), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // Mixed equal and unequal elements + {tfS32x2ILoadImmediate(125, -12), tfS32x2ILoadImmediate(125, -12), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // All elements unequal + {tfS32x2ILoadImmediate(1, 2), tfS32x2ILoadImmediate(5, 6), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // Edge cases: + {tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), 
tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + }; + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) + { + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ICmpEq(tests[i].a, tests[i].b), tests[i].test)); + } +} + +UTEST(TF_Simd2, tfS32x2FZero) +{ + Tsimd_f32x2_t value = tfS32x2FZero(); + EXPECT_NEAR(tfS32x2FSelectIndex0(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value), 0.0f, DEFAULT_EPSILON); +} + +UTEST(TF_Simd2, tfS32x2FSplat) +{ + Tsimd_f32x2_t value = tfS32x2FSplat(23.f); + EXPECT_NEAR(tfS32x2FSelectIndex0(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value), 23.f, DEFAULT_EPSILON); + Tsimd_f32x2_t value1 = tfS32x2FSplat(5.1f); + EXPECT_NEAR(tfS32x2FSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); + + Tsimd_f32x2_t value2 = tfSimdFloat2Load(5.1f, 1.0f); + EXPECT_NEAR(tfS32x2FSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); +} + +UTEST(TF_Simd2, tfS32x3iNot) { + struct { + Tsimd_i32x2_t test; + Tsimd_i32x2_t expect; + } tests[] = { + {tfS32x2ILoadImmediate(0xFFFFFFFF, 0x0000FFFF), tfS32x2ILoadImmediate(0, 0xFFFF0000)}, + }; + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) + { + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2INot(tests[i].test), tests[i].expect)); + } +} + +UTEST(TF_Simd2, tfS32x2ISelect) { + struct { + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t mask; + Tsimd_i32x2_t expect; + } tests[] = { + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(0,0), tfS32x2ILoadImmediate(10, 11)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(TF_SIMD_TRUE,0), tfS32x2ILoadImmediate(123, 11)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(TF_SIMD_TRUE,0), tfS32x2ILoadImmediate(123, 11)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(TF_SIMD_TRUE,TF_SIMD_TRUE), tfS32x2ILoadImmediate(123, -149)}, + }; + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) + { + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ISelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); + } +} + +//UTEST(TF_Simd2, tfS32x3FSelect) { +// struct { +// TSimd32fx4 a; +// TSimd32fx4 b; +// TSimd32fx4 mask; +// TSimd32fx4 expect; +// } tests[] = { +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(0,0,0,0), tfS32x3FLoadImmediate(10, 11, -13, 32)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(TF_SIMD_TRUE,0,0,0), tfS32x3FLoadImmediate(123, 11, -13, 32)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(TF_SIMD_TRUE,0,TF_SIMD_TRUE,0), tfS32x3FLoadImmediate(123, 11, 0, 32)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfS32x3FLoadImmediate(123, -149, 0, 12)}, +// }; +// for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) +// { +// //EXPECT_TRUE(tfS32x3FCmp(tfS32x2ISelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); +// } +//} + + +#include "Forge/Mem/TF_Memory.h" +#include "Forge/TF_FileSystem.h" +#include "Forge/TF_Log.h" +UTEST_STATE(); +TF_UTEST_MAIN("TF_TF_Simd2") + + diff --git a/Forge/tests/Math/TF_Simd32x4Test.cpp 
b/Forge/tests/Math/TF_Simd32x4Test.cpp new file mode 100644 index 0000000000..affadf2384 --- /dev/null +++ b/Forge/tests/Math/TF_Simd32x4Test.cpp @@ -0,0 +1,756 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ +#include "TF_TestMain.h" +#include "utest.h" + +#include "Forge/Math/TF_Simd32x4.h" +#include "TF_MathUtils.h" + +UTEST(Tsimd_f32x4_t, Zero) +{ + Tsimd_f32x4_t value = tfSimdZero_f32x4(); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 0), 0, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 1), 0, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 2), 0, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 3), 0, DEFAULT_EPSILON); + +} + +UTEST(Tsimd_f32x4_t, Load_Select) +{ + Tsimd_f32x4_t value = tfSimdLoad_f32x4(123.0,12.f,45.f,12.5f); + + EXPECT_NEAR(tfSimdSelect_f32x4(value, 0), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 1), 12.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 2), 45.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect_f32x4(value, 3), 12.5f, DEFAULT_EPSILON); + + EXPECT_NEAR(tfSimdSelect0_f32x4(value), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_f32x4(value), 12.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_f32x4(value), 45.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_f32x4(value), 12.5f, DEFAULT_EPSILON); +} + +UTEST(Tsimd_i32x4_t, Splat) { + Tsimd_i32x4_t value = tfSimdLoad_i32x4(123, 12, 45, 12); + { + Tsimd_i32x4_t test = tfSimdSplat_i32x4(123); + EXPECT_NEAR(tfSimdSelect0_i32x4(test), 123, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_i32x4(test), 123, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_i32x4(test), 123, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_i32x4(test), 123, DEFAULT_EPSILON); + } + { + Tsimd_i32x4_t test = tfSimdSplat0_i32x4(value); + EXPECT_NEAR(tfSimdSelect0_i32x4(test), 123, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_i32x4(test), 123, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_i32x4(test), 123, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_i32x4(test), 123, DEFAULT_EPSILON); + } + + { + Tsimd_i32x4_t test = tfSimdSplat1_i32x4(value); + EXPECT_NEAR(tfSimdSelect0_i32x4(test), 12, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_i32x4(test), 12, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_i32x4(test), 12, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_i32x4(test), 12, DEFAULT_EPSILON); + } + + { + Tsimd_i32x4_t test = tfSimdSplat2_i32x4(value); + EXPECT_NEAR(tfSimdSelect0_i32x4(test), 45, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_i32x4(test), 45, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_i32x4(test), 45, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_i32x4(test), 45, DEFAULT_EPSILON); + } + + { + Tsimd_i32x4_t test = tfSimdSplat3_i32x4(value); + EXPECT_NEAR(tfSimdSelect0_i32x4(test), 12, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_i32x4(test), 12, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_i32x4(test), 12, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_i32x4(test), 12, DEFAULT_EPSILON); + } +} + +UTEST(Tsimd_f32x4_t, Splat) { + Tsimd_f32x4_t value = tfSimdLoad_f32x4(123.0,12.f,45.f,12.5f); + { + Tsimd_f32x4_t test = tfSimdSplat_f32x4(123.0f); + EXPECT_NEAR(tfSimdSelect0_f32x4(test), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_f32x4(test), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_f32x4(test), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_f32x4(test), 
123.0f, DEFAULT_EPSILON); + + } + { + Tsimd_f32x4_t test = tfSimdSplat0_f32x4(value); + EXPECT_NEAR(tfSimdSelect0_f32x4(test), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_f32x4(test), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_f32x4(test), 123.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_f32x4(test), 123.0f, DEFAULT_EPSILON); + } + + { + Tsimd_f32x4_t test = tfSimdSplat1_f32x4(value); + EXPECT_NEAR(tfSimdSelect0_f32x4(test), 12.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_f32x4(test), 12.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_f32x4(test), 12.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_f32x4(test), 12.0f, DEFAULT_EPSILON); + } + + { + Tsimd_f32x4_t test = tfSimdSplat2_f32x4(value); + EXPECT_NEAR(tfSimdSelect0_f32x4(test), 45.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_f32x4(test), 45.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_f32x4(test), 45.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_f32x4(test), 45.0f, DEFAULT_EPSILON); + } + + { + Tsimd_f32x4_t test = tfSimdSplat3_f32x4(value); + EXPECT_NEAR(tfSimdSelect0_f32x4(test), 12.5f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect1_f32x4(test), 12.5f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect2_f32x4(test), 12.5f, DEFAULT_EPSILON); + EXPECT_NEAR(tfSimdSelect3_f32x4(test), 12.5f, DEFAULT_EPSILON); + } +} + + +UTEST(Tsimd_i32x4_t, Load_Select) +{ + Tsimd_i32x4_t value = tfSimdLoad_i32x4(123,12,45,165); + + EXPECT_EQ(tfSimdSelect_i32x4(value, 0), 123); + EXPECT_EQ(tfSimdSelect_i32x4(value, 1), 12); + EXPECT_EQ(tfSimdSelect_i32x4(value, 2), 45); + EXPECT_EQ(tfSimdSelect_i32x4(value, 3), 165); + + EXPECT_EQ(tfSimdSelect0_i32x4(value), 123); + EXPECT_EQ(tfSimdSelect1_i32x4(value), 12); + EXPECT_EQ(tfSimdSelect2_i32x4(value), 45); + EXPECT_EQ(tfSimdSelect3_i32x4(value), 165); +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpGt_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + // Mixed values: + {tfSimdLoad_f32x4(0, -1, 1, 0), tfSimdLoad_f32x4(1, 0, -1, 0), tfSimdLoad_i32x4(0, 0, -1, 0)}, + // All elements greater: + {tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // All elements less: + {tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_f32x4(-2, 1, -4, 3), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimd_f32x4_To_i32x4(tfSimdCmpGt_f32x4(tests[i].a, tests[i].b)); + DLOGF(LogLevel::eDEBUG,LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpEq_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_f32x4(0, -1, 1, 0), tfSimdLoad_f32x4(1, 0, -1, 0), tfSimdLoad_i32x4(0, 0, 0, -1)}, + // All elements equal: + {tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements different: + {tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(5, 6, 7, 8), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_f32x4(-1, 2, -3, 4), 
tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimd_f32x4_To_i32x4(tfSimdCmpEq_f32x4(tests[i].a, tests[i].b)); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpNeq_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_f32x4(0, -1, 1, 0), tfSimdLoad_f32x4(1, 0, -1, 0), tfSimdLoad_i32x4(-1, -1, -1, 0)}, + // All elements equal: + {tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // All elements different: + {tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(5, 6, 7, 8), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Mixed positive and negative values: + {tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Zero comparison: + {tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimd_f32x4_To_i32x4(tfSimdCmpNeq_f32x4(tests[i].a, tests[i].b)); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpGtEq_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_f32x4(12, 13, 14, 15), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements greater or equal: + {tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements less: + {tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_f32x4(-2, 1, -4, 3), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimd_f32x4_To_i32x4(tfSimdCmpGtEq_f32x4(tests[i].a, tests[i].b)); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpLt_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_f32x4(0, -1, 1, 0), tfSimdLoad_f32x4(1, 0, -1, 0), tfSimdLoad_i32x4(-1, -1, 0, 0)}, + // All elements less: + {tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // All elements greater: + {tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_f32x4(-2, 1, -4, 3), tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + }; + for(size_t i = 
0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimd_f32x4_To_i32x4(tfSimdCmpLt_f32x4(tests[i].a, tests[i].b)); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpLtEq_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_f32x4(0, -1, 1, 0), tfSimdLoad_f32x4(1, 0, -1, 0), tfSimdLoad_i32x4(-1, -1, 0, -1)}, + // All elements less or equal: + {tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements greater: + {tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_f32x4(-2, 1, -4, 3), tfSimdLoad_f32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimd_f32x4_To_i32x4(tfSimdCmpLtEq_f32x4(tests[i].a, tests[i].b)); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + + +UTEST(Tsimd_i32x4_t, tfSimdCmpGt_i32x4) +{ + struct { + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_i32x4(0, -1, 1, 0), tfSimdLoad_i32x4(1, 0, -1, 0), tfSimdLoad_i32x4(0, 0, -1, 0)}, + // All elements greater: + {tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // All elements less: + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_i32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-2, 1, -4, 3), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdCmpGt_i32x4(tests[i].a, tests[i].b); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpGtEq_i32x4) +{ + struct { + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_i32x4(0, -1, 1, 0), tfSimdLoad_i32x4(1, 0, -1, 0), tfSimdLoad_i32x4(0, 0, -1, -1)}, + // All elements greater or equal: + {tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements less: + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_i32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-2, 1, -4, 3), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdCmpGtEq_i32x4(tests[i].a, 
tests[i].b); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpLtEq_i32x4) +{ + struct { + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_i32x4(0, -1, 1, 0), tfSimdLoad_i32x4(1, 0, -1, 0), tfSimdLoad_i32x4(-1, -1, 0, -1)}, + // All elements less or equal: + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements greater: + {tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_i32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-2, 1, -4, 3), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Zero comparison: + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdCmpLtEq_i32x4(tests[i].a, tests[i].b); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpLt_i32x4) +{ + struct { + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_i32x4(0, -1, 1, 0), tfSimdLoad_i32x4(1, 0, -1, 0), tfSimdLoad_i32x4(-1, -1, 0, 0)}, + // All elements less: + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements equal: + {tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // All elements greater: + {tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_i32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-2, 1, -4, 3), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Zero comparison: + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdCmpLt_i32x4(tests[i].a, tests[i].b); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + + +UTEST(Tsimd_i32x4_t, tfSimdCmpEq_i32x4) +{ + struct { + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; + } tests[] = { + // Mixed values: + {tfSimdLoad_i32x4(0, -1, 1, 0), tfSimdLoad_i32x4(1, 0, -1, 0), tfSimdLoad_i32x4(0, 0, 0, -1)}, + // All elements equal: + {tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // All elements not equal: + {tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(0, 0, 0, 0)}, + // Mixed positive and negative values: + {tfSimdLoad_i32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-1, 2, -3, 4), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + // Zero comparison: + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(-1, -1, -1, -1)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdCmpEq_i32x4(tests[i].a, tests[i].b); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].test)); + } +} + + +UTEST(Tsimd_f32x4_t, tfSimdCmpAllGt_f32x4) +{ + EXPECT_TRUE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(16, 16, 17.1f, 13.0f), 
tfSimdLoad_f32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(2.0f, 0, 0, 0), tfSimdLoad_f32x4(2.0f, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(5, 6, 7, 8), tfSimdLoad_f32x4(4, 7, 6, 9))); + EXPECT_FALSE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1))); + EXPECT_FALSE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5))); + EXPECT_TRUE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(1, -2, 3, -4), tfSimdLoad_f32x4(0, -3, 2, -5))); + EXPECT_FALSE(tfSimdCmpAllGt_f32x4(tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0))); +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpAllEq_f32x4) +{ + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1))); + EXPECT_FALSE(tfSimdCmpAllEq_f32x4(tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(1, 2, 3, 5))); + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0))); +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpAllNeq_f32x4) +{ + EXPECT_FALSE(tfSimdCmpAllNeq_f32x4(tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1))); + EXPECT_FALSE(tfSimdCmpAllLt_f32x4(tfSimdLoad_f32x4(0,0,0,0), tfSimdLoad_f32x4(0, 12, 33, 44))); + EXPECT_TRUE(tfSimdCmpAllNeq_f32x4(tfSimdLoad_f32x4(32, 45, 13, 4), tfSimdLoad_f32x4(1, 2, 3, 5))); + EXPECT_FALSE(tfSimdCmpAllNeq_f32x4(tfSimdLoad_f32x4(0, 0, 0, 0), tfSimdLoad_f32x4(0, 0, 0, 0))); +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpAllGtEq_f32x4) +{ + EXPECT_TRUE(tfSimdCmpAllGtEq_f32x4(tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4))); + EXPECT_FALSE(tfSimdCmpAllGtEq_f32x4(tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(1, 2, 3, 5))); + EXPECT_TRUE(tfSimdCmpAllGtEq_f32x4(tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1))); +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpAllLt_f32x4) +{ + EXPECT_TRUE(tfSimdCmpAllLt_f32x4(tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllLt_f32x4(tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4))); + EXPECT_FALSE(tfSimdCmpAllLt_f32x4(tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1))); +} + +UTEST(Tsimd_f32x4_t, tfSimdCmpAllLtEq_f32x4) +{ + EXPECT_TRUE(tfSimdCmpAllLtEq_f32x4(tfSimdLoad_f32x4(1, 2, 3, 4), tfSimdLoad_f32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllLtEq_f32x4(tfSimdLoad_f32x4(2, 3, 4, 5), tfSimdLoad_f32x4(1, 2, 3, 4))); + EXPECT_TRUE(tfSimdCmpAllLtEq_f32x4(tfSimdLoad_f32x4(1, 1, 1, 1), tfSimdLoad_f32x4(1, 1, 1, 1))); +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpAllGt_i32x4) +{ + EXPECT_TRUE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(16, 16, 17, 13), tfSimdLoad_i32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(2, 0, 0, 0), tfSimdLoad_i32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(5, 6, 7, 8), tfSimdLoad_i32x4(4, 7, 6, 9))); + EXPECT_FALSE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1))); + EXPECT_FALSE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5))); + EXPECT_TRUE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(1, -2, 3, -4), tfSimdLoad_i32x4(0, -3, 2, -5))); + EXPECT_FALSE(tfSimdCmpAllGt_i32x4(tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0))); +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpAllEq_i32x4) +{ + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1))); + EXPECT_FALSE(tfSimdCmpAllEq_i32x4(tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(1, 2, 3, 5))); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 
0))); +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpAllNeq_i32x4) +{ + EXPECT_FALSE(tfSimdCmpAllNeq_i32x4(tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1))); + EXPECT_FALSE(tfSimdCmpAllNeq_i32x4(tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 12, 33, 44))); + EXPECT_TRUE(tfSimdCmpAllNeq_i32x4(tfSimdLoad_i32x4(32, 45, 13, 4), tfSimdLoad_i32x4(1, 2, 3, 5))); + EXPECT_FALSE(tfSimdCmpAllNeq_i32x4(tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0))); +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpAllGtEq_i32x4) +{ + EXPECT_TRUE(tfSimdCmpAllGtEq_i32x4(tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4))); + EXPECT_FALSE(tfSimdCmpAllGtEq_i32x4(tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(1, 2, 3, 5))); + EXPECT_TRUE(tfSimdCmpAllGtEq_i32x4(tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1))); +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpAllLt_i32x4) +{ + EXPECT_TRUE(tfSimdCmpAllLt_i32x4(tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllLt_i32x4(tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4))); + EXPECT_FALSE(tfSimdCmpAllLt_i32x4(tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1))); +} + +UTEST(Tsimd_i32x4_t, tfSimdCmpAllLtEq_i32x4) +{ + EXPECT_TRUE(tfSimdCmpAllLtEq_i32x4(tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(2, 3, 4, 5))); + EXPECT_FALSE(tfSimdCmpAllLtEq_i32x4(tfSimdLoad_i32x4(2, 3, 4, 5), tfSimdLoad_i32x4(1, 2, 3, 4))); + EXPECT_TRUE(tfSimdCmpAllLtEq_i32x4(tfSimdLoad_i32x4(1, 1, 1, 1), tfSimdLoad_i32x4(1, 1, 1, 1))); +} + + +UTEST(Tsimd_i32x4_t, tfSimdNot_i32x4) +{ + struct { + Tsimd_i32x4_t input; + Tsimd_i32x4_t expected; + } tests[] = { + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(~0, ~0, ~0, ~0)}, + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(~1, ~2, ~3, ~4)}, + {tfSimdLoad_i32x4(-1, -2, -3, -4), tfSimdLoad_i32x4(~-1, ~-2, ~-3, ~-4)}, + {tfSimdLoad_i32x4(123, 456, 789, 101112), tfSimdLoad_i32x4(~123, ~456, ~789, ~101112)}, + {tfSimdLoad_i32x4(0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFC), tfSimdLoad_i32x4(~0xFFFFFFFF, ~0xFFFFFFFE, ~0xFFFFFFFD, ~0xFFFFFFFC)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdNot_i32x4(tests[i].input); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].expected)); + } +} + + +UTEST(Tsimd_i32x4_t, tfSimdAnd_i32x4) +{ + struct { + Tsimd_i32x4_t arg1; + Tsimd_i32x4_t arg2; + Tsimd_i32x4_t expected; + } tests[] = { + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(4, 3, 2, 1), tfSimdLoad_i32x4(0, 2, 2, 0)}, + {tfSimdLoad_i32x4(-1, -2, -3, -4), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(1, 2, 1, 4)}, + {tfSimdLoad_i32x4(0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFC), tfSimdLoad_i32x4(0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFF9, 0xFFFFFFF8), tfSimdLoad_i32x4(0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFF9, 0xFFFFFFF8)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdAnd_i32x4(tests[i].arg1, tests[i].arg2); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].expected)); + } +} + + +UTEST(Tsimd_i32x4_t, tfSimdAndNot_i32x4) +{ + struct { + Tsimd_i32x4_t arg1; + Tsimd_i32x4_t arg2; + Tsimd_i32x4_t expected; + } tests[] = { + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(4, 3, 2, 1), tfSimdLoad_i32x4(4, 1, 0, 1)}, + {tfSimdLoad_i32x4(-1, -2, -3, -4), tfSimdLoad_i32x4(1, 2, 
3, 4), tfSimdLoad_i32x4(0, 0, 2, 0)}, + {tfSimdLoad_i32x4(123, 456, 789, 101112), tfSimdLoad_i32x4(654, 321, 987, 654321), tfSimdLoad_i32x4(644, 1, 202, 553217)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdAndNot_i32x4(tests[i].arg1, tests[i].arg2); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_i32x4_t, tfSimdOr_i32x4) +{ + struct { + Tsimd_i32x4_t arg1; + Tsimd_i32x4_t arg2; + Tsimd_i32x4_t expected; + } tests[] = { + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(4, 3, 2, 1), tfSimdLoad_i32x4(5, 3, 3, 5)}, + {tfSimdLoad_i32x4(-1, -2, -3, -4), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(-1, -2, -1, -4)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdOr_i32x4(tests[i].arg1, tests[i].arg2); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].expected)); + } +} + + +UTEST(Tsimd_i32x4_t, tfSimdXor_i32x4) +{ + struct { + Tsimd_i32x4_t arg1; + Tsimd_i32x4_t arg2; + Tsimd_i32x4_t expected; + } tests[] = { + {tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0), tfSimdLoad_i32x4(0, 0, 0, 0)}, + {tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(4, 3, 2, 1), tfSimdLoad_i32x4(5, 1, 1, 5)}, + {tfSimdLoad_i32x4(-1, -2, -3, -4), tfSimdLoad_i32x4(1, 2, 3, 4), tfSimdLoad_i32x4(-2, -4, -2, -8)}, + {tfSimdLoad_i32x4(123, 456, 789, 101112), tfSimdLoad_i32x4(654, 321, 987, 654321), tfSimdLoad_i32x4(757,137,206,553225)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_i32x4_t result = tfSimdXor_i32x4(tests[i].arg1, tests[i].arg2); + DLOGF(LogLevel::eDEBUG, LOG_FORMAT_SIMD_32x4i(result)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdAdd_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_f32x4_t expected; + } tests[] = { + {tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f)}, + {tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(4.0f, 3.0f, 2.0f, 1.0f), tfSimdLoad_f32x4(5.0f, 5.0f, 5.0f, 5.0f)}, + {tfSimdLoad_f32x4(-1.0f, -2.0f, -3.0f, -4.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f)}, + {tfSimdLoad_f32x4(123.45f, 456.78f, 789.01f, 101112.13f), tfSimdLoad_f32x4(654.32f, 321.09f, 987.65f, 654321.98f), tfSimdLoad_f32x4(777.77f, 777.87f, 1776.66f, 755434.11f)}, + {tfSimdLoad_f32x4(0.1f, 0.2f, 0.3f, 0.4f), tfSimdLoad_f32x4(0.9f, 0.8f, 0.7f, 0.6f), tfSimdLoad_f32x4(1.0f, 1.0f, 1.0f, 1.0f)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_f32x4_t result = tfSimdAdd_f32x4(tests[i].a, tests[i].b); + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdMul_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_f32x4_t expected; + } tests[] = { + {tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(1.0f, 4.0f, 9.0f, 16.0f)}, + {tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f)}, + {tfSimdLoad_f32x4(-1.0f, -2.0f, -3.0f, -4.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(-1.0f, -4.0f, -9.0f, -16.0f)}, + {tfSimdLoad_f32x4(1.5f, 2.5f, 3.5f, 4.5f), 
tfSimdLoad_f32x4(2.0f, 2.0f, 2.0f, 2.0f), tfSimdLoad_f32x4(3.0f, 5.0f, 7.0f, 9.0f)}, + {tfSimdLoad_f32x4(1.0f, 0.0f, -1.0f, 0.0f), tfSimdLoad_f32x4(0.0f, 1.0f, 0.0f, -1.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_f32x4_t result = tfSimdMul_f32x4(tests[i].a, tests[i].b); + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdDiv_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_f32x4_t expected; + } tests[] = { + {tfSimdLoad_f32x4(4.0f, 9.0f, 16.0f, 25.0f), tfSimdLoad_f32x4(2.0f, 3.0f, 4.0f, 5.0f), tfSimdLoad_f32x4(2.0f, 3.0f, 4.0f, 5.0f)}, + {tfSimdLoad_f32x4(1.0f, 1.0f, 1.0f, 1.0f), tfSimdLoad_f32x4(1.0f, 1.0f, 1.0f, 1.0f), tfSimdLoad_f32x4(1.0f, 1.0f, 1.0f, 1.0f)}, + {tfSimdLoad_f32x4(-4.0f, -9.0f, -16.0f, -25.0f), tfSimdLoad_f32x4(2.0f, 3.0f, 4.0f, 5.0f), tfSimdLoad_f32x4(-2.0f, -3.0f, -4.0f, -5.0f)}, + {tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f)}, + {tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(1.0f, 1.0f, 1.0f, 1.0f)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_f32x4_t result = tfSimdDiv_f32x4(tests[i].a, tests[i].b); + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdAbs_f32x4) +{ + struct { + Tsimd_f32x4_t input; + Tsimd_f32x4_t expected; + } tests[] = { + {tfSimdLoad_f32x4(-1.0f, -2.0f, -3.0f, -4.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f)}, + {tfSimdLoad_f32x4(1.0f, -2.0f, 3.0f, -4.0f), tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f)}, + {tfSimdLoad_f32x4(0.0f, -0.0f, 0.0f, -0.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f)}, + {tfSimdLoad_f32x4(5.0f, -6.0f, 7.0f, -8.0f), tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f)}, + {tfSimdLoad_f32x4(-9.0f, 10.0f, -11.0f, 12.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_f32x4_t result = tfSimdAbs_f32x4(tests[i].input); + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdMadd_f32x4) +{ + struct { + Tsimd_f32x4_t a; + Tsimd_f32x4_t b; + Tsimd_f32x4_t c; + Tsimd_f32x4_t expected; + } tests[] = { + {tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f), tfSimdLoad_f32x4(14.0f, 22.0f, 32.0f, 44.0f)}, + {tfSimdLoad_f32x4(-1.0f, -2.0f, -3.0f, -4.0f), tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f), tfSimdLoad_f32x4(4.0f, -2.0f, -10.0f, -20.0f)}, + {tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f)}, + {tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f), tfSimdLoad_f32x4(9.0f, 10.0f, 11.0f, 12.0f)}, + {tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f), tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f), tfSimdLoad_f32x4(0.0f, 0.0f, 0.0f, 0.0f), tfSimdLoad_f32x4(5.0f, 12.0f, 21.0f, 32.0f)}, + }; + + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + Tsimd_f32x4_t result = tfSimdMadd_f32x4(tests[i].a, tests[i].b, tests[i].c); + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(result, tests[i].expected)); + } +} + +UTEST(Tsimd_f32x4_t, tfSimdDot_f32x4) +{ + Tsimd_f32x4_t a = tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f); + 
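// tfSimdDot_f32x4 is expected to broadcast the dot product into every lane (the expected vector below is 70 in all four); the scalar form is covered by tfSimdDot_f32x4_f32 in the next test. +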
Tsimd_f32x4_t b = tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f); + Tsimd_f32x4_t result = tfSimdDot_f32x4(a, b); + Tsimd_f32x4_t expected = tfSimdLoad_f32x4(70.0f, 70.0f, 70.0f, 70.0f); // 1*5 + 2*6 + 3*7 + 4*8 = 70 + + EXPECT_TRUE(tfSimdCmpAllEq_f32x4(result, expected)); +} + +UTEST(Tsimd_f32x4_t, tfSimdDot_f32x4_f32) +{ + Tsimd_f32x4_t a = tfSimdLoad_f32x4(1.0f, 2.0f, 3.0f, 4.0f); + Tsimd_f32x4_t b = tfSimdLoad_f32x4(5.0f, 6.0f, 7.0f, 8.0f); + float result = tfSimdDot_f32x4_f32(a, b); + float expected = 70.0f; // 1*5 + 2*6 + 3*7 + 4*8 = 70 + + EXPECT_EQ(result, expected); +} + +#include "Forge/Mem/TF_Memory.h" +#include "Forge/TF_FileSystem.h" +#include "Forge/TF_Log.h" +UTEST_STATE(); +TF_UTEST_MAIN("TF_Simd4") + diff --git a/Forge/tests/Math/TF_Simd3Test.cpp b/Forge/tests/Math/TF_Simd3Test.cpp new file mode 100644 index 0000000000..f445f561d2 --- /dev/null +++ b/Forge/tests/Math/TF_Simd3Test.cpp @@ -0,0 +1,165 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ +#include "TF_TestMain.h" +#include "utest.h" + +#include "Forge/Math/TF_Simd32x3.h" +#include "TF_MathUtils.h" + + +UTEST(TF_Simd3, tfS32x3iCmpGt) +{ + struct { + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t test; + } tests[] = { + // ... existing test cases ... + // Edge cases: + {tfSimdInt3Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfSimdInt3Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // Mixed values: + {tfSimdInt3Load(0, -1, 1), tfSimdInt3Load(1, 0, -1), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_TRUE)}, + // All elements equal: + {tfSimdInt3Load(42, 42, 42), tfSimdInt3Load(42, 42, 42), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // Different element types: + {tfSimdInt3Load(0, 1, 2), tfSimdInt3Load(3, 2, 1), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_TRUE)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iCmpGt(tests[i].a, tests[i].b), tests[i].test)); + } +} + +UTEST(TF_Simd3, tfS32x3iCmpLt) +{ + struct { + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t test; + } tests[] = { + // Less than + {tfSimdInt3Load(12, 13, 14), tfSimdInt3Load(16, 17, 18), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // Mixed less than and equal + {tfSimdInt3Load(125, -12, 153), tfSimdInt3Load(125, 13, 153), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_TRUE, TF_SIMD_FALSE)}, + // All elements greater than or equal + {tfSimdInt3Load(1, 2, 3), tfSimdInt3Load(-1, 0, 1), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // Edge cases: + {tfSimdInt3Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimdInt3Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + }; + for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iCmpLt(tests[i].a, tests[i].b), tests[i].test)); + } +} + + + +UTEST(TF_Simd3, tfS32x3iCmpEq) +{ + struct { + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t test; + } tests[] = { + // Equal elements + 
{tfSimdInt3Load(12, 13, 14), tfSimdInt3Load(12, 13, 14), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + // Mixed equal and unequal elements + {tfSimdInt3Load(125, -12, 153), tfSimdInt3Load(125, -12, 14), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_FALSE)}, + // All elements unequal + {tfSimdInt3Load(1, 2, 3), tfSimdInt3Load(5, 6, 7), tfSimdInt3Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + // Edge cases: + {tfSimdInt3Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimdInt3Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + }; + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) + { + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iCmpEq(tests[i].a, tests[i].b), tests[i].test)); + } +} + +UTEST(TF_Simd3, tfS32x3FZero) +{ + Tsimd_f32x3_t value = tfS32x3FZero(); + EXPECT_NEAR(tfS32x3FSelectIndex0(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value), 0.0f, DEFAULT_EPSILON); +} + +UTEST(TF_Simd3, tfSimdSplat4f) +{ + Tsimd_f32x3_t value = tfS32x3FSplat(23.f); + EXPECT_NEAR(tfS32x3FSelectIndex0(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value), 23.f, DEFAULT_EPSILON); + Tsimd_f32x3_t value1 = tfS32x3FSplat(5.1f); + EXPECT_NEAR(tfS32x3FSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value1), 5.1f, DEFAULT_EPSILON); + + Tsimd_f32x3_t value2 = tfSimdFloat3Load(5.1f, 1.0f, 2.0f); + EXPECT_NEAR(tfS32x3FSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value2), 2.0f, DEFAULT_EPSILON); +} + +UTEST(TF_Simd3, tfS32x3iNot) { + struct { + Tsimd_i32x3_t test; + Tsimd_i32x3_t expect; + } tests[] = { + {tfSimdInt3Load(0xFFFFFFFF, 0x0000FFFF, 0xFFFF0000), tfSimdInt3Load(0, 0xFFFF0000, 0x0000FFFF)}, + }; + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) + { + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iNot(tests[i].test), tests[i].expect)); + } +} + +UTEST(TF_Simd3, tfS32x3iSelect) { + struct { + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t mask; + Tsimd_i32x3_t expect; + } tests[] = { + {tfSimdInt3Load(10, 11, -13), tfSimdInt3Load(123, -149, 0), tfSimdInt3Load(0,0,0), tfSimdInt3Load(10, 11, -13)}, + {tfSimdInt3Load(10, 11, -13), tfSimdInt3Load(123, -149, 0), tfSimdInt3Load(TF_SIMD_TRUE,0,0), tfSimdInt3Load(123, 11, -13)}, + {tfSimdInt3Load(10, 11, -13), tfSimdInt3Load(123, -149, 0), tfSimdInt3Load(TF_SIMD_TRUE,0,TF_SIMD_TRUE), tfSimdInt3Load(123, 11, 0)}, + {tfSimdInt3Load(10, 11, -13), tfSimdInt3Load(123, -149, 0), tfSimdInt3Load(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfSimdInt3Load(123, -149, 0)}, + }; + for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) + { + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); + } +} + +//UTEST(TF_Simd3, tfS32x3FSelect) { +// struct { +// Tsimd_f32x3_t a; +// Tsimd_f32x3_t b; +// Tsimd_f32x3_t mask; +// Tsimd_f32x3_t expect; +// } tests[] = { +// {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(0,0,0,0), tfSimdFloat4Load(10, 11, -13, 32)}, +// {tfSimdFloat4Load(10, 11, -13, 32), 
tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(TF_SIMD_TRUE,0,0,0), tfSimdFloat4Load(123, 11, -13, 32)}, +// {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(TF_SIMD_TRUE,0,TF_SIMD_TRUE,0), tfSimdFloat4Load(123, 11, 0, 32)}, +// {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfSimdFloat4Load(123, -149, 0, 12)}, +// }; +// for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) +// { +// //EXPECT_TRUE(tfS32x3FCmp(tfS32x3iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); +// } +//} + + +#include "Forge/Mem/TF_Memory.h" +#include "Forge/TF_FileSystem.h" +#include "Forge/TF_Log.h" +UTEST_STATE(); +TF_UTEST_MAIN("TF_Simd3") + diff --git a/Forge/tests/Math/TF_SimdFloat2x32Test.cpp b/Forge/tests/Math/TF_SimdFloat2x32Test.cpp index 2c18054f5d..5f59d0bf63 100644 --- a/Forge/tests/Math/TF_SimdFloat2x32Test.cpp +++ b/Forge/tests/Math/TF_SimdFloat2x32Test.cpp @@ -12,143 +12,143 @@ #include "TF_MathUtils.h" -UTEST(TF_Simd2, tfSimd2iCmpGt) +UTEST(TF_Simd2, tfS32x2ICmpGt) { struct { - TSimdInt32x2 a; - TSimdInt32x2 b; - TSimdInt32x2 test; + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t test; } tests[] = { // ... existing test cases ... // Edge cases: - {tfSimd2iLoadImmediate(INT32_MIN, INT32_MIN), tfSimd2iLoadImmediate(INT32_MAX, INT32_MAX), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, - {tfSimd2iLoadImmediate(INT32_MAX, INT32_MAX), tfSimd2iLoadImmediate(INT32_MIN, INT32_MIN), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, // Mixed values: - {tfSimd2iLoadImmediate(0, -1), tfSimd2iLoadImmediate(1, 0), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(0, -1), tfS32x2ILoadImmediate(1, 0), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, // All elements equal: - {tfSimd2iLoadImmediate(42, 42), tfSimd2iLoadImmediate(42, 42), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(42, 42), tfS32x2ILoadImmediate(42, 42), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, // Different element types: - {tfSimd2iLoadImmediate(0, 1), tfSimd2iLoadImmediate(3, 2), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(0, 1), tfS32x2ILoadImmediate(3, 2), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, }; for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd2iCmpAllEq(tfSimd2iCmpGt(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ICmpGt(tests[i].a, tests[i].b), tests[i].test)); } } -UTEST(TF_Simd2, tfSimd2iCmpLt) +UTEST(TF_Simd2, tfS32x2ICmpLt) { struct { - TSimdInt32x2 a; - TSimdInt32x2 b; - TSimdInt32x2 test; + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t test; } tests[] = { // Less than - {tfSimd2iLoadImmediate(12, 13), tfSimd2iLoadImmediate(16, 17), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(12, 13), tfS32x2ILoadImmediate(16, 17), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, // Mixed less than and equal - {tfSimd2iLoadImmediate(125, -12), tfSimd2iLoadImmediate(125, 13), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(125, -12), 
tfS32x2ILoadImmediate(125, 13), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_TRUE)}, // All elements greater than or equal - {tfSimd2iLoadImmediate(1, 2), tfSimd2iLoadImmediate(-1, 0), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(1, 2), tfS32x2ILoadImmediate(-1, 0), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, // Edge cases: - {tfSimd2iLoadImmediate(INT32_MIN, INT32_MIN), tfSimd2iLoadImmediate(INT32_MAX, INT32_MAX), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, - {tfSimd2iLoadImmediate(INT32_MAX, INT32_MAX), tfSimd2iLoadImmediate(INT32_MIN, INT32_MIN), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, }; for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd2iCmpAllEq(tfSimd2iCmpLt(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ICmpLt(tests[i].a, tests[i].b), tests[i].test)); } } -UTEST(TF_Simd2, tfSimd2iCmpEq) +UTEST(TF_Simd2, tfS32x2ICmpEq) { struct { - TSimdInt32x2 a; - TSimdInt32x2 b; - TSimdInt32x2 test; + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t test; } tests[] = { // Equal elements - {tfSimd2iLoadImmediate(12, 13), tfSimd2iLoadImmediate(12, 13), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(12, 13), tfS32x2ILoadImmediate(12, 13), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, // Mixed equal and unequal elements - {tfSimd2iLoadImmediate(125, -12), tfSimd2iLoadImmediate(125, -12), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(125, -12), tfS32x2ILoadImmediate(125, -12), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, // All elements unequal - {tfSimd2iLoadImmediate(1, 2), tfSimd2iLoadImmediate(5, 6), tfSimd2iLoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfS32x2ILoadImmediate(1, 2), tfS32x2ILoadImmediate(5, 6), tfS32x2ILoadImmediate(TF_SIMD_FALSE, TF_SIMD_FALSE)}, // Edge cases: - {tfSimd2iLoadImmediate(INT32_MIN, INT32_MIN), tfSimd2iLoadImmediate(INT32_MIN, INT32_MIN), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, - {tfSimd2iLoadImmediate(INT32_MAX, INT32_MAX), tfSimd2iLoadImmediate(INT32_MAX, INT32_MAX), tfSimd2iLoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(INT32_MIN, INT32_MIN), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(INT32_MAX, INT32_MAX), tfS32x2ILoadImmediate(TF_SIMD_TRUE, TF_SIMD_TRUE)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd2iCmpAllEq(tfSimd2iCmpEq(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ICmpEq(tests[i].a, tests[i].b), tests[i].test)); } } -UTEST(TF_Simd2, tfSimd2fZero) +UTEST(TF_Simd2, tfS32x2FZero) { - TSimdFloat32x2 value = tfSimd2fZero(); - EXPECT_NEAR(tfSimd2fSelectIndex0(value), 0.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd2fSelectIndex1(value), 0.0f, DEFAULT_EPSILON); + Tsimd_f32x2_t value = tfS32x2FZero(); + EXPECT_NEAR(tfS32x2FSelectIndex0(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value), 0.0f, DEFAULT_EPSILON); } -UTEST(TF_Simd2, tfSimd2fSplat) +UTEST(TF_Simd2, tfS32x2FSplat) { - TSimdFloat32x2 value = tfSimd2fSplat(23.f); - 
EXPECT_NEAR(tfSimd2fSelectIndex0(value), 23.f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd2fSelectIndex1(value), 23.f, DEFAULT_EPSILON); - TSimdFloat32x2 value1 = tfSimd2fSplat(5.1f); - EXPECT_NEAR(tfSimd2fSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd2fSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); - - TSimdFloat32x2 value2 = tfSimdFloat2Load(5.1f, 1.0f); - EXPECT_NEAR(tfSimd2fSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd2fSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); + Tsimd_f32x2_t value = tfS32x2FSplat(23.f); + EXPECT_NEAR(tfS32x2FSelectIndex0(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value), 23.f, DEFAULT_EPSILON); + Tsimd_f32x2_t value1 = tfS32x2FSplat(5.1f); + EXPECT_NEAR(tfS32x2FSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); + + Tsimd_f32x2_t value2 = tfSimdFloat2Load(5.1f, 1.0f); + EXPECT_NEAR(tfS32x2FSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x2FSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); } -UTEST(TF_Simd2, tfSimd3iNot) { +UTEST(TF_Simd2, tfS32x2INot) { struct { - TSimdInt32x2 test; - TSimdInt32x2 expect; + Tsimd_i32x2_t test; + Tsimd_i32x2_t expect; } tests[] = { - {tfSimd2iLoadImmediate(0xFFFFFFFF, 0x0000FFFF), tfSimd2iLoadImmediate(0, 0xFFFF0000)}, + {tfS32x2ILoadImmediate(0xFFFFFFFF, 0x0000FFFF), tfS32x2ILoadImmediate(0, 0xFFFF0000)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd2iCmpAllEq(tfSimd2iNot(tests[i].test), tests[i].expect)); + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2INot(tests[i].test), tests[i].expect)); } } -UTEST(TF_Simd2, tfSimd2iSelect) { +UTEST(TF_Simd2, tfS32x2ISelect) { struct { - TSimdInt32x2 a; - TSimdInt32x2 b; - TSimdInt32x2 mask; - TSimdInt32x2 expect; + Tsimd_i32x2_t a; + Tsimd_i32x2_t b; + Tsimd_i32x2_t mask; + Tsimd_i32x2_t expect; } tests[] = { - {tfSimd2iLoadImmediate(10, 11), tfSimd2iLoadImmediate(123, -149), tfSimd2iLoadImmediate(0,0), tfSimd2iLoadImmediate(10, 11)}, - {tfSimd2iLoadImmediate(10, 11), tfSimd2iLoadImmediate(123, -149), tfSimd2iLoadImmediate(TF_SIMD_TRUE,0), tfSimd2iLoadImmediate(123, 11)}, - {tfSimd2iLoadImmediate(10, 11), tfSimd2iLoadImmediate(123, -149), tfSimd2iLoadImmediate(TF_SIMD_TRUE,0), tfSimd2iLoadImmediate(123, 11)}, - {tfSimd2iLoadImmediate(10, 11), tfSimd2iLoadImmediate(123, -149), tfSimd2iLoadImmediate(TF_SIMD_TRUE,TF_SIMD_TRUE), tfSimd2iLoadImmediate(123, -149)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(0,0), tfS32x2ILoadImmediate(10, 11)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(TF_SIMD_TRUE,0), tfS32x2ILoadImmediate(123, 11)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(TF_SIMD_TRUE,0), tfS32x2ILoadImmediate(123, 11)}, + {tfS32x2ILoadImmediate(10, 11), tfS32x2ILoadImmediate(123, -149), tfS32x2ILoadImmediate(TF_SIMD_TRUE,TF_SIMD_TRUE), tfS32x2ILoadImmediate(123, -149)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd2iCmpAllEq(tfSimd2iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); + EXPECT_TRUE(tfS32x2ICmpAllEq(tfS32x2ISelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); } } -//UTEST(TF_Simd2, tfSimd3fSelect) { +//UTEST(TF_Simd2, tfS32x3FSelect) { // struct { // TSimd32fx4 a; // TSimd32fx4 b; // TSimd32fx4 mask; // TSimd32fx4 expect; // } tests[] = { -// {tfSimd3fLoadImmediate(10, 11, -13, 32), tfSimd3fLoadImmediate(123, -149, 0, 12), 
tfSimd3fLoadImmediate(0,0,0,0), tfSimd3fLoadImmediate(10, 11, -13, 32)}, -// {tfSimd3fLoadImmediate(10, 11, -13, 32), tfSimd3fLoadImmediate(123, -149, 0, 12), tfSimd3fLoadImmediate(TF_SIMD_TRUE,0,0,0), tfSimd3fLoadImmediate(123, 11, -13, 32)}, -// {tfSimd3fLoadImmediate(10, 11, -13, 32), tfSimd3fLoadImmediate(123, -149, 0, 12), tfSimd3fLoadImmediate(TF_SIMD_TRUE,0,TF_SIMD_TRUE,0), tfSimd3fLoadImmediate(123, 11, 0, 32)}, -// {tfSimd3fLoadImmediate(10, 11, -13, 32), tfSimd3fLoadImmediate(123, -149, 0, 12), tfSimd3fLoadImmediate(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfSimd3fLoadImmediate(123, -149, 0, 12)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(0,0,0,0), tfS32x3FLoadImmediate(10, 11, -13, 32)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(TF_SIMD_TRUE,0,0,0), tfS32x3FLoadImmediate(123, 11, -13, 32)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(TF_SIMD_TRUE,0,TF_SIMD_TRUE,0), tfS32x3FLoadImmediate(123, 11, 0, 32)}, +// {tfS32x3FLoadImmediate(10, 11, -13, 32), tfS32x3FLoadImmediate(123, -149, 0, 12), tfS32x3FLoadImmediate(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfS32x3FLoadImmediate(123, -149, 0, 12)}, // }; // for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) // { -// //EXPECT_TRUE(tfSimd3fCmp(tfSimd2iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); +// //EXPECT_TRUE(tfS32x3FCmp(tfS32x2ISelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); // } //} diff --git a/Forge/tests/Math/TF_SimdFloat3x32Test.cpp b/Forge/tests/Math/TF_SimdFloat3x32Test.cpp index d9e28b0580..5f6dcfadba 100644 --- a/Forge/tests/Math/TF_SimdFloat3x32Test.cpp +++ b/Forge/tests/Math/TF_SimdFloat3x32Test.cpp @@ -12,138 +12,138 @@ #include "TF_MathUtils.h" -UTEST(TF_Simd3, tfSimd3iCmpGt) +UTEST(TF_Simd3, tfS32x3iCmpGt) { struct { - TSimdInt32x3 a; - TSimdInt32x3 b; - TSimdInt32x3 test; + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t test; } tests[] = { // ... existing test cases ... 
// Edge cases: - {tfSimdInt3x32Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3x32Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, - {tfSimdInt3x32Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3x32Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(INT32_MIN, INT32_MIN, INT32_MIN), tfSimd3x32ILoad(INT32_MAX, INT32_MAX, INT32_MAX), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(INT32_MAX, INT32_MAX, INT32_MAX), tfSimd3x32ILoad(INT32_MIN, INT32_MIN, INT32_MIN), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, // Mixed values: - {tfSimdInt3x32Load(0, -1, 1), tfSimdInt3x32Load(1, 0, -1), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(0, -1, 1), tfSimd3x32ILoad(1, 0, -1), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_TRUE)}, // All elements equal: - {tfSimdInt3x32Load(42, 42, 42), tfSimdInt3x32Load(42, 42, 42), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(42, 42, 42), tfSimd3x32ILoad(42, 42, 42), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, // Different element types: - {tfSimdInt3x32Load(0, 1, 2), tfSimdInt3x32Load(3, 2, 1), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(0, 1, 2), tfSimd3x32ILoad(3, 2, 1), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_TRUE)}, }; for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd3iCmpAllEq(tfSimd3iCmpGt(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iCmpGt(tests[i].a, tests[i].b), tests[i].test)); } } -UTEST(TF_Simd3, tfSimd3iCmpLt) +UTEST(TF_Simd3, tfS32x3iCmpLt) { struct { - TSimdInt32x3 a; - TSimdInt32x3 b; - TSimdInt32x3 test; + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t test; } tests[] = { // Less than - {tfSimdInt3x32Load(12, 13, 14), tfSimdInt3x32Load(16, 17, 18), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(12, 13, 14), tfSimd3x32ILoad(16, 17, 18), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, // Mixed less than and equal - {tfSimdInt3x32Load(125, -12, 153), tfSimdInt3x32Load(125, 13, 153), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_TRUE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(125, -12, 153), tfSimd3x32ILoad(125, 13, 153), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_TRUE, TF_SIMD_FALSE)}, // All elements greater than or equal - {tfSimdInt3x32Load(1, 2, 3), tfSimdInt3x32Load(-1, 0, 1), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(1, 2, 3), tfSimd3x32ILoad(-1, 0, 1), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, // Edge cases: - {tfSimdInt3x32Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3x32Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, - {tfSimdInt3x32Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3x32Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(INT32_MIN, INT32_MIN, INT32_MIN), tfSimd3x32ILoad(INT32_MAX, INT32_MAX, INT32_MAX), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(INT32_MAX, INT32_MAX, INT32_MAX), tfSimd3x32ILoad(INT32_MIN, INT32_MIN, INT32_MIN), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, }; for(size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - 
EXPECT_TRUE(tfSimd3iCmpAllEq(tfSimd3iCmpLt(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iCmpLt(tests[i].a, tests[i].b), tests[i].test)); } } -UTEST(TF_Simd3, tfSimd3iCmpEq) +UTEST(TF_Simd3, tfS32x3iCmpEq) { struct { - TSimdInt32x3 a; - TSimdInt32x3 b; - TSimdInt32x3 test; + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t test; } tests[] = { // Equal elements - {tfSimdInt3x32Load(12, 13, 14), tfSimdInt3x32Load(12, 13, 14), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(12, 13, 14), tfSimd3x32ILoad(12, 13, 14), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, // Mixed equal and unequal elements - {tfSimdInt3x32Load(125, -12, 153), tfSimdInt3x32Load(125, -12, 14), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(125, -12, 153), tfSimd3x32ILoad(125, -12, 14), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_FALSE)}, // All elements unequal - {tfSimdInt3x32Load(1, 2, 3), tfSimdInt3x32Load(5, 6, 7), tfSimdInt3x32Load(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, + {tfSimd3x32ILoad(1, 2, 3), tfSimd3x32ILoad(5, 6, 7), tfSimd3x32ILoad(TF_SIMD_FALSE, TF_SIMD_FALSE, TF_SIMD_FALSE)}, // Edge cases: - {tfSimdInt3x32Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3x32Load(INT32_MIN, INT32_MIN, INT32_MIN), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, - {tfSimdInt3x32Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3x32Load(INT32_MAX, INT32_MAX, INT32_MAX), tfSimdInt3x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(INT32_MIN, INT32_MIN, INT32_MIN), tfSimd3x32ILoad(INT32_MIN, INT32_MIN, INT32_MIN), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, + {tfSimd3x32ILoad(INT32_MAX, INT32_MAX, INT32_MAX), tfSimd3x32ILoad(INT32_MAX, INT32_MAX, INT32_MAX), tfSimd3x32ILoad(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd3iCmpAllEq(tfSimd3iCmpEq(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iCmpEq(tests[i].a, tests[i].b), tests[i].test)); } } -UTEST(TF_Simd3, tfSimd3fZero) +UTEST(TF_Simd3, tfS32x3FZero) { - TSimdFloat32x3 value = tfSimd3fZero(); - EXPECT_NEAR(tfSimd3fSelectIndex0(value), 0.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex1(value), 0.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex2(value), 0.0f, DEFAULT_EPSILON); + Tsimd_f32x3_t value = tfS32x3FZero(); + EXPECT_NEAR(tfS32x3FSelectIndex0(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value), 0.0f, DEFAULT_EPSILON); } UTEST(TF_Simd3, tfSimdSplat4f) { - TSimdFloat32x3 value = tfSimd3fSplat(23.f); - EXPECT_NEAR(tfSimd3fSelectIndex0(value), 23.f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex1(value), 23.f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex2(value), 23.f, DEFAULT_EPSILON); - TSimdFloat32x3 value1 = tfSimd3fSplat(5.1f); - EXPECT_NEAR(tfSimd3fSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex2(value1), 5.1f, DEFAULT_EPSILON); - - TSimdFloat32x3 value2 = tfSimdFloat3x32Load(5.1f, 1.0f, 2.0f); - EXPECT_NEAR(tfSimd3fSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd3fSelectIndex2(value2), 2.0f, DEFAULT_EPSILON); + Tsimd_f32x3_t value = tfS32x3FSplat(23.f); + 
EXPECT_NEAR(tfS32x3FSelectIndex0(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value), 23.f, DEFAULT_EPSILON); + Tsimd_f32x3_t value1 = tfS32x3FSplat(5.1f); + EXPECT_NEAR(tfS32x3FSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value1), 5.1f, DEFAULT_EPSILON); + + Tsimd_f32x3_t value2 = tfSimd3x32FLoad(5.1f, 1.0f, 2.0f); + EXPECT_NEAR(tfS32x3FSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x3FSelectIndex2(value2), 2.0f, DEFAULT_EPSILON); } -UTEST(TF_Simd3, tfSimd3iNot) { +UTEST(TF_Simd3, tfS32x3iNot) { struct { - TSimdInt32x3 test; - TSimdInt32x3 expect; + Tsimd_i32x3_t test; + Tsimd_i32x3_t expect; } tests[] = { - {tfSimdInt3x32Load(0xFFFFFFFF, 0x0000FFFF, 0xFFFF0000), tfSimdInt3x32Load(0, 0xFFFF0000, 0x0000FFFF)}, + {tfSimd3x32ILoad(0xFFFFFFFF, 0x0000FFFF, 0xFFFF0000), tfSimd3x32ILoad(0, 0xFFFF0000, 0x0000FFFF)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd3iCmpAllEq(tfSimd3iNot(tests[i].test), tests[i].expect)); + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iNot(tests[i].test), tests[i].expect)); } } -UTEST(TF_Simd3, tfSimd3iSelect) { +UTEST(TF_Simd3, tfS32x3iSelect) { struct { - TSimdInt32x3 a; - TSimdInt32x3 b; - TSimdInt32x3 mask; - TSimdInt32x3 expect; + Tsimd_i32x3_t a; + Tsimd_i32x3_t b; + Tsimd_i32x3_t mask; + Tsimd_i32x3_t expect; } tests[] = { - {tfSimdInt3x32Load(10, 11, -13), tfSimdInt3x32Load(123, -149, 0), tfSimdInt3x32Load(0,0,0), tfSimdInt3x32Load(10, 11, -13)}, - {tfSimdInt3x32Load(10, 11, -13), tfSimdInt3x32Load(123, -149, 0), tfSimdInt3x32Load(TF_SIMD_TRUE,0,0), tfSimdInt3x32Load(123, 11, -13)}, - {tfSimdInt3x32Load(10, 11, -13), tfSimdInt3x32Load(123, -149, 0), tfSimdInt3x32Load(TF_SIMD_TRUE,0,TF_SIMD_TRUE), tfSimdInt3x32Load(123, 11, 0)}, - {tfSimdInt3x32Load(10, 11, -13), tfSimdInt3x32Load(123, -149, 0), tfSimdInt3x32Load(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfSimdInt3x32Load(123, -149, 0)}, + {tfSimd3x32ILoad(10, 11, -13), tfSimd3x32ILoad(123, -149, 0), tfSimd3x32ILoad(0,0,0), tfSimd3x32ILoad(10, 11, -13)}, + {tfSimd3x32ILoad(10, 11, -13), tfSimd3x32ILoad(123, -149, 0), tfSimd3x32ILoad(TF_SIMD_TRUE,0,0), tfSimd3x32ILoad(123, 11, -13)}, + {tfSimd3x32ILoad(10, 11, -13), tfSimd3x32ILoad(123, -149, 0), tfSimd3x32ILoad(TF_SIMD_TRUE,0,TF_SIMD_TRUE), tfSimd3x32ILoad(123, 11, 0)}, + {tfSimd3x32ILoad(10, 11, -13), tfSimd3x32ILoad(123, -149, 0), tfSimd3x32ILoad(TF_SIMD_TRUE,TF_SIMD_TRUE,TF_SIMD_TRUE), tfSimd3x32ILoad(123, -149, 0)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd3iCmpAllEq(tfSimd3iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); + EXPECT_TRUE(tfS32x3iCmpAllEq(tfS32x3iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); } } -//UTEST(TF_Simd3, tfSimd3fSelect) { +//UTEST(TF_Simd3, tfS32x3FSelect) { // struct { -// TSimdFloat32x3 a; -// TSimdFloat32x3 b; -// TSimdFloat32x3 mask; -// TSimdFloat32x3 expect; +// Tsimd_f32x3_t a; +// Tsimd_f32x3_t b; +// Tsimd_f32x3_t mask; +// Tsimd_f32x3_t expect; // } tests[] = { // {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(0,0,0,0), tfSimdFloat4Load(10, 11, -13, 32)}, // {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(TF_SIMD_TRUE,0,0,0), tfSimdFloat4Load(123, 11, -13, 32)}, @@ -152,7 
+152,7 @@ UTEST(TF_Simd3, tfSimd3iSelect) { // }; // for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) // { -// //EXPECT_TRUE(tfSimd3fCmp(tfSimd3iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); +// //EXPECT_TRUE(tfS32x3FCmp(tfS32x3iSelect(tests[i].a, tests[i].b, tests[i].mask), tests[i].expect)); // } //} diff --git a/Forge/tests/Math/TF_SimdFloat4Test.cpp b/Forge/tests/Math/TF_SimdFloat4Test.cpp deleted file mode 100644 index e7f7bad4eb..0000000000 --- a/Forge/tests/Math/TF_SimdFloat4Test.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) Contributors to the Open 3D Engine Project. - * For complete copyright and license terms please see the LICENSE at the root of this distribution. - * - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - */ -#include "TF_TestMain.h" -#include "utest.h" - -#include "Forge/Math/TF_SimdFloat.h" -#include "TF_MathUtils.h" - - -UTEST(TSimdFloat4 , tfLoadSimd4F) -{ - TSimdFloat4 value = tfLoadSimd4F(123.0,12.f,45.f,12.5f); - - EXPECT_NEAR(tfGetElemSimd4F(value, 0), 123.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfGetElemSimd4F(value, 1), 12.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfGetElemSimd4F(value, 2), 45.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfGetElemSimd4F(value, 3), 12.5f, DEFAULT_EPSILON); - - EXPECT_NEAR(tfGetXSimd4F(value), 123.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfGetYSimd4F(value), 12.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfGetZSimd4F(value), 45.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfGetWSimd4F(value), 12.5f, DEFAULT_EPSILON); -} - -UTEST(TF_Matrix, tfVectorEleAdd4F) -{ - struct { - TSimdFloat4 a; - TSimdFloat4 b; - TSimdFloat4 test; - } tests[] = { - { tfLoadSimd4F(1, 0, 0, 0), tfLoadSimd4F(0, 1, 0, 0), tfLoadSimd4F(1, 1, 0, 0) }, // Original test case - { tfLoadSimd4F(2, 3, 4, 5), tfLoadSimd4F(6, 7, 8, 9), tfLoadSimd4F(8, 10, 12, 14) }, // Test with larger numbers - { tfLoadSimd4F(-1, 2, -3, 4), tfLoadSimd4F(5, -6, 7, -8), tfLoadSimd4F(4, -4, 4, -4) }, // Test with negative numbers - }; - - for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - TSimdFloat4 result = tfVectorEleAdd4F(tests[i].a, tests[i].b); - debugPrintSimd4F(result); - EXPECT_TRUE(tfIsCloseSimd4F(result, tests[i].test, DEFAULT_EPSILON)); - } -} - - - -UTEST(TSimdFloat4, tfGetRowSimd4x4F) { - TSimdFloat4x4 mat = - tfLoadSimd4x4F(1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, - 13.0f, 14.0f, 15.0f, 16.0f); - - EXPECT_TRUE(tfIsCloseSimd4F(tfGetRowSimd4x4F(mat, 0), tfLoadSimd4F(1.0f, 2.0f, 3.0f, 4.0f), DEFAULT_EPSILON)); - EXPECT_TRUE(tfIsCloseSimd4F(tfGetRowSimd4x4F(mat, 1), tfLoadSimd4F(5.0f, 6.0f, 7.0f, 8.0f), DEFAULT_EPSILON)); - EXPECT_TRUE(tfIsCloseSimd4F(tfGetRowSimd4x4F(mat, 2), tfLoadSimd4F(9.0f, 10.0f, 11.0f, 12.0f), DEFAULT_EPSILON)); - EXPECT_TRUE(tfIsCloseSimd4F(tfGetRowSimd4x4F(mat, 3), tfLoadSimd4F(13.0f, 14.0f, 15.0f, 16.0f), DEFAULT_EPSILON)); -} - -//UTEST(TSimdFloat4, tfIsCloseSimd3x4F) { -// TSimdFloat4x3 mat = -// tfLoadSimd3x4F(1.0f, 2.0f, 3.0f, -// 5.0f, 6.0f, 7.0f, -// 9.0f, 10.0f, 11.0f, -// 13.0f, 14.0f, 15.0f ); -// -// EXPECT_TRUE(tfIsCloseSimd3F(tfGetRowSimd3x4F(mat, 0), tfLoadSimd4F(1.0f, 2.0f, 3.0f, 4.0f), DEFAULT_EPSILON)); -// EXPECT_TRUE(tfIsCloseSimd3F(tfGetRowSimd3x4F(mat, 1), tfLoadSimd4F(5.0f, 6.0f, 7.0f, 8.0f), DEFAULT_EPSILON)); -// EXPECT_TRUE(tfIsCloseSimd3F(tfGetRowSimd3x4F(mat, 2), tfLoadSimd4F(9.0f, 10.0f, 11.0f, 12.0f), DEFAULT_EPSILON)); -// EXPECT_TRUE(tfIsCloseSimd3F(tfGetRowSimd3x4F(mat, 3), tfLoadSimd4F(13.0f, 14.0f, 15.0f, 16.0f), DEFAULT_EPSILON)); -//} - - -#include "Forge/Mem/TF_Memory.h" 
-#include "Forge/TF_FileSystem.h" -#include "Forge/TF_Log.h" -UTEST_STATE(); -TF_UTEST_MAIN("TF_Simd4") - diff --git a/Forge/tests/Math/TF_SimdFloat4x32Test.cpp b/Forge/tests/Math/TF_SimdFloat4x32Test.cpp index 07e59c29e4..82aab45c84 100644 --- a/Forge/tests/Math/TF_SimdFloat4x32Test.cpp +++ b/Forge/tests/Math/TF_SimdFloat4x32Test.cpp @@ -14,9 +14,9 @@ UTEST(TF_Simd4, tfSimd4iCmpGt) { struct { - TSimdInt32x4 a; - TSimdInt32x4 b; - TSimdInt32x4 test; + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; } tests[] = { // ... existing test cases ... // Edge cases: @@ -37,9 +37,9 @@ UTEST(TF_Simd4, tfSimd4iCmpGt) UTEST(TF_Simd4, tfSimd4iCmpLt) { struct { - TSimdInt32x4 a; - TSimdInt32x4 b; - TSimdInt32x4 test; + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; } tests[] = { // Less than {tfSimdInt4x32Load(12, 13, 14, 15), tfSimdInt4x32Load(16, 17, 18, 19), tfSimdInt4x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, @@ -61,9 +61,9 @@ UTEST(TF_Simd4, tfSimd4iCmpLt) UTEST(TF_Simd4, tfSimd4iCmpEq) { struct { - TSimdInt32x4 a; - TSimdInt32x4 b; - TSimdInt32x4 test; + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t test; } tests[] = { // Equal elements {tfSimdInt4x32Load(12, 13, 14, 15), tfSimdInt4x32Load(12, 13, 14, 15), tfSimdInt4x32Load(TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE, TF_SIMD_TRUE)}, @@ -77,58 +77,58 @@ UTEST(TF_Simd4, tfSimd4iCmpEq) }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd4iCmpAllEq(tfSimd4iCmpEq(tests[i].a, tests[i].b), tests[i].test)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(tfSimdCmpEq_i32x4(tests[i].a, tests[i].b), tests[i].test)); } } UTEST(TF_Simd4, tfSimd4fZero) { - TSimdFloat32x4 value = tfSimd4fZero(); - EXPECT_NEAR(tfSimd4fSelectIndex0(value), 0.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex1(value), 0.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex2(value), 0.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex3(value), 0.0f, DEFAULT_EPSILON); + Tsimd_f32x4_t value = tfS32x4FZero(); + EXPECT_NEAR(tfS32x4FSelectIndex0(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex1(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex2(value), 0.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex3(value), 0.0f, DEFAULT_EPSILON); } UTEST(TF_Simd4, tfSimdSplat4f) { - TSimdFloat32x4 value = tfSimd4fSplat(23.f); - EXPECT_NEAR(tfSimd4fSelectIndex0(value), 23.f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex1(value), 23.f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex2(value), 23.f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex3(value), 23.f, DEFAULT_EPSILON); - TSimdFloat32x4 value1 = tfSimd4fSplat(5.1f); - EXPECT_NEAR(tfSimd4fSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex2(value1), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex3(value1), 5.1f, DEFAULT_EPSILON); + Tsimd_f32x4_t value = tfS32x4FSplat(23.f); + EXPECT_NEAR(tfS32x4FSelectIndex0(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex1(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex2(value), 23.f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex3(value), 23.f, DEFAULT_EPSILON); + Tsimd_f32x4_t value1 = tfS32x4FSplat(5.1f); + EXPECT_NEAR(tfS32x4FSelectIndex0(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex1(value1), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex2(value1), 5.1f, DEFAULT_EPSILON); + 
EXPECT_NEAR(tfS32x4FSelectIndex3(value1), 5.1f, DEFAULT_EPSILON); - TSimdFloat32x4 value2 = tfSimdFloat4x32Load(5.1f, 1.0f, 2.0f, 3.0f); - EXPECT_NEAR(tfSimd4fSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex2(value2), 2.0f, DEFAULT_EPSILON); - EXPECT_NEAR(tfSimd4fSelectIndex3(value2), 3.0f, DEFAULT_EPSILON); + Tsimd_f32x4_t value2 = tfS32x4FLoad(5.1f, 1.0f, 2.0f, 3.0f); + EXPECT_NEAR(tfS32x4FSelectIndex0(value2), 5.1f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex1(value2), 1.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex2(value2), 2.0f, DEFAULT_EPSILON); + EXPECT_NEAR(tfS32x4FSelectIndex3(value2), 3.0f, DEFAULT_EPSILON); } UTEST(TF_Simd4, tfSimd4iNot) { struct { - TSimdInt32x4 test; - TSimdInt32x4 expect; + Tsimd_i32x4_t test; + Tsimd_i32x4_t expect; } tests[] = { {tfSimdInt4x32Load(0xFFFFFFFF, 0x0000FFFF, 0xFFFF0000, 0x000000FF), tfSimdInt4x32Load(0, 0xFFFF0000, 0x0000FFFF, 0xFFFFFF00)}, }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - EXPECT_TRUE(tfSimd4iCmpAllEq(tfSimd4iNot(tests[i].test), tests[i].expect)); + EXPECT_TRUE(tfSimdCmpAllEq_i32x4(tfSimdNot_i32x4(tests[i].test), tests[i].expect)); } } UTEST(TF_Simd4, tfSimd4iSelect) { struct { - TSimdInt32x4 a; - TSimdInt32x4 b; - TSimdInt32x4 mask; - TSimdInt32x4 expect; + Tsimd_i32x4_t a; + Tsimd_i32x4_t b; + Tsimd_i32x4_t mask; + Tsimd_i32x4_t expect; } tests[] = { {tfSimdInt4x32Load(10, 11, -13, 32), tfSimdInt4x32Load(123, -149, 0, 12), tfSimdInt4x32Load(0,0,0,0), tfSimdInt4x32Load(10, 11, -13, 32)}, {tfSimdInt4x32Load(10, 11, -13, 32), tfSimdInt4x32Load(123, -149, 0, 12), tfSimdInt4x32Load(TF_SIMD_TRUE,0,0,0), tfSimdInt4x32Load(123, 11, -13, 32)}, @@ -143,10 +143,10 @@ UTEST(TF_Simd4, tfSimd4iSelect) { //UTEST(TF_Simd4, tfSimd4fSelect) { // struct { -// TSimdFloat32x4 a; -// TSimdFloat32x4 b; -// TSimdFloat32x4 mask; -// TSimdFloat32x4 expect; +// Tsimd_f32x4_t a; +// Tsimd_f32x4_t b; +// Tsimd_f32x4_t mask; +// Tsimd_f32x4_t expect; // } tests[] = { // {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(0,0,0,0), tfSimdFloat4Load(10, 11, -13, 32)}, // {tfSimdFloat4Load(10, 11, -13, 32), tfSimdFloat4Load(123, -149, 0, 12), tfSimdFloat4Load(TF_SIMD_TRUE,0,0,0), tfSimdFloat4Load(123, 11, -13, 32)}, diff --git a/Forge/tests/Math/TF_SimdFloat4x4Test.cpp b/Forge/tests/Math/TF_SimdFloat4x4Test.cpp index 78fa023e9f..832cf3f22d 100644 --- a/Forge/tests/Math/TF_SimdFloat4x4Test.cpp +++ b/Forge/tests/Math/TF_SimdFloat4x4Test.cpp @@ -34,7 +34,7 @@ UTEST(TF_SimdFloat4x4, tfMatTranpose4x4F) for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - TSimdFloat4x4 result = tfMatTranpose4x4F(tests[i].a); + TSimdFloat4x4 result = tfTransposeSimd4x4F(tests[i].a); debugPrintSimd4x4F(result); EXPECT_TRUE(tfIsCloseSimd4x4F(result, tests[i].test, DEFAULT_EPSILON)); } @@ -68,7 +68,7 @@ UTEST(TF_SimdFloat4x4, tfMatMul4x4F_4x4F) for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - TSimdFloat4x4 result = tfMatMul4x4F_4x4F(tests[i].a, tests[i].b); + TSimdFloat4x4 result = tfMulSimd4x4F_4x4F(tests[i].a, tests[i].b); debugPrintSimd4x4F(result); EXPECT_TRUE(tfIsCloseSimd4x4F(result, tests[i].test, DEFAULT_EPSILON)); } @@ -137,7 +137,7 @@ UTEST(TF_SimdFloat4x4, tfVectorMul4x4F) }; for (size_t i = 0; i < TF_ARRAY_COUNT(tests); i++) { - TSimdFloat4 result = tfVectorMul4x4F(tests[i].a, tests[i].b); + TSimdFloat4 result = tfVectorMulSimd4x4F(tests[i].a, tests[i].b); debugPrintSimd4F(result); 
EXPECT_TRUE(tfIsCloseSimd4F(result, tests[i].test, DEFAULT_EPSILON)); }