Skip to content

Commit

Permalink
start on test cases and fix broken abs
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Pollind <[email protected]>
  • Loading branch information
pollend committed Jul 30, 2024
1 parent f43a908 commit 6adaf0a
Show file tree
Hide file tree
Showing 12 changed files with 274 additions and 58 deletions.
4 changes: 2 additions & 2 deletions Forge/Math/Internal/SimdTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ struct TSimdFloat1x4 {
TSimdFloat32x4 mCol0;
};


struct TSimdFloat2x4 {
TSimdFloat32x4 mCol0;
TSimdFloat32x4 mCol1;
Expand All @@ -101,7 +100,7 @@ struct TSimdFloat4x4

// Wrapper struct holding TSimdFloat32x3 data; tfVectorMul3x3F reads it through mRow.
// NOTE(review): both mValue and mRow are listed here. This looks like the two sides of a
// rename (mValue -> mRow) shown together rather than two real members — confirm which
// member the actual header keeps before relying on either name.
struct TSimdFloat3
{
TSimdFloat32x3 mValue;
TSimdFloat32x3 mRow;
};

struct TSimdFloat1x3
Expand Down Expand Up @@ -140,4 +139,5 @@ struct TSimdFloat2x2
};



#endif
59 changes: 41 additions & 18 deletions Forge/Math/TF_Matrix.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include "Forge/Math/Internal/SimdTypes.h"
#include "Internal/SimdTypes.h"
#include "Internal/ScalarTypes.h"

#include "TF_Simd4x32.h"
#include "TF_Simd3x32.h"
#include "TF_Simd2x32.h"

#include "Forge/TF_Log.h"

static inline TSimdFloat4x4 tfMatMul4x4F_4x4F(TSimdFloat4x4 a0, TSimdFloat4x4 a1);
static inline TSimdFloat3x4 tfMatMul4x4F_3x4F(TSimdFloat4x4 a0, TSimdFloat3x4 a1);
Expand All @@ -15,31 +19,50 @@ static inline TSimdFloat3x3 tfMatrixMul(TSimdFloat3x3 a0, TSimdFloat3x3 a1);
static inline TSimdFloat3x3 tfMatrixMul(TSimdFloat3x3 a0, TSimdFloat2x3 a1);
static inline TSimdFloat3x3 tfMatrixMul(TSimdFloat3x3 a0, TSimdFloat1x3 a1);

static inline TSimdFloat4x4 tfVectorMul4x4F(TSimdFloat4x4 a0, TSimdFloat4 a1);
static inline TSimdFloat4x4 tfVectorMul3x3F(TSimdFloat3x3 a0, TSimdFloat3 a1);
static inline TSimdFloat2x2 tfVectorMul2x2F(TSimdFloat2x2 a0, TSimdFloat2 a1);
static inline TSimdFloat4 tfVectorMul4x4F(const TSimdFloat4x4* a0, const TSimdFloat4* a1);
static inline TSimdFloat3 tfVectorMul3x3F(const TSimdFloat3x3* a0, const TSimdFloat3* a1);
static inline TSimdFloat2 tfVectorMul2x2F(const TSimdFloat2x2* a0, const TSimdFloat2* a1);

// Forward declarations only; no definitions for these are visible in this header excerpt.
// Presumably element-wise vector addition, judging by the names — TODO confirm.
// NOTE(review): the 3F and 4F variants take and return TSimdFloat2, identical to the 2F
// variant. This looks like a copy/paste slip (TSimdFloat3 / TSimdFloat4 would be expected);
// confirm the intended signatures before adding definitions or callers.
static inline TSimdFloat2 tfVectorEleAdd2F(TSimdFloat2 a0, TSimdFloat2 a1);
static inline TSimdFloat2 tfVectorEleAdd3F(TSimdFloat2 a0, TSimdFloat2 a1);
static inline TSimdFloat2 tfVectorEleAdd4F(TSimdFloat2 a0, TSimdFloat2 a1);



// conveniences if C++ is available
#ifdef __cplusplus
static inline TSimdFloat4x4 operator*(const TSimdFloat4x4& a, const TSimdFloat4x4& b) { }
// static inline TSimdFloat4x4 operator*(const TSimdFloat4x4& a, const TSimdFloat4x4& b) { }
#endif

/**
* Multiplication of a 4x4 matrix and a 4 element vector
**/
static inline TSimdFloat4x4 tfVectorMul(TSimdFloat4x4 a0, TSimdFloat4 a1) {
TSimdFloat32x4 xxxx = tfSimd4fSplatIndex0(a1.mRow);
TSimdFloat32x4 yyyy = tfSimd4fSplatIndex1(a1.mRow);
TSimdFloat32x4 zzzz = tfSimd4fSplatIndex2(a1.mRow);
TSimdFloat32x4 wwww = tfSimd4fSplatIndex3(a1.mRow);
TSimdFloat32x4 res = tfSimd4fMul(a0.mCol0, xxxx);
res = tfSimd4fMadd(a0.mCol1, yyyy, res);
res = tfSimd4fMadd(a0.mCol2, zzzz, res);
res = tfSimd4fMadd(a0.mCol3, wwww, res);
return {res};
/**
 * Multiplies a 4x4 matrix (stored as four columns) by a 4-element vector.
 *
 * The result is accumulated as
 *   mCol0 * x + mCol1 * y + mCol2 * z + mCol3 * w
 * where x, y, z, w are lanes 0..3 of a1->mRow, each broadcast across a full
 * register by the tfSimd4fSplatIndexN helpers (assumes tfSimd4fMadd(a, b, c)
 * yields a * b + c — confirm against TF_Simd4x32.h).
 *
 * @param a0 matrix operand; must not be NULL (checked with ASSERT).
 * @param a1 vector operand; must not be NULL (checked with ASSERT).
 * @return the transformed vector wrapped in a TSimdFloat4.
 */
static inline TSimdFloat4 tfVectorMul4x4F(const TSimdFloat4x4* a0, const TSimdFloat4* a1)
{
    ASSERT(a0);
    ASSERT(a1);

    const TSimdFloat32x4 xxxx = tfSimd4fSplatIndex0(a1->mRow);
    const TSimdFloat32x4 yyyy = tfSimd4fSplatIndex1(a1->mRow);
    const TSimdFloat32x4 zzzz = tfSimd4fSplatIndex2(a1->mRow);
    const TSimdFloat32x4 wwww = tfSimd4fSplatIndex3(a1->mRow);

    TSimdFloat32x4 acc = tfSimd4fMul(a0->mCol0, xxxx);
    acc = tfSimd4fMadd(a0->mCol1, yyyy, acc);
    acc = tfSimd4fMadd(a0->mCol2, zzzz, acc);
    acc = tfSimd4fMadd(a0->mCol3, wwww, acc);

    // Fill the wrapper member-wise instead of `return { acc };`: a braced return is
    // C++-only syntax, and this header guards its C++ extras behind __cplusplus.
    TSimdFloat4 result;
    result.mRow = acc;
    return result;
}




/**
 * Multiplies a 3x3 matrix (stored as three columns) by a 3-element vector.
 *
 * The result is accumulated as
 *   mCol0 * x + mCol1 * y + mCol2 * z
 * where x, y, z are lanes 0..2 of a1->mRow, each broadcast by the
 * tfSimd3fSplatIndexN helpers (assumes tfSimd3fMadd(a, b, c) yields
 * a * b + c — confirm against TF_Simd3x32.h).
 *
 * @param a0 matrix operand; must not be NULL (checked with ASSERT).
 * @param a1 vector operand; must not be NULL (checked with ASSERT).
 * @return the transformed vector wrapped in a TSimdFloat3.
 **/
static inline TSimdFloat3 tfVectorMul3x3F(const TSimdFloat3x3* a0, const TSimdFloat3* a1) {
    ASSERT(a0);
    ASSERT(a1);

    const TSimdFloat32x3 xxx = tfSimd3fSplatIndex0(a1->mRow);
    const TSimdFloat32x3 yyy = tfSimd3fSplatIndex1(a1->mRow);
    const TSimdFloat32x3 zzz = tfSimd3fSplatIndex2(a1->mRow);

    TSimdFloat32x3 acc = tfSimd3fMul(a0->mCol0, xxx);
    acc = tfSimd3fMadd(a0->mCol1, yyy, acc);
    acc = tfSimd3fMadd(a0->mCol2, zzz, acc);

    // Fill the wrapper member-wise instead of `return {acc};`: a braced return is
    // C++-only syntax, and this header guards its C++ extras behind __cplusplus.
    TSimdFloat3 result;
    result.mRow = acc;
    return result;
}
33 changes: 0 additions & 33 deletions Forge/Math/TF_Simd.h

This file was deleted.

3 changes: 2 additions & 1 deletion Forge/Math/TF_Simd2x32.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ inline TSimdFloat32x2 tfSimd2fDiv(TSimdFloat32x2 arg1, TSimdFloat32x2 arg2) {

inline TSimdFloat32x2 tfSimd2fAbs(TSimdFloat32x2 value) {
#if defined(TF_FEATURE_CPU_SSE)
return _mm_and_ps(value, _mm_set1_ps((float)(0x7FFFFFFF)));
const TSimdFloat32x4 signMask = tfSimd2iToSimd2f(tfSimd2iSplat(0x7FFFFFFF));
return _mm_and_ps(value, signMask );
#elif defined(TF_FEATURE_CPU_NEON)
return vabs_f32(value);
#else
Expand Down
3 changes: 2 additions & 1 deletion Forge/Math/TF_Simd3x32.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,8 @@ inline TSimdFloat32x3 tfSimd3fDiv(TSimdFloat32x3 arg1, TSimdFloat32x3 arg2) {

inline TSimdFloat32x3 tfSimd3fAbs(TSimdFloat32x3 value) {
#if defined(TF_FEATURE_CPU_SSE)
return _mm_and_ps(value, _mm_set1_ps((float)(0x7FFFFFFF)));
const TSimdFloat32x4 signMask = tfSimd3iToSimd3f(tfSimd3iSplat(0x7FFFFFFF));
return _mm_and_ps(value, signMask );
#elif defined(TF_FEATURE_CPU_NEON)
return vabsq_f32(value);
#else
Expand Down
38 changes: 37 additions & 1 deletion Forge/Math/TF_Simd4x32.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define TF_SIMD_4_32_H

#include "Forge/Math/Internal/SimdTypes.h"
#include "Forge/TF_Log.h"

inline TSimdFloat32x4 tfSimd4fSplat(float value);
inline TSimdInt32x4 tfSimd4iSplat(int32_t value);
Expand Down Expand Up @@ -81,6 +82,8 @@ inline TSimdFloat32x4 tfSimd4fCmpLtEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2);

inline bool tfSimd4iCmpAllEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2);
inline bool tfSimd4fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2);
inline bool tfSimd4fCmpAllLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2);
inline bool tfSimd4fCmpAllGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2);

// ----------------------------------------------------------------
// --------------------- Implementation ---------------------------
Expand Down Expand Up @@ -434,7 +437,8 @@ inline TSimdFloat32x4 tfSimd4fDiv(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) {

inline TSimdFloat32x4 tfSimd4fAbs(TSimdFloat32x4 value) {
#if defined(TF_FEATURE_CPU_SSE)
return _mm_and_ps(value, _mm_set1_ps((float)(0x7FFFFFFF)));
const TSimdFloat32x4 signMask = tfSimd4iToSimd4f(tfSimd4iSplat(0x7FFFFFFF));
return _mm_and_ps(value, signMask );
#elif defined(TF_FEATURE_CPU_NEON)
return vabsq_f32(value);
#else
Expand Down Expand Up @@ -714,6 +718,38 @@ inline TSimdInt32x4 tfSimd4iCmpLtEq(TSimdInt32x4 arg1, TSimdInt32x4 arg2) {

}

/**
 * Returns true only when every one of the four lanes satisfies arg1 < arg2.
 *
 * A lane containing NaN never satisfies `<`, so the scalar path returns false
 * for it. The SSE path is expected to behave the same way, assuming
 * tfSimd4fCmpLt maps to an ordered compare such as _mm_cmplt_ps — confirm.
 */
inline bool tfSimd4fCmpAllLt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) {
#if defined(TF_FEATURE_CPU_SSE)
    TSimdFloat32x4 compare = tfSimd4fCmpLt(arg1, arg2);
    // One sign bit per lane; all four must be set.
    return (_mm_movemask_ps(compare) & 0xf) == 0xf;
#else
    for (int i = 0; i < 4; i++) {
        // Negate `<` instead of testing `>=`: with a NaN operand `>=` is false,
        // which previously let the lane pass and made the function return true.
        if (!(arg1.v[i] < arg2.v[i])) {
            return false;
        }
    }
    return true;
#endif
}

/**
 * Returns true only when every one of the four lanes satisfies arg1 > arg2.
 *
 * A lane containing NaN never satisfies `>`, so the scalar path returns false
 * for it. The SSE path is expected to behave the same way, assuming
 * tfSimd4fCmpGt maps to an ordered compare such as _mm_cmpgt_ps — confirm.
 */
inline bool tfSimd4fCmpAllGt(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) {
#if defined(TF_FEATURE_CPU_SSE)
    TSimdFloat32x4 compare = tfSimd4fCmpGt(arg1, arg2);
    // One sign bit per lane; all four must be set.
    return (_mm_movemask_ps(compare) & 0xf) == 0xf;
#else
    for (int i = 0; i < 4; i++) {
        // Negate `>` instead of testing `<=`: with a NaN operand `<=` is false,
        // which previously let the lane pass and made the function return true.
        if (!(arg1.v[i] > arg2.v[i])) {
            return false;
        }
    }
    return true;
#endif
}

inline bool tfSimd4fCmpAllEq(TSimdFloat32x4 arg1, TSimdFloat32x4 arg2) {
#if defined(TF_FEATURE_CPU_SSE)
TSimdFloat32x4 compare = tfSimd4fCmpEq(arg1, arg2);
Expand Down
59 changes: 59 additions & 0 deletions Forge/Math/TF_SimdCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#pragma once
#ifndef TF_SIMD_COMMON_H
#define TF_SIMD_COMMON_H

#include "Internal/SimdTypes.h"

#include "TF_Simd4x32.h"
#include "TF_Simd3x32.h"
#include "TF_Simd2x32.h"

#include "Forge/TF_Log.h"


// Builds a TSimdFloat4 whose mRow is loaded from the four scalars (x, y, z, w).
static inline TSimdFloat4 tfLoadSimd4F(float x, float y, float z, float w);
// Builds a TSimdFloat4x4 from sixteen scalars named mRC and listed one row per line.
// The definition in this header packs them column by column: mCol0 receives
// (m00, m10, m20, m30), mCol1 receives (m01, m11, m21, m31), and so on.
static inline TSimdFloat4x4 tfLoadSimd4x4F(
float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23,
float m30, float m31, float m32, float m33
);

// Approximate equality: true when every lane of |a - b| is strictly less than epsilon.
static inline bool tfCmpEqSimd4F(TSimdFloat4 a, TSimdFloat4 b, float epsilon);
// Same check applied to each of the four columns; all columns must pass.
static inline bool tfCmpEqSimd4x4F(TSimdFloat4x4 a, TSimdFloat4x4 b, float epsilon);

/**
 * Approximate equality of two 4-element vectors.
 *
 * @return true when every lane of |a - b| is strictly less than epsilon.
 */
static inline bool tfCmpEqSimd4F(TSimdFloat4 a, TSimdFloat4 b, float epsilon) {
    const TSimdFloat32x4 tolerance = tfSimd4fSplat(epsilon);
    const TSimdFloat32x4 distance = tfSimd4fAbs(tfSimd4fSub(a.mRow, b.mRow));
    return tfSimd4fCmpAllLt(distance, tolerance);
}

/**
 * Approximate equality of two 4x4 matrices, checked column by column.
 *
 * @return true when, for all four columns, every lane of |a.mColN - b.mColN|
 *         is strictly less than epsilon. Evaluation stops at the first
 *         column that fails.
 */
static inline bool tfCmpEqSimd4x4F(TSimdFloat4x4 a, TSimdFloat4x4 b, float epsilon) {
    // Broadcast the tolerance once instead of once per column.
    const TSimdFloat32x4 tolerance = tfSimd4fSplat(epsilon);

    // Columns are passed directly: wrapping them in braces ({a.mCol0}) is not valid C
    // and, for aggregate lane types, is rejected by C++ compilers without CWG 1467.
    return
        tfSimd4fCmpAllLt(tfSimd4fAbs(tfSimd4fSub(a.mCol0, b.mCol0)), tolerance) &&
        tfSimd4fCmpAllLt(tfSimd4fAbs(tfSimd4fSub(a.mCol1, b.mCol1)), tolerance) &&
        tfSimd4fCmpAllLt(tfSimd4fAbs(tfSimd4fSub(a.mCol2, b.mCol2)), tolerance) &&
        tfSimd4fCmpAllLt(tfSimd4fAbs(tfSimd4fSub(a.mCol3, b.mCol3)), tolerance);
}


/**
 * Wraps four scalars in a TSimdFloat4.
 *
 * @return a TSimdFloat4 whose mRow is tfSimdFloat4Load(x, y, z, w).
 */
static inline TSimdFloat4 tfLoadSimd4F(float x, float y, float z, float w) {
    const TSimdFloat32x4 lanes = tfSimdFloat4Load(x, y, z, w);
    TSimdFloat4 vec;
    vec.mRow = lanes;
    return vec;
}

/**
 * Builds a TSimdFloat4x4 from sixteen scalars.
 *
 * Parameters are named mRC and listed one row per line, while storage is
 * column by column: mColC is loaded from (m0C, m1C, m2C, m3C).
 */
static inline TSimdFloat4x4 tfLoadSimd4x4F(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
) {
    TSimdFloat4x4 mat;
    mat.mCol0 = tfSimdFloat4Load(m00, m10, m20, m30); // first column
    mat.mCol1 = tfSimdFloat4Load(m01, m11, m21, m31); // second column
    mat.mCol2 = tfSimdFloat4Load(m02, m12, m22, m32); // third column
    mat.mCol3 = tfSimdFloat4Load(m03, m13, m23, m33); // fourth column
    return mat;
}

#endif
1 change: 0 additions & 1 deletion Forge/tests/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ cxx_test(
visibility = ['PUBLIC']
)


cxx_test(
name = 'TF_StringTest',
srcs = [
Expand Down
33 changes: 33 additions & 0 deletions Forge/tests/Math/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,36 @@ cxx_test(
headers = math_utils_header,
visibility = ['PUBLIC']
)

# Test target built from TF_MatrixTest.cpp with no extra preprocessor flags.
# Links statically against the TF library and the TF_utest helper target.
cxx_test(
name = 'TF_Matrix',
srcs = [
'TF_MatrixTest.cpp',
],
link_style = "static",
deps = [
"@tf//:TF",
"@tf//tests:TF_utest"
],
header_namespace = "",
headers = math_utils_header,
visibility = ['PUBLIC']
)

# Same source as the TF_Matrix target (TF_MatrixTest.cpp), but compiled with
# -DTF_FEATURE_CPU_SCALAR=1. Presumably this selects the scalar fallback
# code paths in the SIMD headers — confirm against the feature-detection header.
cxx_test(
name = 'TF_Matrix_Scalar',
srcs = [
'TF_MatrixTest.cpp',
],
link_style = "static",
preprocessor_flags = [
"-DTF_FEATURE_CPU_SCALAR=1"
],
deps = [
"@tf//:TF",
"@tf//tests:TF_utest"
],
header_namespace = "",
headers = math_utils_header,
visibility = ['PUBLIC']
)
2 changes: 2 additions & 0 deletions Forge/tests/Math/TF_MathUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@
EXPECT_EQ(__a.getW(), __b.getW()); \
} while(false);



Loading

0 comments on commit 6adaf0a

Please sign in to comment.