Skip to content

Commit

Permalink
Better value range test
Browse files Browse the repository at this point in the history
  • Loading branch information
keijiro committed Jan 26, 2025
1 parent 3554852 commit e89abf1
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 89 deletions.
166 changes: 88 additions & 78 deletions Assets/Test/ValueRangeTest.compute
Original file line number Diff line number Diff line change
@@ -1,121 +1,131 @@
#pragma kernel Aggregate
#pragma kernel Gradient1D
#pragma kernel Classic2D
#pragma kernel Classic3D
#pragma kernel Simplex2D
#pragma kernel Simplex3D

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Random.hlsl"
#include "Packages/jp.keijiro.noiseshader/Shader/Noise1D.hlsl"
#include "Packages/jp.keijiro.noiseshader/Shader/ClassicNoise2D.hlsl"
#include "Packages/jp.keijiro.noiseshader/Shader/ClassicNoise3D.hlsl"
#include "Packages/jp.keijiro.noiseshader/Shader/SimplexNoise2D.hlsl"
#include "Packages/jp.keijiro.noiseshader/Shader/SimplexNoise3D.hlsl"

// One mixing round of Bob Jenkins' One-At-A-Time hash, applied to a single
// 32-bit word. All arithmetic wraps modulo 2^32.
uint JenkinsHash(uint x)
{
    // Each "x += x << k" step is written as the equivalent wrapping multiply
    // by (2^k + 1).
    x *= 1025u;      // x += x << 10
    x ^= x >> 6u;
    x *= 9u;         // x += x << 3
    x ^= x >> 11u;
    x *= 32769u;     // x += x << 15
    return x;
}

// Construct a float in the half-open range [0, 1) from the low 23 bits of m.
// All zeros yields 0; all ones yields 1 - 2^-23, the largest value this construction can produce.
float ConstructFloat(int m) {
const int ieeeMantissa = 0x007FFFFF; // Binary FP32 mantissa bitmask
const int ieeeOne = 0x3F800000; // 1.0 in FP32 IEEE

m &= ieeeMantissa; // Keep only mantissa bits (fractional part)
m |= ieeeOne; // Add fractional part to 1.0
static const uint TotalIteration = 1u << 30;
static const uint ThreadPerGroup = 64u;
static const uint IterationPerThread = 512u;
static const uint ThreadGroupCount = TotalIteration / ThreadPerGroup / IterationPerThread;
static const float ParameterRange = 300;

float f = asfloat(m); // Range [1, 2)
return f - 1; // Range [0, 1)
// Returns a pseudo-random scalar noise parameter for the given thread and
// iteration.
//
// thread_index : index of the calling thread
// iteration    : per-thread sample counter; distinct (thread, iteration)
//                pairs give distinct seeds as long as
//                iteration < IterationPerThread
//
// The hashed value is remapped as (v * 2 - 1) * ParameterRange, i.e. to
// [-ParameterRange, ParameterRange) assuming GenerateHashedRandomFloat
// returns a value in [0, 1).
float RandomFloat(uint thread_index, uint iteration)
{
    const uint sample_seed = thread_index * IterationPerThread + iteration;
    const float unit_value = GenerateHashedRandomFloat(sample_seed);
    return (unit_value * 2 - 1) * ParameterRange;
}

float ConstructFloat(uint m)
float2 RandomFloat2(uint thread_index, uint iteration)
{
return ConstructFloat(asint(m));
uint seed = (thread_index * IterationPerThread + iteration) * 2u;
float x = GenerateHashedRandomFloat(seed);
float y = GenerateHashedRandomFloat(seed + 1u);
return (float2(x, y) * 2 - 1) * ParameterRange;
}

// Pseudo-random value in half-open range [0, 1). The distribution is reasonably uniform.
// Ref: https://stackoverflow.com/a/17479300
float GenerateHashedRandomFloat(uint x)
float3 RandomFloat3(uint thread_index, uint iteration)
{
return ConstructFloat(JenkinsHash(x));
uint seed = (thread_index * IterationPerThread + iteration) * 3u;
float x = GenerateHashedRandomFloat(seed);
float y = GenerateHashedRandomFloat(seed + 1u);
float z = GenerateHashedRandomFloat(seed + 2u);
return (float3(x, y, z) * 2 - 1) * ParameterRange;
}

float2 RandomFloat2(uint thread_id, uint index)
RWStructuredBuffer<float> _AccBuffer;
RWStructuredBuffer<float> _OutBuffer;

groupshared float shared_array[ThreadPerGroup];

float GetSharedArrayMax()
{
uint offset = thread_id << 26;
float x = GenerateHashedRandomFloat(offset + index * 2u + 0u);
float y = GenerateHashedRandomFloat(offset + index * 2u + 1u);
return float2(x, y) * 600 - 300;
float temp = shared_array[0];
for (uint i = 1; i < ThreadPerGroup; i++)
temp = max(temp, shared_array[i]);
return temp;
}

float3 RandomFloat3(uint thread_id, uint index)
[numthreads(1, 1, 1)]
void Aggregate()
{
uint offset = thread_id << 26;
float x = GenerateHashedRandomFloat(offset + index * 4u + 0u);
float y = GenerateHashedRandomFloat(offset + index * 4u + 1u);
float z = GenerateHashedRandomFloat(offset + index * 4u + 2u);
return float3(x, y, z) * 600 - 300;
float temp = _AccBuffer[0];
for (uint i = 1; i < ThreadGroupCount; i++)
temp = max(temp, _AccBuffer[i]);
_OutBuffer[0] = temp;
}

RWStructuredBuffer<float> _Output;

#define THREAD_COUNT 64
#define ITERATION (1u << 24)

groupshared float shared_array[THREAD_COUNT];

float GetSharedArrayMax()
[numthreads(ThreadPerGroup, 1, 1)]
void Gradient1D(uint group_id : SV_GroupID,
uint group_thread_id: SV_GroupThreadID,
uint dispatch_id : SV_DispatchThreadID)
{
float maxval = shared_array[0];
for (uint i = 1; i < THREAD_COUNT; i++)
maxval = max(maxval, shared_array[i]);
return maxval;
float temp = 0;
for (uint i = 0; i < IterationPerThread; i++)
temp = max(temp, abs(GradientNoise(RandomFloat(dispatch_id, i))));
shared_array[group_thread_id] = temp;
GroupMemoryBarrierWithGroupSync();
if (group_thread_id == 0) _AccBuffer[group_id] = GetSharedArrayMax();
}

[numthreads(THREAD_COUNT, 1, 1)]
void Classic2D(uint id : SV_DispatchThreadID)
[numthreads(ThreadPerGroup, 1, 1)]
void Classic2D(uint group_id : SV_GroupID,
uint group_thread_id: SV_GroupThreadID,
uint dispatch_id : SV_DispatchThreadID)
{
float maxval = 0;
for (uint i = 0; i < ITERATION; i++)
maxval = max(maxval, abs(ClassicNoise(RandomFloat2(id, i))));
shared_array[id] = maxval;
float2 temp = 0;
for (uint i = 0; i < IterationPerThread; i++)
temp = max(temp, abs(ClassicNoise(RandomFloat2(dispatch_id, i))));
shared_array[group_thread_id] = max(temp.x, temp.y);
GroupMemoryBarrierWithGroupSync();
if (id.x == 0) _Output[0] = GetSharedArrayMax();
if (group_thread_id == 0) _AccBuffer[group_id] = GetSharedArrayMax();
}

[numthreads(THREAD_COUNT, 1, 1)]
void Classic3D(uint id : SV_DispatchThreadID)
[numthreads(ThreadPerGroup, 1, 1)]
void Classic3D(uint group_id : SV_GroupID,
uint group_thread_id: SV_GroupThreadID,
uint dispatch_id : SV_DispatchThreadID)
{
float maxval = 0;
for (uint i = 0; i < ITERATION; i++)
maxval = max(maxval, abs(ClassicNoise(RandomFloat3(id, i))));
shared_array[id] = maxval;
float3 temp = 0;
for (uint i = 0; i < IterationPerThread; i++)
temp = max(temp, abs(ClassicNoise(RandomFloat3(dispatch_id, i))));
shared_array[group_thread_id] = max(max(temp.x, temp.y), temp.z);
GroupMemoryBarrierWithGroupSync();
if (id.x == 0) _Output[0] = GetSharedArrayMax();
if (group_thread_id == 0) _AccBuffer[group_id] = GetSharedArrayMax();
}

[numthreads(THREAD_COUNT, 1, 1)]
void Simplex2D(uint id : SV_DispatchThreadID)
[numthreads(ThreadPerGroup, 1, 1)]
void Simplex2D(uint group_id : SV_GroupID,
uint group_thread_id: SV_GroupThreadID,
uint dispatch_id : SV_DispatchThreadID)
{
float maxval = 0;
for (uint i = 0; i < ITERATION; i++)
maxval = max(maxval, abs(SimplexNoise(RandomFloat2(id, i))));
shared_array[id] = maxval;
float2 temp = 0;
for (uint i = 0; i < IterationPerThread; i++)
temp = max(temp, abs(SimplexNoise(RandomFloat2(dispatch_id, i))));
shared_array[group_thread_id] = max(temp.x, temp.y);
GroupMemoryBarrierWithGroupSync();
if (id.x == 0) _Output[0] = GetSharedArrayMax();
if (group_thread_id == 0) _AccBuffer[group_id] = GetSharedArrayMax();
}

[numthreads(THREAD_COUNT, 1, 1)]
void Simplex3D(uint id : SV_DispatchThreadID)
[numthreads(ThreadPerGroup, 1, 1)]
void Simplex3D(uint group_id : SV_GroupID,
uint group_thread_id: SV_GroupThreadID,
uint dispatch_id : SV_DispatchThreadID)
{
float maxval = 0;
for (uint i = 0; i < ITERATION; i++)
maxval = max(maxval, abs(SimplexNoise(RandomFloat3(id, i))));
shared_array[id] = maxval;
float3 temp = 0;
for (uint i = 0; i < IterationPerThread; i++)
temp = max(temp, abs(SimplexNoise(RandomFloat3(dispatch_id, i))));
shared_array[group_thread_id] = max(max(temp.x, temp.y), temp.z);
GroupMemoryBarrierWithGroupSync();
if (id.x == 0) _Output[0] = GetSharedArrayMax();
if (group_thread_id == 0) _AccBuffer[group_id] = GetSharedArrayMax();
}
34 changes: 24 additions & 10 deletions Assets/Test/ValueRangeTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,38 @@ class ValueRangeTest : MonoBehaviour
{
[SerializeField, HideInInspector] ComputeShader _compute = null;

void RunTest(int kernel, string name)
const uint TotalIteration = 1u << 30;
const uint ThreadPerGroup = 64u;
const uint IterationPerThread = 512u;
const uint ThreadGroupCount = TotalIteration / ThreadPerGroup / IterationPerThread;

/// <summary>
/// Creates a structured <see cref="GraphicsBuffer"/> holding
/// <paramref name="count"/> elements of type <typeparamref name="T"/>.
/// </summary>
/// <param name="count">Number of elements in the buffer.</param>
/// <returns>A new buffer whose stride is <c>sizeof(T)</c>; the caller owns it.</returns>
unsafe GraphicsBuffer AllocBuffer<T>(int count) where T : unmanaged
{
    // sizeof on a generic unmanaged type parameter is only allowed in an
    // unsafe context, hence the modifier on this method.
    int stride = sizeof(T);
    return new GraphicsBuffer(GraphicsBuffer.Target.Structured, count, stride);
}

void RunTest(int kernel, string name,
GraphicsBuffer accBuffer,
GraphicsBuffer recvBuffer)
{
using var buffer = new ComputeBuffer(1, sizeof(float));
_compute.SetBuffer(kernel, "_AccBuffer", accBuffer);
_compute.Dispatch(kernel, (int)ThreadGroupCount, 1, 1);

_compute.SetBuffer(kernel, "_Output", buffer);
_compute.Dispatch(kernel, 1, 1, 1);
_compute.SetBuffer(0, "_AccBuffer", accBuffer);
_compute.SetBuffer(0, "_OutBuffer", recvBuffer);
_compute.Dispatch(0, 1, 1, 1);

var read = new float[1];
buffer.GetData(read);

recvBuffer.GetData(read);
Debug.Log($"{name} max value = {read[0]}");
}

void Start()
{
RunTest(0, "2D classic perlin noise");
RunTest(1, "3D classic perlin noise");
RunTest(2, "2D simplex noise");
RunTest(3, "3d simplex noise");
using var accBuffer = AllocBuffer<float>((int)ThreadGroupCount);
using var recvBuffer = AllocBuffer<float>(1);
RunTest(1, "1D gradient noise", accBuffer, recvBuffer);
RunTest(2, "2D classic perlin noise", accBuffer, recvBuffer);
RunTest(3, "3D classic perlin noise", accBuffer, recvBuffer);
RunTest(4, "2D simplex noise", accBuffer, recvBuffer);
RunTest(5, "3d simplex noise", accBuffer, recvBuffer);
}
}
2 changes: 1 addition & 1 deletion ProjectSettings/ProjectSettings.asset
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ PlayerSettings:
tvOS: 1
incrementalIl2cppBuild: {}
suppressCommonWarnings: 0
allowUnsafeCode: 0
allowUnsafeCode: 1
useDeterministicCompilation: 1
additionalIl2CppArgs:
scriptingRuntimeVersion: 1
Expand Down

0 comments on commit e89abf1

Please sign in to comment.