Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raytracing pipeline demo #173

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions 28_FFTBloom/app_resources/fft_common.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ struct PreloadedAccessorCommonBase
NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = FFTParameters::ElementsPerInvocation;
NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = FFTParameters::WorkgroupSize;
NBL_CONSTEXPR_STATIC_INLINE uint16_t TotalSize = FFTParameters::TotalSize;

// Barrier member of the preloaded accessor base; intentionally a no-op (reason given in the body).
void memoryBarrier()
{
// Preloaded Accessors don't access any memory in this stage, so we don't need to do anything here
}
};

struct PreloadedAccessorBase : PreloadedAccessorCommonBase
Expand Down
37 changes: 37 additions & 0 deletions 71_RayTracingPipeline/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Bring in the shared "common" module; abort configuration if it cannot be located.
include(common RESULT_VARIABLE RES)
if(NOT RES)
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# The whole example is only configured when NBL_BUILD_IMGUI is enabled.
if(NBL_BUILD_IMGUI)
# Extra include directory handed to the project-creation call below.
# NOTE: "SERACH" is a misspelling of "SEARCH"; the name is used consistently within this file.
set(NBL_INCLUDE_SERACH_DIRECTORIES
"${CMAKE_CURRENT_SOURCE_DIR}/include"
)

# Libraries passed to the project-creation call below.
list(APPEND NBL_LIBRARIES
imtestengine
"${NBL_EXT_IMGUI_UI_LIB}"
)

# The first two arguments are left empty; the remaining ones forward the include dirs, libraries and PCH target.
nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# Optionally embed everything under app_resources/ as builtin resources.
if(NBL_EMBED_BUILTIN_RESOURCES)
set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
set(RESOURCE_DIR "app_resources")

# Absolute paths: resource search root plus the output locations for generated sources/headers.
get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

# Collect every file below the resource directory (paths relative to it) and register each one.
file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
endforeach()

# Create the resource target under the nbl::this_example::builtin namespace ...
ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

# ... and link it into the example executable.
LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
endif()


11 changes: 11 additions & 0 deletions 71_RayTracingPipeline/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Vulkan Ray Tracing Pipeline Demo
![finalResult](docs/Images/final_result.png)

The scene is rendered using two rays. The first ray (the primary ray) is shot from the camera in the ray generation shader, and the second ray (the occlusion ray) is shot from the closest hit shader.
To test the intersection shader, the acceleration structures consist of two types of geometry: the cubes are stored as triangle geometries, while the spheres are stored as procedural geometries.
To test callable shaders, the lighting information for each light type is calculated in its own callable shader.

## Shader Table Layout
![shaderBindingTable](docs/Images/shader_binding_table.png)


164 changes: 164 additions & 0 deletions 71_RayTracingPipeline/app_resources/common.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#ifndef RQG_COMMON_HLSL
#define RQG_COMMON_HLSL

#include "nbl/builtin/hlsl/cpp_compat.hlsl"

NBL_CONSTEXPR uint32_t WorkgroupSize = 16;

struct Material
{
float32_t3 ambient;
float32_t3 diffuse;
float32_t3 specular;
float32_t shininess;
float32_t dissolve; // 1 == opaque; 0 == fully transparent
uint32_t illum; // illumination model (see http://www.fileformat.info/format/material/)
Comment on lines +10 to +15

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd definitely use either float16_t3 or unorm rgb10a2 packed into uint32_t for the ambient, diffuse and specular.

And 18 bits for shininess, 10 bits for dissolve (I'd probably call it alpha instead) and remaining 4 for illum out of a single uint32_t (and order fields from largest alignment to smallest to avoid padding)

};

// Per-geometry record for a procedural primitive: a material plus a center/radius pair.
// NOTE(review): presumably describes a sphere (the README stores spheres as procedural geometries) — confirm.
struct SProceduralGeomInfo
{
Material material;
float32_t3 center;
float32_t radius;
};

// Per-geometry record for triangle geometry: a material, two 64-bit buffer addresses and
// four bitfields that together occupy 32 bits (26 + 3 + 2 + 1).
struct STriangleGeomInfo
{
Material material;
uint64_t vertexBufferAddress;
uint64_t indexBufferAddress;

uint32_t vertexStride : 26;
// NOTE(review): presumably holds an ObjectType value (3 bits cover 0..7) — confirm against the C++ side
uint32_t objType: 3;
uint32_t indexType : 2; // 16 bit, 32 bit or none
uint32_t smoothNormals : 1; // flat for cube, rectangle, disk

};

// Geometry categories used by this example; EGT_COUNT is the number of entries preceding it.
enum E_GEOM_TYPE : uint16_t
{
EGT_TRIANGLES,
EGT_PROCEDURAL,
EGT_COUNT
};

// Ray categories traced by this example; ERT_COUNT is the number of entries preceding it.
enum E_RAY_TYPE : uint16_t
{
ERT_PRIMARY, // Ray shot from the camera
ERT_OCCLUSION, // NOTE(review): per the README, the second ray shot from the closest hit shader — confirm
ERT_COUNT
};

// Miss categories; the entries mirror the names in E_RAY_TYPE.
// NOTE(review): presumably these index the miss shaders of the shader binding table — confirm.
enum E_MISS_TYPE : uint16_t
{
EMT_PRIMARY,
EMT_OCCLUSION,
EMT_COUNT
};

// Supported light kinds; each has its own callable shader source
// (light_directional / light_point / light_spot .rcall.hlsl). ELT_COUNT is the number of kinds.
enum E_LIGHT_TYPE : uint16_t
{
ELT_DIRECTIONAL,
ELT_POINT,
ELT_SPOT,
ELT_COUNT
};

struct Light
{
float32_t3 direction;
float32_t3 position;
float32_t outerCutoff;
uint16_t type;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't use 16-bit types in PushConstants. they require a feature to be enabled and no AMD GPU supports them afaik



#ifndef __HLSL_VERSION
bool operator==(const Light&) const = default;
#endif

};
Comment on lines +59 to +79

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

keep example simple, do just directional light


// Numerator of the inverse-square falloff used by the point and spot light callables
// (intensity = LightIntensity / distance^2).
static const float LightIntensity = 100.0f;

// Push-constant block declared by the shaders of this example.
struct SPushConstants
{
// 64-bit addresses of the per-geometry info arrays; the any-hit shader reads
// triangleGeomInfoBuffer + InstanceID() * sizeof(STriangleGeomInfo) via vk::RawBufferLoad.
uint64_t proceduralGeomInfoBuffer;
uint64_t triangleGeomInfoBuffer;

// NOTE(review): presumably camera position, a per-frame counter and the inverse
// model-view-projection matrix — their consumers are outside this view; confirm.
float32_t3 camPos;
uint32_t frameCounter;
float32_t4x4 invMVP;


// The light evaluated by the light callable shaders.
Light light;
};


// Data exchanged with the light callable shaders: inHitPosition is read by the point and
// spot callables, while the out* members are written by every light callable.
struct RayLight
{
float32_t3 inHitPosition;
float32_t outLightDistance;
float32_t3 outLightDir;
float32_t outIntensity;
};
Comment on lines +97 to +103

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see you use this for a callable, as far as I know the SPIR-V spec, the callables have a division into IncomingCallableDataKHR and CallableDataKHR

It's important because right now the stack is being murdered by allocating (passing) the output variables to the callable and the input variables back from the callable.


#ifdef __HLSL_VERSION

struct [raypayload] ColorPayload
{
float32_t3 hitValue : read(caller) : write(closesthit,miss);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you want to have a throughput variable (how much one unit of emitted light will contribute along this path), and thats all

uint32_t seed : read(closesthit,anyhit) : write(caller);
};

struct [raypayload] ShadowPayload
{
bool isShadowed : read(caller) : write(caller,miss);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as I've mentioned in the comment in your anyhit shader, its better to have a float32_t opacity and have the anyhit multiply into it, which allows for transparent shadows, you have to use the 4 bytes in your payload anyway.

uint32_t seed : read(anyhit) : write(caller);
};

// Shape identifiers, numbered consecutively from 0; OT_COUNT is the number of shapes.
// The ordering must stay in sync with the C++ side and with s_offsetsToNormalBytes.
enum ObjectType : uint32_t // matches c++
{
OT_CUBE = 0,
OT_SPHERE,
OT_CYLINDER,
OT_RECTANGLE,
OT_DISK,
OT_ARROW,
OT_CONE,
OT_ICOSPHERE,

OT_COUNT
};

// One entry per ObjectType, listed in enum order (array length is OT_COUNT).
// NOTE(review): presumably the byte offset of the normal attribute inside a vertex — confirm against the vertex formats.
static uint32_t s_offsetsToNormalBytes[OT_COUNT] = { 18, 24, 24, 20, 20, 24, 16, 12 }; // based on normals data position
// Diffuse shading term.
//   mat       - surface material; `illum >= 1` adds the ambient colour
//   light_dir - direction used in the N.L term
//   normal    - surface normal
// Returns diffuse * max(N.L, 0), plus the ambient colour when enabled by `illum`.
float32_t3 computeDiffuse(Material mat, float32_t3 light_dir, float32_t3 normal)
{
    // Lambert cosine factor, clamped so back-facing light contributes nothing
    const float32_t cosTheta = max(dot(normal, light_dir), 0.0);
    const float32_t3 lambertian = mat.diffuse * cosTheta;

    // illumination model 0 has no ambient contribution
    if (mat.illum < 1)
        return lambertian;

    return lambertian + mat.ambient;
}

// Specular shading term with an energy-conservation factor.
//   mat       - surface material; illumination models below 2 have no specular term
//   view_dir  - view direction; it is negated and normalized before use
//   light_dir - light direction; its negation is reflected about `normal`
//   normal    - surface normal
// Returns mat.specular scaled by the specular weight, or zero when `illum < 2`.
float32_t3 computeSpecular(Material mat, float32_t3 view_dir,
                           float32_t3 light_dir, float32_t3 normal)
{
    if (mat.illum >= 2)
    {
        const float32_t pi = 3.14159265;
        // exponent is never allowed to drop below 4
        const float32_t exponent = max(mat.shininess, 4.0);
        const float32_t normalization = (2.0 + exponent) / (2.0 * pi);

        const float32_t3 toViewer = normalize(-view_dir);
        const float32_t3 reflected = reflect(-light_dir, normal);
        const float32_t weight = normalization * pow(max(dot(toViewer, reflected), 0.0), exponent);

        return float32_t3(mat.specular * weight);
    }

    return float32_t3(0, 0, 0);
}
#endif

#endif // RQG_COMMON_HLSL
11 changes: 11 additions & 0 deletions 71_RayTracingPipeline/app_resources/light_directional.rcall.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "common.hlsl"

[[vk::push_constant]] SPushConstants pc;

// Directional light callable: fills every output of `cLight` from the push-constant light.
[shader("callable")]
void main(inout RayLight cLight)
{
    // direction pointing back along the light's own direction
    const float32_t3 towardsLight = normalize(-pc.light.direction);

    // fixed, very large distance and unit intensity (no falloff is applied here)
    cLight.outLightDistance = 10000000;
    cLight.outIntensity = 1;
    cLight.outLightDir = towardsLight;
}
13 changes: 13 additions & 0 deletions 71_RayTracingPipeline/app_resources/light_point.rcall.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.hlsl"

[[vk::push_constant]] SPushConstants pc;

// Point light callable: derives direction, distance and inverse-square intensity
// from the hit position stored in `cLight` and the push-constant light position.
[shader("callable")]
void main(inout RayLight cLight)
{
    const float32_t3 toLight = pc.light.position - cLight.inHitPosition;
    const float dist = length(toLight);

    cLight.outLightDistance = dist;
    cLight.outLightDir = normalize(toLight);
    // inverse-square falloff
    cLight.outIntensity = LightIntensity / (dist * dist);
}
16 changes: 16 additions & 0 deletions 71_RayTracingPipeline/app_resources/light_spot.rcall.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include "common.hlsl"

[[vk::push_constant]] SPushConstants pc;

// Spot light callable: inverse-square point-light falloff multiplied by a cone factor.
//
// The cone factor compares the cosine of the angle between the light axis and the
// direction to the shaded point against `outerCutoff` (therefore also a cosine).
// It is 0 at or beyond the outer cone and rises linearly (in cosine space) to 1 on the axis.
//
// The previous denominator was `-outerCutoff`. For any positive cutoff cosine that made
// the factor zero inside the cone and positive outside it, i.e. an inverted spotlight.
// With only an outer cutoff available in `Light`, the inner cone is taken to be the
// light axis itself (cosine 1), so the falloff range is `1 - outerCutoff`.
[shader("callable")]
void main(inout RayLight cLight)
{
    const float32_t3 toLight = pc.light.position - cLight.inHitPosition;
    const float dist = length(toLight);

    cLight.outLightDistance = dist;
    cLight.outLightDir = normalize(toLight);

    // cosine of the angle between the spot axis and the direction from the light to the hit point
    const float cosAngle = dot(cLight.outLightDir, normalize(-pc.light.direction));
    // keep the denominator strictly positive so a zero-width cone cannot produce 0/0
    const float falloffRange = max(1.0 - pc.light.outerCutoff, 1e-6);
    const float spotIntensity = clamp((cosAngle - pc.light.outerCutoff) / falloffRange, 0.0, 1.0);

    cLight.outIntensity = (LightIntensity / (dist * dist)) * spotIntensity;
}
19 changes: 19 additions & 0 deletions 71_RayTracingPipeline/app_resources/present.frag.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#pragma wave shader_stage(fragment)

// vertex shader is provided by the fullScreenTriangle extension
#include <nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl>
using namespace nbl::hlsl;
using namespace ext::FullScreenTriangle;

// binding 0 set 0
[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture;
[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState;

// Presents the texture bound at set 0, binding 0: samples it at the interpolated UV
// and outputs the colour with alpha forced to 1.
[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0
{
    const float32_t3 rgb = texture.Sample(samplerState, vxAttr.uv).rgb;
    return float32_t4(rgb, 1.0f);
}
34 changes: 34 additions & 0 deletions 71_RayTracingPipeline/app_resources/random.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Hashes two 32-bit values into one pseudo-random 32-bit value by running 16
// double-rounds of the Tiny Encryption Algorithm over them. See Zafar, Olano, and
// Curtis, "GPU Random Numbers via the Tiny Encryption Algorithm".
//   val0, val1 - the two words to mix
// Returns the first mixed word.
uint32_t tea(uint32_t val0, uint32_t val1)
{
    const uint32_t kDelta = 0x9e3779b9;
    const uint32_t kRounds = 16;

    uint32_t a = val0;
    uint32_t b = val1;
    uint32_t sum = 0;

    uint32_t round = 0;
    while (round < kRounds)
    {
        sum += kDelta;
        a += ((b << 4) + 0xa341316c) ^ (b + sum) ^ ((b >> 5) + 0xc8013ea4);
        b += ((a << 4) + 0xad90777d) ^ (a + sum) ^ ((a >> 5) + 0x7e95761e);
        ++round;
    }

    return a;
}

// Advances the RNG state with the Numerical Recipes linear congruential generator
// and returns a pseudo-random unsigned int in [0, 2^24).
//   prev - RNG state; overwritten with the next state
uint32_t lcg(inout uint32_t prev)
{
    // next = A * prev + C, wrapping in 32 bits
    prev = 1664525u * prev + 1013904223u;
    // only the low 24 bits are handed out
    return prev & 0x00FFFFFF;
}

// Advances the RNG state and returns a pseudo-random float32_t in [0, 1).
//   prev - RNG state; updated by the underlying lcg() call
float32_t rnd(inout uint32_t prev)
{
    // lcg() yields a value in [0, 2^24); dividing by 2^24 maps it into [0, 1)
    const uint32_t bits24 = lcg(prev);
    return float32_t(bits24) / float32_t(0x01000000);
}
Comment on lines +1 to +34

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you may want to rework them into something resembling xoroshiro.hlsl in the Nabla repo and pcg.hlsl from the HLSL BxDF PR https://github.com/Devsh-Graphics-Programming/Nabla/pull/811/files#diff-717f65cfd315d91ced5e7da9f817f786fb1f8c1b1c06a570df4f61be3660a643

28 changes: 28 additions & 0 deletions 71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "common.hlsl"
#include "random.hlsl"

[[vk::push_constant]] SPushConstants pc;

[[vk::binding(0, 0)]] RaytracingAccelerationStructure topLevelAS;

// Select the payload type of this any-hit shader at compile time:
// USE_COLOR_PAYLOAD picks ColorPayload, USE_SHADOW_PAYLOAD picks ShadowPayload.
// NOTE(review): if neither macro is defined, AnyHitPayload stays undeclared and the
// entry point below will not compile.
#if defined(USE_COLOR_PAYLOAD)
using AnyHitPayload = ColorPayload;
#elif defined(USE_SHADOW_PAYLOAD)
using AnyHitPayload = ShadowPayload;
#endif

[shader("anyhit")]
void main(inout AnyHitPayload p, in BuiltInTriangleIntersectionAttributes attribs)
{
const int instID = InstanceID();
const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo));

if (geom.material.illum != 4)
return;
Comment on lines +20 to +21

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can build your TLAS with instance flags that prevent ANY_HIT being called on them (or was it only BLAS?).

This is useful because you don't end up executing the vk::RawBufferLoad for the things that are not transparent to check if they are transparent (saving Bandwidth in the meantime).


uint32_t seed = p.seed;
if (geom.material.dissolve == 0.0)
IgnoreHit();
else if (rnd(seed) > geom.material.dissolve)
IgnoreHit();
Comment on lines +23 to +27

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok so you stochastically do alpha, btw for this to add up, you'd need to "boost" the color (throughput in the color payload) by the probability of the path taken, so:

  • if you ignore the hit, you mul by 1.f/(1.f-dissolve)
  • if you don't ignore the hit, you mul by 1.f/dissolve

basically same as importance sampling. Nvm they cancel out and you never multiplied them into the throughput.

For the shadow ray there is a more performant way actually, instead of importance sampling the mix

this*alpha+next*(1-alpha)

you can recognise that the shadow ray doesn't have to be a boolean and you can multiply the dissolve into it same you would an alpha value (you do need to create the BLAS with the no-duplicate hit flag though).

Also at this point you can make two separate anyhit HLSL shader sources.

}
Loading