From 4b315fba43ec7ce5fce5c5e336c2e24ecabfa78a Mon Sep 17 00:00:00 2001 From: Ilya Doroshenko Date: Mon, 27 Jan 2025 17:58:40 +0100 Subject: [PATCH] [Release] Feature/raytracing (#208) * added basic raytracing husk * base top level structure * buildable acceleration structures * rewritten descriptor storage to become more flexible * write acceleration structure write RW texture * pipeline creation * sbt * RT works with DX12! * RT works with Vulkan * first triangle in RT, API beta wrapup * Compute pipelines (untested) * more compute commands, added proxies for vulkan * typo * small fix to flags * flags * desc storage fix * test for srv buffers * ok, they have to be raw * ->structured buffers * recursion depth, compiler flags for DXC * fix for DXC * finalized version * Restyled Feature/raytracing (#209) * Restyled by astyle * Restyled by clang-format * Restyled by cmake-format * Restyled by prettier-markdown * Restyled by whitespace --------- Co-authored-by: Restyled.io --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Restyled.io --- CMakeLists.txt | 2 +- Changelog.md | 6 + README.md | 23 +- bindings/wisdom.cpp | 144 ++- bindings/wisdom.h | 989 ++++++++++++++---- cmake/functions.cmake | 13 +- cmake/wisdom-deps.cmake.in | 14 +- cmake/wisdom.targets | 2 +- examples/basic/CMakeLists.txt | 1 + .../basic/descriptor_storage/entry_main.cpp | 18 +- examples/basic/multiview/entry_main.cpp | 19 +- examples/basic/raytracing/CMakeLists.txt | 17 + examples/basic/raytracing/entry_main.cpp | 456 ++++++++ examples/basic/shaders/CMakeLists.txt | 16 + .../descriptor_storage/desc_storage.vs.hlsl | 9 +- .../multiview/multiview_screen.ps.hlsl | 6 +- .../shaders/raytracing/raytracing.lib.hlsl | 59 ++ examples/common/CMakeLists.txt | 10 +- examples/common/mouse.h | 4 +- examples/common/wis_helper.h | 5 +- examples/common/wis_swapchain.h | 11 +- generator/generator.cpp | 10 +- generator/generator.h | 3 +- 
wisdom/extensions/CMakeLists.txt | 1 + wisdom/extensions/raytracing/CMakeLists.txt | 87 ++ .../wisdom/dx12_raytracing_pipeline.h | 37 + .../raytracing/wisdom/gen/vk_functions.in | 11 + .../wisdom/generated/vk_functions.hpp | 75 ++ .../extensions/raytracing/wisdom/impl.dx12.h | 239 +++++ wisdom/extensions/raytracing/wisdom/impl.h | 14 + wisdom/extensions/raytracing/wisdom/impl.vk.h | 207 ++++ .../raytracing/wisdom/impl/impl.dx12.cpp | 268 +++++ .../raytracing/wisdom/impl/impl.vk.cpp | 339 ++++++ .../wisdom/vk_raytracing_pipeline.h | 35 + wisdom/extensions/raytracing/wisdom/vk_rtas.h | 59 ++ .../raytracing/wisdom/wisdom_raytracing.hpp | 35 + wisdom/include/wisdom/dx12/dx12_allocator.h | 5 +- .../include/wisdom/dx12/dx12_command_list.h | 80 ++ .../wisdom/dx12/dx12_descriptor_storage.h | 102 +- wisdom/include/wisdom/dx12/dx12_device.h | 136 +-- wisdom/include/wisdom/dx12/dx12_resource.h | 16 + wisdom/include/wisdom/dx12/dx12_shader.h | 2 +- wisdom/include/wisdom/dx12/dx12_views.h | 2 + .../wisdom/dx12/impl/dx12_allocator.cpp | 28 +- .../wisdom/dx12/impl/dx12_command_list.cpp | 133 ++- .../include/wisdom/dx12/impl/dx12_device.cpp | 407 ++++--- wisdom/include/wisdom/generated/api/api.hpp | 315 ++++-- .../wisdom/generated/dx12/dx12_structs.hpp | 129 ++- .../wisdom/generated/vulkan/vk_functions.hpp | 465 +++++--- .../wisdom/generated/vulkan/vk_structs.hpp | 172 ++- wisdom/include/wisdom/global/definitions.h | 3 +- .../include/wisdom/vulkan/gen/vk_functions.in | 3 + .../wisdom/vulkan/impl/vk_allocator.cpp | 2 +- .../wisdom/vulkan/impl/vk_command_list.cpp | 88 +- .../include/wisdom/vulkan/impl/vk_device.cpp | 235 +++-- wisdom/include/wisdom/vulkan/vk_adapter.h | 2 +- .../include/wisdom/vulkan/vk_command_list.h | 103 +- .../wisdom/vulkan/vk_descriptor_storage.h | 104 +- wisdom/include/wisdom/vulkan/vk_device.h | 267 ++--- wisdom/include/wisdom/vulkan/vk_device_ext.h | 5 + wisdom/include/wisdom/vulkan/vk_resource.h | 21 + wisdom/include/wisdom/vulkan/vk_views.h | 4 +- 
wisdom/include/wisdom/wisdom.hpp | 12 + xml/enums.xml | 178 +++- xml/structs.xml | 144 ++- xml/wis.xml | 113 +- 66 files changed, 5304 insertions(+), 1216 deletions(-) create mode 100644 examples/basic/raytracing/CMakeLists.txt create mode 100644 examples/basic/raytracing/entry_main.cpp create mode 100644 examples/basic/shaders/raytracing/raytracing.lib.hlsl create mode 100644 wisdom/extensions/raytracing/CMakeLists.txt create mode 100644 wisdom/extensions/raytracing/wisdom/dx12_raytracing_pipeline.h create mode 100644 wisdom/extensions/raytracing/wisdom/gen/vk_functions.in create mode 100644 wisdom/extensions/raytracing/wisdom/generated/vk_functions.hpp create mode 100644 wisdom/extensions/raytracing/wisdom/impl.dx12.h create mode 100644 wisdom/extensions/raytracing/wisdom/impl.h create mode 100644 wisdom/extensions/raytracing/wisdom/impl.vk.h create mode 100644 wisdom/extensions/raytracing/wisdom/impl/impl.dx12.cpp create mode 100644 wisdom/extensions/raytracing/wisdom/impl/impl.vk.cpp create mode 100644 wisdom/extensions/raytracing/wisdom/vk_raytracing_pipeline.h create mode 100644 wisdom/extensions/raytracing/wisdom/vk_rtas.h create mode 100644 wisdom/extensions/raytracing/wisdom/wisdom_raytracing.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d9bd743..faa4afd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ cmake_minimum_required(VERSION 3.22) -set(WISDOM_VERSION "0.5.0") +set(WISDOM_VERSION "0.6.0") project("Wisdom" VERSION ${WISDOM_VERSION}) set(CMAKE_DEBUG_POSTFIX d) diff --git a/Changelog.md b/Changelog.md index dee71b98..5919bfdd 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,11 @@ # Version History +- 0.6.0 Raytracing + - BREAKING: Revisited root signature and descriptor storage. It is now possible to create root signatures with several descriptors of the same type; binding tables are ordered accordingly. + - BREAKING: Root signature compatibility is now based on the input `DescriptorBindingDesc`. 
It is possible to create a root signature with different descriptor types, but that may result in incompatibility between root and descriptor storage. + - Added Raytracing support for DXR and VK_KHR_ray_tracing + - Added compute pipeline and compute functions to the device and command list + - Added more bindings to the Descriptor storage - 0.5.0 API stabilization - Most of the API is now stable and will not change diff --git a/README.md b/README.md index 4f473c92..316d6391 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,16 @@ https://www.nuget.org/packages/Wisdom/ +# Features + +- [x] Raytracing support +- [x] Compute pipeline, Basic rendering and Multiview +- [x] Embedded DXC shader compiler and standard HLSL language +- [x] Inline API with no virtual functions. +- [x] Extensibility with internal state access +- [x] Advanced memory allocations +- [x] DMA copy support and ability to share memory between APIs + # Why? A lot of old OpenGL solutions are scratching the ceiling of OpenGL potential, and Vulkan is too low-level for most of the tasks. DirectX 12 is a good alternative, but it's not cross-platform. @@ -110,16 +120,3 @@ This type of project does not support Vulkan, since Vulkan does not have UWP sur - Vulkan 1.3.2xx+ Video card driver should have Descriptor buffer support. Tested on NVIDIA RTX A4000. - -# Roadmap - -The project has Gitub projects enabled, so you can see the progress on the project. 
-For the roadmap, the following features are planned: - -- [x] SDL3 examples -- [ ] UWP example -- [ ] Elaborate documentation -- [x] C API generation -- [ ] Debugging tools -- [ ] Small game engine -- [x] Lower CMake version requirement to 3.22 diff --git a/bindings/wisdom.cpp b/bindings/wisdom.cpp index 311919c0..907cf6bd 100644 --- a/bindings/wisdom.cpp +++ b/bindings/wisdom.cpp @@ -135,25 +135,25 @@ extern "C" WisResult DX12DeviceCreateGraphicsPipeline(DX12Device self, const DX1 } return reinterpret_cast(res); } -extern "C" WisResult DX12DeviceCreateRootSignature(DX12Device self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t descriptors_count, uint32_t space_overlap_count, DX12RootSignature* signature) +extern "C" WisResult DX12DeviceCreateComputePipeline(DX12Device self, const DX12ComputePipelineDesc* desc, DX12PipelineState* pipeline) { auto* xself = reinterpret_cast(self); - auto&& [res, value] = xself->CreateRootSignature(reinterpret_cast(push_constants), constants_count, reinterpret_cast(push_descriptors), descriptors_count, space_overlap_count); + auto&& [res, value] = xself->CreateComputePipeline(*reinterpret_cast(desc)); if (res.status != wis::Status::Ok) { return reinterpret_cast(res); } - *signature = reinterpret_cast(new (std::nothrow) wis::DX12RootSignature(std::move(value))); - if (!*signature) { - return WisResult{ StatusOutOfMemory, "Failed to allocate memory for wis::DX12RootSignature." }; + *pipeline = reinterpret_cast(new (std::nothrow) wis::DX12PipelineState(std::move(value))); + if (!*pipeline) { + return WisResult{ StatusOutOfMemory, "Failed to allocate memory for wis::DX12PipelineState." 
}; } return reinterpret_cast(res); } -extern "C" WisResult DX12DeviceCreateRootSignature2(DX12Device self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptors_count, const WisDescriptorSpacing* descriptor_spacing, DX12RootSignature* signature) +extern "C" WisResult DX12DeviceCreateRootSignature(DX12Device self, const WisPushConstant* push_constants, uint32_t push_constant_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptor_count, const WisDescriptorBindingDesc* bindings, uint32_t binding_count, DX12RootSignature* signature) { auto* xself = reinterpret_cast(self); - auto&& [res, value] = xself->CreateRootSignature2(reinterpret_cast(push_constants), constants_count, reinterpret_cast(push_descriptors), push_descriptors_count, reinterpret_cast(descriptor_spacing)); + auto&& [res, value] = xself->CreateRootSignature(reinterpret_cast(push_constants), push_constant_count, reinterpret_cast(push_descriptors), push_descriptor_count, reinterpret_cast(bindings), binding_count); if (res.status != wis::Status::Ok) { return reinterpret_cast(res); @@ -255,10 +255,10 @@ extern "C" WisResult DX12DeviceCreateShaderResource(DX12Device self, DX12Texture } return reinterpret_cast(res); } -extern "C" WisResult DX12DeviceCreateDescriptorStorage(DX12Device self, const WisDescriptorStorageDesc* desc, DX12DescriptorStorage* storage) +extern "C" WisResult DX12DeviceCreateDescriptorStorage(DX12Device self, const WisDescriptorBindingDesc* bindings, uint32_t bindings_count, WisDescriptorMemory memory, DX12DescriptorStorage* storage) { auto* xself = reinterpret_cast(self); - auto&& [res, value] = xself->CreateDescriptorStorage(*reinterpret_cast(desc)); + auto&& [res, value] = xself->CreateDescriptorStorage(reinterpret_cast(bindings), bindings_count, static_cast(memory)); if (res.status != wis::Status::Ok) { return reinterpret_cast(res); @@ -508,6 +508,11 @@ extern "C" void 
DX12CommandListCopyTextureToBuffer(DX12CommandList self, DX12Tex auto* xself = reinterpret_cast(self); xself->CopyTextureToBuffer(*reinterpret_cast(source), *reinterpret_cast(destination), reinterpret_cast(regions), region_count); } +extern "C" void DX12CommandListCopyTexture(DX12CommandList self, DX12Texture source, DX12Texture destination, const WisTextureCopyRegion* regions, uint32_t region_count) +{ + auto* xself = reinterpret_cast(self); + xself->CopyTexture(*reinterpret_cast(source), *reinterpret_cast(destination), reinterpret_cast(regions), region_count); +} extern "C" void DX12CommandListBufferBarrier(DX12CommandList self, const WisBufferBarrier* barrier, DX12Buffer buffer) { auto* xself = reinterpret_cast(self); @@ -543,6 +548,11 @@ extern "C" void DX12CommandListSetRootSignature(DX12CommandList self, DX12RootSi auto* xself = reinterpret_cast(self); xself->SetRootSignature(*reinterpret_cast(root_signature)); } +extern "C" void DX12CommandListSetComputeRootSignature(DX12CommandList self, DX12RootSignature root_signature) +{ + auto* xself = reinterpret_cast(self); + xself->SetComputeRootSignature(*reinterpret_cast(root_signature)); +} extern "C" void DX12CommandListIASetPrimitiveTopology(DX12CommandList self, WisPrimitiveTopology topology) { auto* xself = reinterpret_cast(self); @@ -593,16 +603,41 @@ extern "C" void DX12CommandListDrawInstanced(DX12CommandList self, uint32_t vert auto* xself = reinterpret_cast(self); xself->DrawInstanced(vertex_count_per_instance, instance_count, start_vertex, start_instance); } +extern "C" void DX12CommandListDispatch(DX12CommandList self, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) +{ + auto* xself = reinterpret_cast(self); + xself->Dispatch(group_count_x, group_count_y, group_count_z); +} extern "C" void DX12CommandListSetPushConstants(DX12CommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes, WisShaderStages stage) { auto* xself = reinterpret_cast(self); 
xself->SetPushConstants(data, size_4bytes, offset_4bytes, static_cast(stage)); } +extern "C" void DX12CommandListSetComputePushConstants(DX12CommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes) +{ + auto* xself = reinterpret_cast(self); + xself->SetComputePushConstants(data, size_4bytes, offset_4bytes); +} extern "C" void DX12CommandListPushDescriptor(DX12CommandList self, WisDescriptorType type, uint32_t root_index, DX12Buffer buffer, uint32_t offset) { auto* xself = reinterpret_cast(self); xself->PushDescriptor(static_cast(type), root_index, *reinterpret_cast(buffer), offset); } +extern "C" void DX12CommandListPushDescriptorCompute(DX12CommandList self, WisDescriptorType type, uint32_t root_index, DX12Buffer buffer, uint32_t offset) +{ + auto* xself = reinterpret_cast(self); + xself->PushDescriptorCompute(static_cast(type), root_index, *reinterpret_cast(buffer), offset); +} +extern "C" void DX12CommandListSetDescriptorStorage(DX12CommandList self, DX12DescriptorStorage storage) +{ + auto* xself = reinterpret_cast(self); + xself->SetDescriptorStorage(*reinterpret_cast(storage)); +} +extern "C" void DX12CommandListSetComputeDescriptorStorage(DX12CommandList self, DX12DescriptorStorage storage) +{ + auto* xself = reinterpret_cast(self); + xself->SetComputeDescriptorStorage(*reinterpret_cast(storage)); +} // DX12SwapChain methods -- extern "C" void DX12SwapChainDestroy(DX12SwapChain self) @@ -671,6 +706,13 @@ extern "C" void DX12BufferUnmap(DX12Buffer self) auto* xself = reinterpret_cast(self); xself->Unmap(); } +extern "C" uint64_t DX12BufferGetGPUAddress(DX12Buffer self) +{ + auto* xself = reinterpret_cast(self); + auto res = xself->GetGPUAddress(); + ; + return res; +} // DX12Texture methods -- extern "C" void DX12TextureDestroy(DX12Texture self) @@ -685,20 +727,20 @@ extern "C" void DX12DescriptorStorageDestroy(DX12DescriptorStorage self) auto* xself = reinterpret_cast(self); delete xself; } -extern "C" void 
DX12DescriptorStorageWriteSampler(DX12DescriptorStorage self, uint32_t index, DX12Sampler sampler) +extern "C" void DX12DescriptorStorageWriteSampler(DX12DescriptorStorage self, uint32_t set_index, uint32_t binding, DX12Sampler sampler) { auto* xself = reinterpret_cast(self); - xself->WriteSampler(index, *reinterpret_cast(sampler)); + xself->WriteSampler(set_index, binding, *reinterpret_cast(sampler)); } -extern "C" void DX12DescriptorStorageWriteConstantBuffer(DX12DescriptorStorage self, uint32_t index, DX12Buffer buffer, uint32_t size, uint32_t offset) +extern "C" void DX12DescriptorStorageWriteConstantBuffer(DX12DescriptorStorage self, uint32_t set_index, uint32_t binding, DX12Buffer buffer, uint32_t size, uint32_t offset) { auto* xself = reinterpret_cast(self); - xself->WriteConstantBuffer(index, *reinterpret_cast(buffer), size, offset); + xself->WriteConstantBuffer(set_index, binding, *reinterpret_cast(buffer), size, offset); } -extern "C" void DX12DescriptorStorageWriteTexture(DX12DescriptorStorage self, uint32_t index, DX12ShaderResource resource) +extern "C" void DX12DescriptorStorageWriteTexture(DX12DescriptorStorage self, uint32_t set_index, uint32_t binding, DX12ShaderResource resource) { auto* xself = reinterpret_cast(self); - xself->WriteTexture(index, *reinterpret_cast(resource)); + xself->WriteTexture(set_index, binding, *reinterpret_cast(resource)); } // DX12RootSignature methods -- @@ -915,25 +957,25 @@ extern "C" WisResult VKDeviceCreateGraphicsPipeline(VKDevice self, const VKGraph } return reinterpret_cast(res); } -extern "C" WisResult VKDeviceCreateRootSignature(VKDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t descriptors_count, uint32_t space_overlap_count, VKRootSignature* signature) +extern "C" WisResult VKDeviceCreateComputePipeline(VKDevice self, const VKComputePipelineDesc* desc, VKPipelineState* pipeline) { auto* xself = reinterpret_cast(self); - auto&& 
[res, value] = xself->CreateRootSignature(reinterpret_cast(push_constants), constants_count, reinterpret_cast(push_descriptors), descriptors_count, space_overlap_count); + auto&& [res, value] = xself->CreateComputePipeline(*reinterpret_cast(desc)); if (res.status != wis::Status::Ok) { return reinterpret_cast(res); } - *signature = reinterpret_cast(new (std::nothrow) wis::VKRootSignature(std::move(value))); - if (!*signature) { - return WisResult{ StatusOutOfMemory, "Failed to allocate memory for wis::VKRootSignature." }; + *pipeline = reinterpret_cast(new (std::nothrow) wis::VKPipelineState(std::move(value))); + if (!*pipeline) { + return WisResult{ StatusOutOfMemory, "Failed to allocate memory for wis::VKPipelineState." }; } return reinterpret_cast(res); } -extern "C" WisResult VKDeviceCreateRootSignature2(VKDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptors_count, const WisDescriptorSpacing* descriptor_spacing, VKRootSignature* signature) +extern "C" WisResult VKDeviceCreateRootSignature(VKDevice self, const WisPushConstant* push_constants, uint32_t push_constant_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptor_count, const WisDescriptorBindingDesc* bindings, uint32_t binding_count, VKRootSignature* signature) { auto* xself = reinterpret_cast(self); - auto&& [res, value] = xself->CreateRootSignature2(reinterpret_cast(push_constants), constants_count, reinterpret_cast(push_descriptors), push_descriptors_count, reinterpret_cast(descriptor_spacing)); + auto&& [res, value] = xself->CreateRootSignature(reinterpret_cast(push_constants), push_constant_count, reinterpret_cast(push_descriptors), push_descriptor_count, reinterpret_cast(bindings), binding_count); if (res.status != wis::Status::Ok) { return reinterpret_cast(res); @@ -1035,10 +1077,10 @@ extern "C" WisResult VKDeviceCreateShaderResource(VKDevice self, VKTexture textu } return 
reinterpret_cast(res); } -extern "C" WisResult VKDeviceCreateDescriptorStorage(VKDevice self, const WisDescriptorStorageDesc* desc, VKDescriptorStorage* storage) +extern "C" WisResult VKDeviceCreateDescriptorStorage(VKDevice self, const WisDescriptorBindingDesc* bindings, uint32_t bindings_count, WisDescriptorMemory memory, VKDescriptorStorage* storage) { auto* xself = reinterpret_cast(self); - auto&& [res, value] = xself->CreateDescriptorStorage(*reinterpret_cast(desc)); + auto&& [res, value] = xself->CreateDescriptorStorage(reinterpret_cast(bindings), bindings_count, static_cast(memory)); if (res.status != wis::Status::Ok) { return reinterpret_cast(res); @@ -1288,6 +1330,11 @@ extern "C" void VKCommandListCopyTextureToBuffer(VKCommandList self, VKTexture s auto* xself = reinterpret_cast(self); xself->CopyTextureToBuffer(*reinterpret_cast(source), *reinterpret_cast(destination), reinterpret_cast(regions), region_count); } +extern "C" void VKCommandListCopyTexture(VKCommandList self, VKTexture source, VKTexture destination, const WisTextureCopyRegion* regions, uint32_t region_count) +{ + auto* xself = reinterpret_cast(self); + xself->CopyTexture(*reinterpret_cast(source), *reinterpret_cast(destination), reinterpret_cast(regions), region_count); +} extern "C" void VKCommandListBufferBarrier(VKCommandList self, const WisBufferBarrier* barrier, VKBuffer buffer) { auto* xself = reinterpret_cast(self); @@ -1323,6 +1370,11 @@ extern "C" void VKCommandListSetRootSignature(VKCommandList self, VKRootSignatur auto* xself = reinterpret_cast(self); xself->SetRootSignature(*reinterpret_cast(root_signature)); } +extern "C" void VKCommandListSetComputeRootSignature(VKCommandList self, VKRootSignature root_signature) +{ + auto* xself = reinterpret_cast(self); + xself->SetComputeRootSignature(*reinterpret_cast(root_signature)); +} extern "C" void VKCommandListIASetPrimitiveTopology(VKCommandList self, WisPrimitiveTopology topology) { auto* xself = reinterpret_cast(self); @@ 
-1373,16 +1425,41 @@ extern "C" void VKCommandListDrawInstanced(VKCommandList self, uint32_t vertex_c auto* xself = reinterpret_cast(self); xself->DrawInstanced(vertex_count_per_instance, instance_count, start_vertex, start_instance); } +extern "C" void VKCommandListDispatch(VKCommandList self, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) +{ + auto* xself = reinterpret_cast(self); + xself->Dispatch(group_count_x, group_count_y, group_count_z); +} extern "C" void VKCommandListSetPushConstants(VKCommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes, WisShaderStages stage) { auto* xself = reinterpret_cast(self); xself->SetPushConstants(data, size_4bytes, offset_4bytes, static_cast(stage)); } +extern "C" void VKCommandListSetComputePushConstants(VKCommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes) +{ + auto* xself = reinterpret_cast(self); + xself->SetComputePushConstants(data, size_4bytes, offset_4bytes); +} extern "C" void VKCommandListPushDescriptor(VKCommandList self, WisDescriptorType type, uint32_t root_index, VKBuffer buffer, uint32_t offset) { auto* xself = reinterpret_cast(self); xself->PushDescriptor(static_cast(type), root_index, *reinterpret_cast(buffer), offset); } +extern "C" void VKCommandListPushDescriptorCompute(VKCommandList self, WisDescriptorType type, uint32_t root_index, VKBuffer buffer, uint32_t offset) +{ + auto* xself = reinterpret_cast(self); + xself->PushDescriptorCompute(static_cast(type), root_index, *reinterpret_cast(buffer), offset); +} +extern "C" void VKCommandListSetDescriptorStorage(VKCommandList self, VKDescriptorStorage storage) +{ + auto* xself = reinterpret_cast(self); + xself->SetDescriptorStorage(*reinterpret_cast(storage)); +} +extern "C" void VKCommandListSetComputeDescriptorStorage(VKCommandList self, VKDescriptorStorage storage) +{ + auto* xself = reinterpret_cast(self); + xself->SetComputeDescriptorStorage(*reinterpret_cast(storage)); +} // VKSwapChain 
methods -- extern "C" void VKSwapChainDestroy(VKSwapChain self) @@ -1451,6 +1528,13 @@ extern "C" void VKBufferUnmap(VKBuffer self) auto* xself = reinterpret_cast(self); xself->Unmap(); } +extern "C" uint64_t VKBufferGetGPUAddress(VKBuffer self) +{ + auto* xself = reinterpret_cast(self); + auto res = xself->GetGPUAddress(); + ; + return res; +} // VKTexture methods -- extern "C" void VKTextureDestroy(VKTexture self) @@ -1465,20 +1549,20 @@ extern "C" void VKDescriptorStorageDestroy(VKDescriptorStorage self) auto* xself = reinterpret_cast(self); delete xself; } -extern "C" void VKDescriptorStorageWriteSampler(VKDescriptorStorage self, uint32_t index, VKSampler sampler) +extern "C" void VKDescriptorStorageWriteSampler(VKDescriptorStorage self, uint32_t set_index, uint32_t binding, VKSampler sampler) { auto* xself = reinterpret_cast(self); - xself->WriteSampler(index, *reinterpret_cast(sampler)); + xself->WriteSampler(set_index, binding, *reinterpret_cast(sampler)); } -extern "C" void VKDescriptorStorageWriteConstantBuffer(VKDescriptorStorage self, uint32_t index, VKBuffer buffer, uint32_t size, uint32_t offset) +extern "C" void VKDescriptorStorageWriteConstantBuffer(VKDescriptorStorage self, uint32_t set_index, uint32_t binding, VKBuffer buffer, uint32_t size, uint32_t offset) { auto* xself = reinterpret_cast(self); - xself->WriteConstantBuffer(index, *reinterpret_cast(buffer), size, offset); + xself->WriteConstantBuffer(set_index, binding, *reinterpret_cast(buffer), size, offset); } -extern "C" void VKDescriptorStorageWriteTexture(VKDescriptorStorage self, uint32_t index, VKShaderResource resource) +extern "C" void VKDescriptorStorageWriteTexture(VKDescriptorStorage self, uint32_t set_index, uint32_t binding, VKShaderResource resource) { auto* xself = reinterpret_cast(self); - xself->WriteTexture(index, *reinterpret_cast(resource)); + xself->WriteTexture(set_index, binding, *reinterpret_cast(resource)); } // VKRootSignature methods -- diff --git a/bindings/wisdom.h 
b/bindings/wisdom.h index 02d012b5..0ef98182 100644 --- a/bindings/wisdom.h +++ b/bindings/wisdom.h @@ -80,12 +80,14 @@ enum WisStatus { }; /** - * @brief Determines the behavior when wait for multiple fences is issued. + * @brief Type of the queue to create. * * */ -enum WisMutiWaitFlags { - MutiWaitFlagsAll = 0, ///< All the fences in the batch are triggered. - MutiWaitFlagsAny = 1, ///< At least one of the fences from the batch is triggered. +enum WisQueueType { + QueueTypeGraphics = 0, ///< Queue is used for graphics operations. + QueueTypeCompute = 2, ///< Queue is used for compute operations. + QueueTypeCopy = 3, ///< Queue is used for copy operations. + QueueTypeVideoDecode = 4, ///< Queue is used for video decoding operations. }; /** @@ -119,17 +121,27 @@ enum WisDescriptorType { * May be bigger than constant buffers, but slower. * */ DescriptorTypeBuffer = 5, + DescriptorTypeAccelerationStructure = 6, ///< Descriptor is an acceleration structure. }; /** - * @brief Type of the queue to create. + * @brief Determines the behavior when wait for multiple fences is issued. * * */ -enum WisQueueType { - QueueTypeGraphics = 0, ///< Queue is used for graphics operations. - QueueTypeCompute = 2, ///< Queue is used for compute operations. - QueueTypeCopy = 3, ///< Queue is used for copy operations. - QueueTypeVideoDecode = 4, ///< Queue is used for video decoding operations. +enum WisMutiWaitFlags { + MutiWaitFlagsAll = 0, ///< All the fences in the batch are triggered. + MutiWaitFlagsAny = 1, ///< At least one of the fences from the batch is triggered. +}; + +/** + * @brief Type of the geometry in the Acceleration Structure. + * + * Translates to VkGeometryTypeKHR for vk implementation. + * Translates to D3D12_RAYTRACING_GEOMETRY_TYPE for dx implementation. + * */ +enum WisASGeometryType { + ASGeometryTypeTriangles = 0, ///< Triangles geometry type. Used for triangle meshes. + ASGeometryTypeAABBs = 1, ///< Axis Aligned Bounding Boxes geometry type. 
Used for bounding volume hierarchies. }; /** @@ -152,6 +164,20 @@ enum WisAdapterPreference { AdapterPreferencePerformance = 2, }; +/** + * @brief Shader stages that can be used in the raytracing pipeline. + * + * Translates to VkShaderStageFlagBits for vk implementation. + * */ +enum WisRaytracingShaderType { + RaytracingShaderTypeRaygen = 0, ///< Ray generation shader stage. + RaytracingShaderTypeMiss = 1, ///< Miss shader stage. + RaytracingShaderTypeClosestHit = 2, ///< Closest hit shader stage. + RaytracingShaderTypeAnyHit = 3, ///< Any hit shader stage. + RaytracingShaderTypeIntersection = 4, ///< Intersection shader stage. + RaytracingShaderTypeCallable = 5, ///< Callable shader stage. +}; + /** * @brief Log message severity. * Used with DebugCallback and internal library logging. @@ -178,6 +204,26 @@ enum WisSeverity { SeverityCritical = 5, }; +/** + * @brief Level of the Raytracing Acceleration Structure. Used to create Acceleration structures. + * + * */ +enum WisASLevel { + ASLevelBottom = 0, ///< Bottom level Acceleration Structure. Contains geometry data. + ASLevelTop = 1, ///< Top level Acceleration Structure. Contains instance data. +}; + +/** + * @brief Type of the hit group in the raytracing pipeline. + * + * Translates to VkRayTracingShaderGroupTypeKHR for vk implementation. + * Translates to D3D12_HIT_GROUP_TYPE for dx implementation. + * */ +enum WisHitGroupType { + HitGroupTypeTriangles = 0, ///< Hit group for triangles. + HitGroupTypeProcedural = 1, ///< Hit group for procedural geometry. +}; + /** * @brief Input classification for vertex buffer data. * @@ -878,11 +924,12 @@ enum WisLogicOp { * Translates to VkMemoryPropertyFlags for vk implementation. * */ enum WisMemoryType { + MemoryTypeDefault = 0, ///< Default memory type. Alias for MemoryTypeDeviceLocal /** * @brief Default memory type. * Local device memory, most efficient for rendering. * */ - MemoryTypeDefault = 0, + MemoryTypeDeviceLocal = 0, /** * @brief Upload memory type. 
* Used for data that is uploaded to the GPU Local memory using copy operations. @@ -961,32 +1008,6 @@ enum WisTextureLayout { TextureLayoutTexture3D = 8, ///< Texture is 3D volume. }; -/** - * @brief Binding index for resources. - * Used in DescriptorStorage to determine which descriptor type goes where when binding. - * Same values are used for HLSL side to pick descriptors up. - * Space 0 and set 0 are reserved for push descriptors and push constants. - * - * */ -enum WisBindingIndex { - /** - * @brief No binding index set.Results in [[vk::binding(*,0)]] and register(*). - * This space is reserved for push constants and push descriptors. - * */ - BindingIndexNone = 0, - BindingIndexSampler = 1, ///< Binding index for sampler descriptors. Results in [[vk::binding(0,1)]] and register(s0, space1). - BindingIndexConstantBuffer = 2, ///< Binding index for constant buffer descriptors. Results in [[vk::binding(0,2)]] and register(b0, space2). - BindingIndexTexture = 3, ///< Binding index for texture descriptors. Results in [[vk::binding(0,3)]] and register(t0, space3). - BindingIndexRWTexture = 4, ///< Binding index for read-write texture descriptors. Results in [[vk::binding(0,4)]] and register(u0, space4). - BindingIndexRWBuffer = 5, ///< Binding index for read-write buffer descriptors. Results in [[vk::binding(0,5)]] and register(u0, space5). - /** - * @brief Binding index for read buffer descriptors. Results in [[vk::binding(0,6)]] and register(t0, space6). - * Can't be merged with Texture because of Vulkan. - * */ - BindingIndexBuffer = 6, - BindingIndexCount = 6, ///< Number of binding indices. Used for array sizes. -}; - /** * @brief Descriptor heap type. * @@ -1217,14 +1238,21 @@ enum WisColorComponentsBits { * @brief Buffer usage flags. * Determine how the buffer can be used throughout its lifetime. * + * Translates to VkBufferUsageFlags for vk implementation. + * Translates to D3D12_RESOURCE_FLAGS for dx implementation. 
* */ enum WisBufferUsageBits { BufferUsageNone = 0x0, ///< No flags set. Buffer is not used. BufferUsageCopySrc = 1 << 0, ///< Buffer is used as a source for copy operations. BufferUsageCopyDst = 1 << 1, ///< Buffer is used as a destination for copy operations. - BufferUsageConstantBuffer = 1 << 4, ///< Buffer is used as a constant buffer. - BufferUsageIndexBuffer = 1 << 6, ///< Buffer is used as an index buffer. - BufferUsageVertexBuffer = 1 << 7, ///< Buffer is used as a vertex buffer or an instance buffer. + BufferUsageConstantBuffer = 1 << 2, ///< Buffer is used as a constant buffer. + BufferUsageIndexBuffer = 1 << 3, ///< Buffer is used as an index buffer. + BufferUsageVertexBuffer = 1 << 4, ///< Buffer is used as a vertex buffer or an instance buffer. + BufferUsageIndirectBuffer = 1 << 5, ///< Buffer is used as an indirect buffer. + BufferUsageStorageBuffer = 1 << 6, ///< Buffer is used as a storage unordered access buffer. + BufferUsageAccelerationStructureBuffer = 1 << 7, ///< Buffer is used as an acceleration structure buffer. + BufferUsageAccelerationStructureInput = 1 << 8, ///< Buffer is used as a read only acceleration instance input buffer. + BufferUsageShaderBindingTable = 1 << 9, ///< Buffer is used as a shader binding table buffer. }; /** @@ -1327,8 +1355,8 @@ enum WisResourceAccessBits { ResourceAccessCopyDest = 1 << 10, ///< Copy destination access. ResourceAccessCopySource = 1 << 11, ///< Copy source access. ResourceAccessConditionalRendering = 1 << 12, ///< Conditional rendering access. - ResourceAccessAccelerationStrucureRead = 1 << 13, ///< Acceleration structure read access. - ResourceAccessAccelerationStrucureWrite = 1 << 14, ///< Acceleration structure write access. + ResourceAccessAccelerationStructureRead = 1 << 13, ///< Acceleration structure read access. + ResourceAccessAccelerationStructureWrite = 1 << 14, ///< Acceleration structure write access. ResourceAccessShadingRate = 1 << 15, ///< Shading rate access. 
Used in variable shading rate. ResourceAccessVideoDecodeRead = 1 << 16, ///< Video decode read access. ResourceAccessVideoDecodeWrite = 1 << 17, ///< Video decode write access. @@ -1380,6 +1408,47 @@ enum WisPipelineFlagsBits { PipelineFlagsDescriptorBuffer = 1 << 0, }; +/** + * @brief Geometry flags for additional geometry features + * + * Translates to D3D12_RAYTRACING_GEOMETRY_FLAGS for dx implementation. + * Translates to VkGeometryFlagsKHR for vk implementation. + * */ +enum WisASGeometryFlagsBits { + ASGeometryFlagsNone = 0x0, ///< No flags set. Geometry is regular. + ASGeometryFlagsOpaque = 1 << 0, ///< Geometry is opaque. Used for opaque geometry. + ASGeometryFlagsNoDuplicateAnyHitInvocation = 1 << 1, ///< Geometry has no duplicate any hit invocation. +}; + +/** + * @brief Acceleration structure flags for additional acceleration structure features + * + * Translates to D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS for dx implementation. + * Translates to VkBuildAccelerationStructureFlagsKHR for vk implementation. + * */ +enum WisAccelerationStructureFlagsBits { + AccelerationStructureFlagsNone = 0x0, ///< No flags set. Acceleration structure is regular. + AccelerationStructureFlagsAllowUpdate = 1 << 0, ///< Acceleration structure is allowed to be updated. + AccelerationStructureFlagsAllowCompaction = 1 << 1, ///< Acceleration structure is allowed to be compacted. + AccelerationStructureFlagsPreferFastTrace = 1 << 2, ///< Acceleration structure is preferred to be fast traced. + AccelerationStructureFlagsPreferFastBuild = 1 << 3, ///< Acceleration structure is preferred to be fast built. + AccelerationStructureFlagsMinimizeMemory = 1 << 4, ///< Acceleration structure is minimized for memory usage. +}; + +/** + * @brief Instance flags for additional instance features + * + * Translates to D3D12_RAYTRACING_INSTANCE_FLAGS for dx implementation. + * Translates to VkGeometryInstanceFlagsKHR for vk implementation. 
+ * */ +enum WisASInstanceFlagsBits { + ASInstanceFlagsNone = 0x0, ///< No flags set. Instance is regular. + ASInstanceFlagsTriangleCullDisable = 1 << 0, ///< Triangle cull is disabled. + ASInstanceFlagsTriangleFrontCounterClockwise = 1 << 1, ///< Triangle front is counter clockwise. + ASInstanceFlagsForceOpaque = 1 << 2, ///< Force opaque. + ASInstanceFlagsForceNoOpaque = 1 << 3, ///< Force no opaque. +}; + //------------------------------------------------------------------------- typedef struct WisResult WisResult; @@ -1415,17 +1484,29 @@ typedef struct WisDescriptorTable WisDescriptorTable; typedef struct WisSamplerDesc WisSamplerDesc; typedef struct WisComponentMapping WisComponentMapping; typedef struct WisShaderResourceDesc WisShaderResourceDesc; +typedef struct WisUnorderedAccessDesc WisUnorderedAccessDesc; typedef struct WisFactoryExtQuery WisFactoryExtQuery; typedef struct WisDeviceExtQuery WisDeviceExtQuery; -typedef struct WisDescriptorStorageDesc WisDescriptorStorageDesc; -typedef struct WisDescriptorSpacing WisDescriptorSpacing; +typedef struct WisTopLevelASBuildDesc WisTopLevelASBuildDesc; +typedef struct WisAcceleratedGeometryInput WisAcceleratedGeometryInput; +typedef struct WisASAllocationInfo WisASAllocationInfo; +typedef struct WisDescriptorBindingDesc WisDescriptorBindingDesc; +typedef struct WisShaderExport WisShaderExport; +typedef struct WisHitGroupDesc WisHitGroupDesc; +typedef struct WisShaderBindingTableInfo WisShaderBindingTableInfo; +typedef struct WisRaytracingDispatchDesc WisRaytracingDispatchDesc; +typedef struct WisTextureCopyRegion WisTextureCopyRegion; typedef enum WisShaderStages WisShaderStages; typedef enum WisStatus WisStatus; -typedef enum WisMutiWaitFlags WisMutiWaitFlags; -typedef enum WisDescriptorType WisDescriptorType; typedef enum WisQueueType WisQueueType; +typedef enum WisDescriptorType WisDescriptorType; +typedef enum WisMutiWaitFlags WisMutiWaitFlags; +typedef enum WisASGeometryType WisASGeometryType; typedef enum 
WisAdapterPreference WisAdapterPreference; +typedef enum WisRaytracingShaderType WisRaytracingShaderType; typedef enum WisSeverity WisSeverity; +typedef enum WisASLevel WisASLevel; +typedef enum WisHitGroupType WisHitGroupType; typedef enum WisInputClass WisInputClass; typedef enum WisCullMode WisCullMode; typedef enum WisDataFormat WisDataFormat; @@ -1443,7 +1524,6 @@ typedef enum WisShaderIntermediate WisShaderIntermediate; typedef enum WisTextureState WisTextureState; typedef enum WisLoadOperation WisLoadOperation; typedef enum WisTextureLayout WisTextureLayout; -typedef enum WisBindingIndex WisBindingIndex; typedef enum WisDescriptorHeapType WisDescriptorHeapType; typedef enum WisStoreOperation WisStoreOperation; typedef enum WisPrimitiveTopology WisPrimitiveTopology; @@ -1478,6 +1558,12 @@ typedef enum WisFenceFlagsBits WisFenceFlagsBits; typedef uint32_t WisFenceFlags; typedef enum WisPipelineFlagsBits WisPipelineFlagsBits; typedef uint32_t WisPipelineFlags; +typedef enum WisASGeometryFlagsBits WisASGeometryFlagsBits; +typedef uint32_t WisASGeometryFlags; +typedef enum WisAccelerationStructureFlagsBits WisAccelerationStructureFlagsBits; +typedef uint32_t WisAccelerationStructureFlags; +typedef enum WisASInstanceFlagsBits WisASInstanceFlagsBits; +typedef uint32_t WisASInstanceFlags; //------------------------------------------------------------------------- @@ -1854,6 +1940,15 @@ struct WisShaderResourceDesc { WisSubresourceRange subresource_range; ///< Subresource range of the resource. }; +/** + * @brief Unordered access description for RW Texture creation. + * */ +struct WisUnorderedAccessDesc { + WisDataFormat format; ///< Resource format. + WisTextureViewType view_type; ///< Resource view type. + WisSubresourceRange subresource_range; ///< Subresource range of the resource. +}; + /** * @brief Struct used to query the extensions for C code. * Queried results should not be freed, their lifetime ends with the Factory they were created with. 
@@ -1887,30 +1982,124 @@ struct WisDeviceExtQuery { }; /** - * @brief Descriptor storage description for DescriptorStorage creation. + * @brief Top level acceleration structure build description. + * */ +struct WisTopLevelASBuildDesc { + WisAccelerationStructureFlags flags; ///< Build flags. + uint32_t instance_count; ///< Instance count. + uint64_t gpu_address; ///< Address of instances. + bool indirect; ///< If true Buffer under address contains pointers to the instances, rather than instances themselves. + /** + * @brief true means the acceleration structure is being updated. + * flags must have contained AccelerationStructureFlagsAllowUpdate to perform updates. + * */ + bool update; +}; + +/** + * @brief Geometry description for bottom-level acceleration structure. May contain AABBs or Triangles. + * */ +struct WisAcceleratedGeometryInput { + WisASGeometryType geometry_type; ///< Type of the geometry (Triangles/AABB). + WisASGeometryFlags flags; ///< Geometry flags. + uint64_t vertex_or_aabb_buffer_address; ///< Buffer address of the buffer containing vertex data or AABB data (float [6]) depending on the geometry type. + uint64_t vertex_or_aabb_buffer_stride; ///< Stride of the vertex buffer in bytes or stride of the AABB buffer in bytes. + uint64_t index_buffer_address; ///< Buffer address of the buffer containing index data. Unused for ASGeometryTypeAABBs. + uint64_t transform_matrix_address; ///< GPU Buffer address of the containing transform matrix (float [3][4]). Unused for ASGeometryTypeAABBs. + uint32_t vertex_count; ///< Vertex count. Unused for ASGeometryTypeAABBs. + uint32_t triangle_or_aabb_count; ///< For triangles it is equal to (index_count/3) and count for AABBs. + WisDataFormat vertex_format; ///< Format of the vertices. Unused for ASGeometryTypeAABBs. + WisIndexType index_format; ///< Format of the indices. Unused for ASGeometryTypeAABBs. +}; + +/** + * @brief Acceleration structure allocation info.
Used to query sizes for AS build/update buffers. * */ -struct WisDescriptorStorageDesc { - uint32_t sampler_count; ///< Count of sampler descriptors to allocate. - uint32_t cbuffer_count; ///< Count of constant buffer descriptors to allocate. - uint32_t sbuffer_count; ///< Count of storage buffer descriptors to allocate. - uint32_t texture_count; ///< Count of texture descriptors to allocate. - uint32_t stexture_count; ///< Count of storage texture descriptors to allocate. - uint32_t rbuffer_count; ///< Count of read only storage buffer descriptors to allocate. - WisDescriptorMemory memory; ///< Descriptor memory to use. +struct WisASAllocationInfo { + uint64_t scratch_size; ///< Size of the scratch buffer. + uint64_t result_size; ///< Size of the result buffer. + uint64_t update_size; ///< Size of the update buffer. }; /** - * @brief Describes how many types can descriptors be reinterpreted as. - * Minimal amount of spaces for each type is 1, 0 is treated as 1. - * Used for RootSignature. + * @brief Descriptor binding description for RootSignature and Descriptor Storage creation. + * Description place in array determines binding index that this lane maps to. e.g. bindings[1] means on HLSL side this results in [[vk::binding(0,1)]]. + * All the bindings in Descriptor Storage are unbounded, array of these structures determine the presence and order of the bindings. * */ -struct WisDescriptorSpacing { - uint32_t sampler_count; ///< Count of spaces of sampler descriptors to allocate. - uint32_t cbuffer_count; ///< Count of spaces of constant buffer descriptors to allocate. - uint32_t sbuffer_count; ///< Count of spaces of storage buffer descriptors to allocate. - uint32_t texture_count; ///< Count of spaces of texture descriptors to allocate. - uint32_t stexture_count; ///< Count of spaces of storage texture descriptors to allocate. - uint32_t rbuffer_count; ///< Count of spaces of read only storage buffer descriptors to allocate. 
+struct WisDescriptorBindingDesc { + WisDescriptorType binding_type; ///< Binding type. Must be unique in array. + uint32_t binding_space; ///< Binding space number in HLSL. + /** + * @brief Number of consecutive spaces this binding occupies. + * e.g. for binding_space = 1 and space_overlap_count = 3, HLSL binding will be :register(x0,space1), register(x0,space2), register(x0,space3) + * This is useful for binding multiple resource types to the same register array in HLSL. + * */ + uint32_t space_overlap_count; + /** + * @brief How many bindings should be allocated. + * Affects only the count of descriptors allocated in the descriptor heap, Root Signature always receives unbounded array with max amount of 4096 registers. + * */ + uint32_t binding_count; +}; + +/** + * @brief Defines export shader functions from a library shader. + * */ +struct WisShaderExport { + const char* entry_point; ///< Entry point of the shader. + WisRaytracingShaderType shader_type; ///< Type of the shader. + uint32_t shader_array_index; ///< Index of the shader in the shader array. +}; + +/** + * @brief Hit group description for Raytracing pipeline. + * */ +struct WisHitGroupDesc { + /** + * @brief Type of the hit group. + * HitGroupTypeTriangles - hit group for triangles. Uses closest hit shader and optionally any hit shader for transparency. + * HitGroupTypeProcedural - hit group for procedural geometry. Uses intersection shader and optionally any hit shader for transparency. + * */ + WisHitGroupType type; + uint32_t closest_hit_export_index; ///< Closest hit shader from WisShaderExport. + uint32_t any_hit_export_index; ///< Any hit shader. + uint32_t intersection_export_index; ///< Intersection shader. +}; + +/** + * @brief Shader binding table description for Raytracing pipeline. + * */ +struct WisShaderBindingTableInfo { + uint32_t entry_size; ///< Size/stride of the entry in bytes. + uint32_t table_start_alignment; ///< Alignment of the table start in bytes. 
+}; + +/** + * @brief Raytracing dispatch description for CommandList. + * */ +struct WisRaytracingDispatchDesc { + uint64_t ray_gen_shader_table_address; ///< Address of the ray generation shader table. + uint64_t miss_shader_table_address; ///< Address of the miss shader table. + uint64_t hit_group_table_address; ///< Address of the hit group shader table. + uint64_t callable_shader_table_address; ///< Address of the callable shader table. + uint32_t ray_gen_shader_table_size; ///< Size of the ray generation shader table in bytes. + uint32_t miss_shader_table_size; ///< Size of the miss shader table in bytes. + uint32_t hit_group_table_size; ///< Size of the hit group shader table in bytes. + uint32_t callable_shader_table_size; ///< Size of the callable shader table in bytes. + uint32_t miss_shader_table_stride; ///< Stride of the miss shader table in bytes. + uint32_t hit_group_table_stride; ///< Stride of the hit group shader table in bytes. + uint32_t callable_shader_table_stride; ///< Stride of the callable shader table in bytes. + uint32_t width; ///< Width of the dispatch in number of rays. + uint32_t height; ///< Height of the dispatch in number of rays. + uint32_t depth; ///< Depth of the dispatch in number of rays. +}; + +/** + * @brief Texture to texture copy region. + * */ +struct WisTextureCopyRegion { + WisTextureRegion src; ///< Source texture region. + WisTextureRegion dst; ///< Destination texture region. 
}; //------------------------------------------------------------------------- @@ -1940,8 +2129,11 @@ typedef struct VKShaderView VKShaderView; typedef struct VKRenderTargetView VKRenderTargetView; typedef struct VKRootSignatureView VKRootSignatureView; typedef struct VKTextureBarrier2 VKTextureBarrier2; +typedef struct VKRaytracingPipeineDesc VKRaytracingPipeineDesc; +typedef struct VKBottomLevelASBuildDesc VKBottomLevelASBuildDesc; typedef struct VKBufferBarrier2 VKBufferBarrier2; typedef struct VKGraphicsShaderStages VKGraphicsShaderStages; +typedef struct VKComputePipelineDesc VKComputePipelineDesc; typedef struct VKRenderPassRenderTargetDesc VKRenderPassRenderTargetDesc; typedef struct VKRenderPassDesc VKRenderPassDesc; typedef struct VKVertexBufferBinding VKVertexBufferBinding; @@ -2005,6 +2197,27 @@ struct VKGraphicsShaderStages { VKShaderView pixel; ///< Pixel shader. }; +/** + * @brief Raytracing pipeline descriptor for pipeline creation. + * */ +struct VKRaytracingPipeineDesc { + VKRootSignatureView root_signature; ///< Root signature. + const VKShaderView* shaders; ///< Shader libraries. + uint32_t shader_count; ///< Shader library count. + const WisShaderExport* exports; ///< Shader library exports (entry points). + uint32_t export_count; ///< Shader export count. + /** + * @brief Hit group descriptions. + * Note: Raygen and miss shaders don't have their dedicated shader groups, instead groups are defined in order of appearance in . + * And groups for SBTs are exported as raygen:miss:hit. + * */ + const WisHitGroupDesc* hit_groups; + uint32_t hit_group_count; ///< Hit group count. + uint32_t max_recursion_depth; ///< Max recursion depth. Default is 1. + uint32_t max_payload_size; ///< Max payload size. Default is 0. + uint32_t max_attribute_size; ///< Max attribute size. Default is 0. +}; + /** * @brief Variant of PipelineStateDesc for graphics pipeline. 
* */ @@ -2026,6 +2239,14 @@ struct VKGraphicsPipelineDesc { WisPipelineFlags flags; ///< Pipeline flags to add options to pipeline creation. }; +/** + * @brief Variant of PipelineStateDesc for compute pipeline. + * */ +struct VKComputePipelineDesc { + VKRootSignatureView root_signature; ///< Root signature. + VKShaderView shader; ///< Compute shader. +}; + /** * @brief Variant of RenderPassDesc for render target. * */ @@ -2099,6 +2320,7 @@ typedef struct VKDebugMessenger_t* VKDebugMessenger; typedef struct VKRenderTarget_t* VKRenderTarget; typedef struct VKSampler_t* VKSampler; typedef struct VKShaderResource_t* VKShaderResource; +typedef struct VKUnorderedAccessTexture_t* VKUnorderedAccessTexture; //------------------------------------------------------------------------- @@ -2243,41 +2465,39 @@ WISDOM_API WisResult VKDeviceCreateCommandList(VKDevice self, WisQueueType type, WISDOM_API WisResult VKDeviceCreateGraphicsPipeline(VKDevice self, const VKGraphicsPipelineDesc* desc, VKPipelineState* pipeline); /** - * @brief Creates a root signature object for use with DescriptorStorage. + * @brief Creates a compute pipeline state object. * @param self valid handle to the Device - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. - * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If VKDevice is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. 
- * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @param signature VKRootSignature on success (StatusOk). + * @param desc The description of the compute pipeline to create. + * @param pipeline VKPipelineState on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -WISDOM_API WisResult VKDeviceCreateRootSignature(VKDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t descriptors_count, uint32_t space_overlap_count, VKRootSignature* signature); +WISDOM_API WisResult VKDeviceCreateComputePipeline(VKDevice self, const VKComputePipelineDesc* desc, VKPipelineState* pipeline); /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. 
+ * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param self valid handle to the Device * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @param signature VKRootSignature on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. 
* */ -WISDOM_API WisResult VKDeviceCreateRootSignature2(VKDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptors_count, const WisDescriptorSpacing* descriptor_spacing, VKRootSignature* signature); +WISDOM_API WisResult VKDeviceCreateRootSignature(VKDevice self, const WisPushConstant* push_constants, uint32_t push_constant_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptor_count, const WisDescriptorBindingDesc* bindings, uint32_t binding_count, VKRootSignature* signature); /** * @brief Creates a shader object. @@ -2349,12 +2569,14 @@ WISDOM_API WisResult VKDeviceCreateShaderResource(VKDevice self, VKTexture textu * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. * @param self valid handle to the Device - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @param storage VKDescriptorStorage on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -WISDOM_API WisResult VKDeviceCreateDescriptorStorage(VKDevice self, const WisDescriptorStorageDesc* desc, VKDescriptorStorage* storage); +WISDOM_API WisResult VKDeviceCreateDescriptorStorage(VKDevice self, const WisDescriptorBindingDesc* bindings, uint32_t bindings_count, WisDescriptorMemory memory, VKDescriptorStorage* storage); /** * @brief Queries if the device supports the feature. 
@@ -2600,6 +2822,16 @@ WISDOM_API void VKCommandListCopyBufferToTexture(VKCommandList self, VKBuffer so * */ WISDOM_API void VKCommandListCopyTextureToBuffer(VKCommandList self, VKTexture source, VKBuffer destination, const WisBufferTextureCopyRegion* regions, uint32_t region_count); +/** + * @brief Copies data from one texture to another. + * @param self valid handle to the CommandList + * @param source The source texture to copy from. + * @param destination The destination texture to copy to. + * @param regions The regions to copy. + * @param region_count The number of regions to copy. + * */ +WISDOM_API void VKCommandListCopyTexture(VKCommandList self, VKTexture source, VKTexture destination, const WisTextureCopyRegion* regions, uint32_t region_count); + /** * @brief Sets the barrier on the buffer. * @param self valid handle to the CommandList @@ -2652,6 +2884,14 @@ WISDOM_API void VKCommandListEndRenderPass(VKCommandList self); * */ WISDOM_API void VKCommandListSetRootSignature(VKCommandList self, VKRootSignature root_signature); +/** + * @brief Sets the pipeline signature object to compute pipeline. Used to determine how to pick descriptors from descriptor buffer. + * May only work with compute pipelines. + * @param self valid handle to the CommandList + * @param root_signature The root signature to set. + * */ +WISDOM_API void VKCommandListSetComputeRootSignature(VKCommandList self, VKRootSignature root_signature); + /** * @brief Sets the primitive topology. Detemines how vertices shall be processed. * @param self valid handle to the CommandList @@ -2742,6 +2982,15 @@ WISDOM_API void VKCommandListDrawIndexedInstanced(VKCommandList self, uint32_t v * */ WISDOM_API void VKCommandListDrawInstanced(VKCommandList self, uint32_t vertex_count_per_instance, uint32_t instance_count, uint32_t start_vertex, uint32_t start_instance); +/** + * @brief Dispatches compute shader. 
+ * @param self valid handle to the CommandList + * @param group_count_x The number of groups to dispatch in X dimension. + * @param group_count_y The number of groups to dispatch in Y dimension. Default is 1. + * @param group_count_z The number of groups to dispatch in Z dimension. Default is 1. + * */ +WISDOM_API void VKCommandListDispatch(VKCommandList self, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z); + /** * @brief Sets the root constants for the shader. * @param self valid handle to the CommandList @@ -2752,6 +3001,15 @@ WISDOM_API void VKCommandListDrawInstanced(VKCommandList self, uint32_t vertex_c * */ WISDOM_API void VKCommandListSetPushConstants(VKCommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes, WisShaderStages stage); +/** + * @brief Sets the root constants for the compute or raytracing shader. + * @param self valid handle to the CommandList + * @param data The data to set the root constants with. + * @param size_4bytes The size of the data in 4-byte units. + * @param offset_4bytes The offset in the data in 4-byte units. + * */ +WISDOM_API void VKCommandListSetComputePushConstants(VKCommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes); + /** * @brief Pushes descriptor directly to the command list, without putting it to the table. * Works only with buffer bindings. @@ -2764,6 +3022,33 @@ WISDOM_API void VKCommandListSetPushConstants(VKCommandList self, void* data, ui * */ WISDOM_API void VKCommandListPushDescriptor(VKCommandList self, WisDescriptorType type, uint32_t root_index, VKBuffer buffer, uint32_t offset); +/** + * @brief Pushes descriptor directly to the command list, without putting it to the table. + * Works only with buffer bindings. + * Works with compute or raytracing pipelines. + * Buffer is always bound with full size. + * @param self valid handle to the CommandList + * @param type The type of the descriptor to set. 
+ * @param root_index The index of the root descriptor to set. + * @param buffer The buffer to set. + * @param offset The offset in the descriptor table to set the descriptor to. + * */ +WISDOM_API void VKCommandListPushDescriptorCompute(VKCommandList self, WisDescriptorType type, uint32_t root_index, VKBuffer buffer, uint32_t offset); + +/** + * @brief Sets the descriptor storage object for graphics pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +WISDOM_API void VKCommandListSetDescriptorStorage(VKCommandList self, VKDescriptorStorage storage); + +/** + * @brief Sets the descriptor storage object for compute pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +WISDOM_API void VKCommandListSetComputeDescriptorStorage(VKCommandList self, VKDescriptorStorage storage); + // VKSwapChain methods -- /** * @brief Destroys the VKSwapChain. @@ -2847,6 +3132,13 @@ WISDOM_API void* VKBufferMapRaw(VKBuffer self); * */ WISDOM_API void VKBufferUnmap(VKBuffer self); +/** + * @brief Returns the address of the resource in GPU memory. + * @param self valid handle to the Buffer + * @return The address of the resource in GPU memory. + * */ +WISDOM_API uint64_t VKBufferGetGPUAddress(VKBuffer self); + // VKTexture methods -- /** * @brief Destroys the VKTexture. @@ -2864,29 +3156,32 @@ WISDOM_API void VKDescriptorStorageDestroy(VKDescriptorStorage self); /** * @brief Writes the sampler to the sampler descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of samplers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of samplers to fill. * @param sampler The sampler to write. 
* */ -WISDOM_API void VKDescriptorStorageWriteSampler(VKDescriptorStorage self, uint32_t index, VKSampler sampler); +WISDOM_API void VKDescriptorStorageWriteSampler(VKDescriptorStorage self, uint32_t set_index, uint32_t binding, VKSampler sampler); /** * @brief Writes the constant buffer to the constant buffer descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of constant buffers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of constant buffers to fill. * @param buffer The buffer to write. * @param size The size of the constant buffer in bytes. * @param offset The offset in the buffer to write the constant buffer to. * size + offset must be less or equal the overall size of the bound buffer. * */ -WISDOM_API void VKDescriptorStorageWriteConstantBuffer(VKDescriptorStorage self, uint32_t index, VKBuffer buffer, uint32_t size, uint32_t offset); +WISDOM_API void VKDescriptorStorageWriteConstantBuffer(VKDescriptorStorage self, uint32_t set_index, uint32_t binding, VKBuffer buffer, uint32_t size, uint32_t offset); /** * @brief Writes the texture to the shader resource descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of shader resources to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of shader resources to fill. * @param resource The shader resource to write. 
* */ -WISDOM_API void VKDescriptorStorageWriteTexture(VKDescriptorStorage self, uint32_t index, VKShaderResource resource); +WISDOM_API void VKDescriptorStorageWriteTexture(VKDescriptorStorage self, uint32_t set_index, uint32_t binding, VKShaderResource resource); // VKRootSignature methods -- /** @@ -2981,8 +3276,11 @@ typedef struct DX12ShaderView DX12ShaderView; typedef struct DX12RenderTargetView DX12RenderTargetView; typedef struct DX12RootSignatureView DX12RootSignatureView; typedef struct DX12TextureBarrier2 DX12TextureBarrier2; +typedef struct DX12RaytracingPipeineDesc DX12RaytracingPipeineDesc; +typedef struct DX12BottomLevelASBuildDesc DX12BottomLevelASBuildDesc; typedef struct DX12BufferBarrier2 DX12BufferBarrier2; typedef struct DX12GraphicsShaderStages DX12GraphicsShaderStages; +typedef struct DX12ComputePipelineDesc DX12ComputePipelineDesc; typedef struct DX12RenderPassRenderTargetDesc DX12RenderPassRenderTargetDesc; typedef struct DX12RenderPassDesc DX12RenderPassDesc; typedef struct DX12VertexBufferBinding DX12VertexBufferBinding; @@ -3045,6 +3343,27 @@ struct DX12GraphicsShaderStages { DX12ShaderView pixel; ///< Pixel shader. }; +/** + * @brief Raytracing pipeline descriptor for pipeline creation. + * */ +struct DX12RaytracingPipeineDesc { + DX12RootSignatureView root_signature; ///< Root signature. + const DX12ShaderView* shaders; ///< Shader libraries. + uint32_t shader_count; ///< Shader library count. + const WisShaderExport* exports; ///< Shader library exports (entry points). + uint32_t export_count; ///< Shader export count. + /** + * @brief Hit group descriptions. + * Note: Raygen and miss shaders don't have their dedicated shader groups, instead groups are defined in order of appearance in . + * And groups for SBTs are exported as raygen:miss:hit. + * */ + const WisHitGroupDesc* hit_groups; + uint32_t hit_group_count; ///< Hit group count. + uint32_t max_recursion_depth; ///< Max recursion depth. Default is 1. 
+ uint32_t max_payload_size; ///< Max payload size. Default is 0. + uint32_t max_attribute_size; ///< Max attribute size. Default is 0. +}; + /** * @brief Variant of PipelineStateDesc for graphics pipeline. * */ @@ -3066,6 +3385,14 @@ struct DX12GraphicsPipelineDesc { WisPipelineFlags flags; ///< Pipeline flags to add options to pipeline creation. }; +/** + * @brief Variant of PipelineStateDesc for compute pipeline. + * */ +struct DX12ComputePipelineDesc { + DX12RootSignatureView root_signature; ///< Root signature. + DX12ShaderView shader; ///< Compute shader. +}; + /** * @brief Variant of RenderPassDesc for render target. * */ @@ -3139,6 +3466,7 @@ typedef struct DX12DebugMessenger_t* DX12DebugMessenger; typedef struct DX12RenderTarget_t* DX12RenderTarget; typedef struct DX12Sampler_t* DX12Sampler; typedef struct DX12ShaderResource_t* DX12ShaderResource; +typedef struct DX12UnorderedAccessTexture_t* DX12UnorderedAccessTexture; //------------------------------------------------------------------------- @@ -3283,41 +3611,39 @@ WISDOM_API WisResult DX12DeviceCreateCommandList(DX12Device self, WisQueueType t WISDOM_API WisResult DX12DeviceCreateGraphicsPipeline(DX12Device self, const DX12GraphicsPipelineDesc* desc, DX12PipelineState* pipeline); /** - * @brief Creates a root signature object for use with DescriptorStorage. + * @brief Creates a compute pipeline state object. * @param self valid handle to the Device - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. - * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. 
This is used primarily for descriptor type aliasing. - * Example: If DX12Device is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @param signature DX12RootSignature on success (StatusOk). + * @param desc The description of the compute pipeline to create. + * @param pipeline DX12PipelineState on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -WISDOM_API WisResult DX12DeviceCreateRootSignature(DX12Device self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t descriptors_count, uint32_t space_overlap_count, DX12RootSignature* signature); +WISDOM_API WisResult DX12DeviceCreateComputePipeline(DX12Device self, const DX12ComputePipelineDesc* desc, DX12PipelineState* pipeline); /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. 
+ * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param self valid handle to the Device * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @param signature DX12RootSignature on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. 
* */ -WISDOM_API WisResult DX12DeviceCreateRootSignature2(DX12Device self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptors_count, const WisDescriptorSpacing* descriptor_spacing, DX12RootSignature* signature); +WISDOM_API WisResult DX12DeviceCreateRootSignature(DX12Device self, const WisPushConstant* push_constants, uint32_t push_constant_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptor_count, const WisDescriptorBindingDesc* bindings, uint32_t binding_count, DX12RootSignature* signature); /** * @brief Creates a shader object. @@ -3389,12 +3715,14 @@ WISDOM_API WisResult DX12DeviceCreateShaderResource(DX12Device self, DX12Texture * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. * @param self valid handle to the Device - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @param storage DX12DescriptorStorage on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -WISDOM_API WisResult DX12DeviceCreateDescriptorStorage(DX12Device self, const WisDescriptorStorageDesc* desc, DX12DescriptorStorage* storage); +WISDOM_API WisResult DX12DeviceCreateDescriptorStorage(DX12Device self, const WisDescriptorBindingDesc* bindings, uint32_t bindings_count, WisDescriptorMemory memory, DX12DescriptorStorage* storage); /** * @brief Queries if the device supports the feature. 
@@ -3640,6 +3968,16 @@ WISDOM_API void DX12CommandListCopyBufferToTexture(DX12CommandList self, DX12Buf * */ WISDOM_API void DX12CommandListCopyTextureToBuffer(DX12CommandList self, DX12Texture source, DX12Buffer destination, const WisBufferTextureCopyRegion* regions, uint32_t region_count); +/** + * @brief Copies data from one texture to another. + * @param self valid handle to the CommandList + * @param source The source texture to copy from. + * @param destination The destination texture to copy to. + * @param regions The regions to copy. + * @param region_count The number of regions to copy. + * */ +WISDOM_API void DX12CommandListCopyTexture(DX12CommandList self, DX12Texture source, DX12Texture destination, const WisTextureCopyRegion* regions, uint32_t region_count); + /** * @brief Sets the barrier on the buffer. * @param self valid handle to the CommandList @@ -3692,6 +4030,14 @@ WISDOM_API void DX12CommandListEndRenderPass(DX12CommandList self); * */ WISDOM_API void DX12CommandListSetRootSignature(DX12CommandList self, DX12RootSignature root_signature); +/** + * @brief Sets the pipeline signature object to compute pipeline. Used to determine how to pick descriptors from descriptor buffer. + * May only work with compute pipelines. + * @param self valid handle to the CommandList + * @param root_signature The root signature to set. + * */ +WISDOM_API void DX12CommandListSetComputeRootSignature(DX12CommandList self, DX12RootSignature root_signature); + /** * @brief Sets the primitive topology. Detemines how vertices shall be processed. * @param self valid handle to the CommandList @@ -3782,6 +4128,15 @@ WISDOM_API void DX12CommandListDrawIndexedInstanced(DX12CommandList self, uint32 * */ WISDOM_API void DX12CommandListDrawInstanced(DX12CommandList self, uint32_t vertex_count_per_instance, uint32_t instance_count, uint32_t start_vertex, uint32_t start_instance); +/** + * @brief Dispatches compute shader. 
+ * @param self valid handle to the CommandList + * @param group_count_x The number of groups to dispatch in X dimension. + * @param group_count_y The number of groups to dispatch in Y dimension. Default is 1. + * @param group_count_z The number of groups to dispatch in Z dimension. Default is 1. + * */ +WISDOM_API void DX12CommandListDispatch(DX12CommandList self, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z); + /** * @brief Sets the root constants for the shader. * @param self valid handle to the CommandList @@ -3792,6 +4147,15 @@ WISDOM_API void DX12CommandListDrawInstanced(DX12CommandList self, uint32_t vert * */ WISDOM_API void DX12CommandListSetPushConstants(DX12CommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes, WisShaderStages stage); +/** + * @brief Sets the root constants for the compute or raytracing shader. + * @param self valid handle to the CommandList + * @param data The data to set the root constants with. + * @param size_4bytes The size of the data in 4-byte units. + * @param offset_4bytes The offset in the data in 4-byte units. + * */ +WISDOM_API void DX12CommandListSetComputePushConstants(DX12CommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes); + /** * @brief Pushes descriptor directly to the command list, without putting it to the table. * Works only with buffer bindings. @@ -3804,6 +4168,33 @@ WISDOM_API void DX12CommandListSetPushConstants(DX12CommandList self, void* data * */ WISDOM_API void DX12CommandListPushDescriptor(DX12CommandList self, WisDescriptorType type, uint32_t root_index, DX12Buffer buffer, uint32_t offset); +/** + * @brief Pushes descriptor directly to the command list, without putting it to the table. + * Works only with buffer bindings. + * Works with compute or raytracing pipelines. + * Buffer is always bound with full size. + * @param self valid handle to the CommandList + * @param type The type of the descriptor to set. 
+ * @param root_index The index of the root descriptor to set. + * @param buffer The buffer to set. + * @param offset The offset in the descriptor table to set the descriptor to. + * */ +WISDOM_API void DX12CommandListPushDescriptorCompute(DX12CommandList self, WisDescriptorType type, uint32_t root_index, DX12Buffer buffer, uint32_t offset); + +/** + * @brief Sets the descriptor storage object for graphics pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +WISDOM_API void DX12CommandListSetDescriptorStorage(DX12CommandList self, DX12DescriptorStorage storage); + +/** + * @brief Sets the descriptor storage object for compute pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +WISDOM_API void DX12CommandListSetComputeDescriptorStorage(DX12CommandList self, DX12DescriptorStorage storage); + // DX12SwapChain methods -- /** * @brief Destroys the DX12SwapChain. @@ -3887,6 +4278,13 @@ WISDOM_API void* DX12BufferMapRaw(DX12Buffer self); * */ WISDOM_API void DX12BufferUnmap(DX12Buffer self); +/** + * @brief Returns the address of the resource in GPU memory. + * @param self valid handle to the Buffer + * @return The address of the resource in GPU memory. + * */ +WISDOM_API uint64_t DX12BufferGetGPUAddress(DX12Buffer self); + // DX12Texture methods -- /** * @brief Destroys the DX12Texture. @@ -3904,29 +4302,32 @@ WISDOM_API void DX12DescriptorStorageDestroy(DX12DescriptorStorage self); /** * @brief Writes the sampler to the sampler descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of samplers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of samplers to fill. * @param sampler The sampler to write. 
* */ -WISDOM_API void DX12DescriptorStorageWriteSampler(DX12DescriptorStorage self, uint32_t index, DX12Sampler sampler); +WISDOM_API void DX12DescriptorStorageWriteSampler(DX12DescriptorStorage self, uint32_t set_index, uint32_t binding, DX12Sampler sampler); /** * @brief Writes the constant buffer to the constant buffer descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of constant buffers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of constant buffers to fill. * @param buffer The buffer to write. * @param size The size of the constant buffer in bytes. * @param offset The offset in the buffer to write the constant buffer to. * size + offset must be less or equal the overall size of the bound buffer. * */ -WISDOM_API void DX12DescriptorStorageWriteConstantBuffer(DX12DescriptorStorage self, uint32_t index, DX12Buffer buffer, uint32_t size, uint32_t offset); +WISDOM_API void DX12DescriptorStorageWriteConstantBuffer(DX12DescriptorStorage self, uint32_t set_index, uint32_t binding, DX12Buffer buffer, uint32_t size, uint32_t offset); /** * @brief Writes the texture to the shader resource descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of shader resources to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of shader resources to fill. * @param resource The shader resource to write. 
* */ -WISDOM_API void DX12DescriptorStorageWriteTexture(DX12DescriptorStorage self, uint32_t index, DX12ShaderResource resource); +WISDOM_API void DX12DescriptorStorageWriteTexture(DX12DescriptorStorage self, uint32_t set_index, uint32_t binding, DX12ShaderResource resource); // DX12RootSignature methods -- /** @@ -4038,6 +4439,7 @@ typedef DX12DebugMessenger WisDebugMessenger; typedef DX12RenderTarget WisRenderTarget; typedef DX12Sampler WisSampler; typedef DX12ShaderResource WisShaderResource; +typedef DX12UnorderedAccessTexture WisUnorderedAccessTexture; typedef DX12FenceView WisFenceView; typedef DX12BufferView WisBufferView; typedef DX12TextureView WisTextureView; @@ -4048,7 +4450,9 @@ typedef DX12RootSignatureView WisRootSignatureView; typedef DX12BufferBarrier2 WisBufferBarrier2; typedef DX12TextureBarrier2 WisTextureBarrier2; typedef DX12GraphicsShaderStages WisGraphicsShaderStages; +typedef DX12RaytracingPipeineDesc WisRaytracingPipeineDesc; typedef DX12GraphicsPipelineDesc WisGraphicsPipelineDesc; +typedef DX12ComputePipelineDesc WisComputePipelineDesc; typedef DX12RenderPassRenderTargetDesc WisRenderPassRenderTargetDesc; typedef DX12RenderPassDepthStencilDesc WisRenderPassDepthStencilDesc; typedef DX12RenderPassDesc WisRenderPassDesc; @@ -4242,46 +4646,44 @@ inline WisResult WisDeviceCreateGraphicsPipeline(WisDevice self, const WisGraphi } /** - * @brief Creates a root signature object for use with DescriptorStorage. + * @brief Creates a compute pipeline state object. * @param self valid handle to the Device - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. 
- * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If WisDevice is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @param signature WisRootSignature on success (StatusOk). + * @param desc The description of the compute pipeline to create. + * @param pipeline WisPipelineState on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -inline WisResult WisDeviceCreateRootSignature(WisDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t descriptors_count, uint32_t space_overlap_count, WisRootSignature* signature) +inline WisResult WisDeviceCreateComputePipeline(WisDevice self, const WisComputePipelineDesc* desc, WisPipelineState* pipeline) { - return DX12DeviceCreateRootSignature(self, push_constants, constants_count, push_descriptors, descriptors_count, space_overlap_count, signature); + return DX12DeviceCreateComputePipeline(self, desc, pipeline); } /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. 
+ * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param self valid handle to the Device * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @param signature WisRootSignature on success (StatusOk). * @return Result with StatusOk on success. 
* Error in WisResult::error otherwise. * */ -inline WisResult WisDeviceCreateRootSignature2(WisDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptors_count, const WisDescriptorSpacing* descriptor_spacing, WisRootSignature* signature) +inline WisResult WisDeviceCreateRootSignature(WisDevice self, const WisPushConstant* push_constants, uint32_t push_constant_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptor_count, const WisDescriptorBindingDesc* bindings, uint32_t binding_count, WisRootSignature* signature) { - return DX12DeviceCreateRootSignature2(self, push_constants, constants_count, push_descriptors, push_descriptors_count, descriptor_spacing, signature); + return DX12DeviceCreateRootSignature(self, push_constants, push_constant_count, push_descriptors, push_descriptor_count, bindings, binding_count, signature); } /** @@ -4372,14 +4774,16 @@ inline WisResult WisDeviceCreateShaderResource(WisDevice self, WisTexture textur * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. * @param self valid handle to the Device - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @param storage WisDescriptorStorage on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. 
* */ -inline WisResult WisDeviceCreateDescriptorStorage(WisDevice self, const WisDescriptorStorageDesc* desc, WisDescriptorStorage* storage) +inline WisResult WisDeviceCreateDescriptorStorage(WisDevice self, const WisDescriptorBindingDesc* bindings, uint32_t bindings_count, WisDescriptorMemory memory, WisDescriptorStorage* storage) { - return DX12DeviceCreateDescriptorStorage(self, desc, storage); + return DX12DeviceCreateDescriptorStorage(self, bindings, bindings_count, memory, storage); } /** @@ -4704,6 +5108,19 @@ inline void WisCommandListCopyTextureToBuffer(WisCommandList self, WisTexture so DX12CommandListCopyTextureToBuffer(self, source, destination, regions, region_count); } +/** + * @brief Copies data from one texture to another. + * @param self valid handle to the CommandList + * @param source The source texture to copy from. + * @param destination The destination texture to copy to. + * @param regions The regions to copy. + * @param region_count The number of regions to copy. + * */ +inline void WisCommandListCopyTexture(WisCommandList self, WisTexture source, WisTexture destination, const WisTextureCopyRegion* regions, uint32_t region_count) +{ + DX12CommandListCopyTexture(self, source, destination, regions, region_count); +} + /** * @brief Sets the barrier on the buffer. * @param self valid handle to the CommandList @@ -4777,6 +5194,17 @@ inline void WisCommandListSetRootSignature(WisCommandList self, WisRootSignature DX12CommandListSetRootSignature(self, root_signature); } +/** + * @brief Sets the pipeline signature object to compute pipeline. Used to determine how to pick descriptors from descriptor buffer. + * May only work with compute pipelines. + * @param self valid handle to the CommandList + * @param root_signature The root signature to set. 
+ * */ +inline void WisCommandListSetComputeRootSignature(WisCommandList self, WisRootSignature root_signature) +{ + DX12CommandListSetComputeRootSignature(self, root_signature); +} + /** * @brief Sets the primitive topology. Detemines how vertices shall be processed. * @param self valid handle to the CommandList @@ -4897,6 +5325,18 @@ inline void WisCommandListDrawInstanced(WisCommandList self, uint32_t vertex_cou DX12CommandListDrawInstanced(self, vertex_count_per_instance, instance_count, start_vertex, start_instance); } +/** + * @brief Dispatches compute shader. + * @param self valid handle to the CommandList + * @param group_count_x The number of groups to dispatch in X dimension. + * @param group_count_y The number of groups to dispatch in Y dimension. Default is 1. + * @param group_count_z The number of groups to dispatch in Z dimension. Default is 1. + * */ +inline void WisCommandListDispatch(WisCommandList self, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) +{ + DX12CommandListDispatch(self, group_count_x, group_count_y, group_count_z); +} + /** * @brief Sets the root constants for the shader. * @param self valid handle to the CommandList @@ -4910,6 +5350,18 @@ inline void WisCommandListSetPushConstants(WisCommandList self, void* data, uint DX12CommandListSetPushConstants(self, data, size_4bytes, offset_4bytes, stage); } +/** + * @brief Sets the root constants for the compute or raytracing shader. + * @param self valid handle to the CommandList + * @param data The data to set the root constants with. + * @param size_4bytes The size of the data in 4-byte units. + * @param offset_4bytes The offset in the data in 4-byte units. 
+ * */ +inline void WisCommandListSetComputePushConstants(WisCommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes) +{ + DX12CommandListSetComputePushConstants(self, data, size_4bytes, offset_4bytes); +} + /** * @brief Pushes descriptor directly to the command list, without putting it to the table. * Works only with buffer bindings. @@ -4925,6 +5377,42 @@ inline void WisCommandListPushDescriptor(WisCommandList self, WisDescriptorType DX12CommandListPushDescriptor(self, type, root_index, buffer, offset); } +/** + * @brief Pushes descriptor directly to the command list, without putting it to the table. + * Works only with buffer bindings. + * Works with compute or raytracing pipelines. + * Buffer is always bound with full size. + * @param self valid handle to the CommandList + * @param type The type of the descriptor to set. + * @param root_index The index of the root descriptor to set. + * @param buffer The buffer to set. + * @param offset The offset in the descriptor table to set the descriptor to. + * */ +inline void WisCommandListPushDescriptorCompute(WisCommandList self, WisDescriptorType type, uint32_t root_index, WisBuffer buffer, uint32_t offset) +{ + DX12CommandListPushDescriptorCompute(self, type, root_index, buffer, offset); +} + +/** + * @brief Sets the descriptor storage object for graphics pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +inline void WisCommandListSetDescriptorStorage(WisCommandList self, WisDescriptorStorage storage) +{ + DX12CommandListSetDescriptorStorage(self, storage); +} + +/** + * @brief Sets the descriptor storage object for compute pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. 
+ * */ +inline void WisCommandListSetComputeDescriptorStorage(WisCommandList self, WisDescriptorStorage storage) +{ + DX12CommandListSetComputeDescriptorStorage(self, storage); +} + // WisSwapChain methods -- /** * @brief Destroys the WisSwapChain. @@ -5041,6 +5529,16 @@ inline void WisBufferUnmap(WisBuffer self) DX12BufferUnmap(self); } +/** + * @brief Returns the address of the resource in GPU memory. + * @param self valid handle to the Buffer + * @return The address of the resource in GPU memory. + * */ +inline uint64_t WisBufferGetGPUAddress(WisBuffer self) +{ + return DX12BufferGetGPUAddress(self); +} + // WisTexture methods -- /** * @brief Destroys the WisTexture. @@ -5064,37 +5562,40 @@ inline void WisDescriptorStorageDestroy(WisDescriptorStorage self) /** * @brief Writes the sampler to the sampler descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of samplers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of samplers to fill. * @param sampler The sampler to write. * */ -inline void WisDescriptorStorageWriteSampler(WisDescriptorStorage self, uint32_t index, WisSampler sampler) +inline void WisDescriptorStorageWriteSampler(WisDescriptorStorage self, uint32_t set_index, uint32_t binding, WisSampler sampler) { - DX12DescriptorStorageWriteSampler(self, index, sampler); + DX12DescriptorStorageWriteSampler(self, set_index, binding, sampler); } /** * @brief Writes the constant buffer to the constant buffer descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of constant buffers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of constant buffers to fill. * @param buffer The buffer to write. * @param size The size of the constant buffer in bytes. 
* @param offset The offset in the buffer to write the constant buffer to. * size + offset must be less or equal the overall size of the bound buffer. * */ -inline void WisDescriptorStorageWriteConstantBuffer(WisDescriptorStorage self, uint32_t index, WisBuffer buffer, uint32_t size, uint32_t offset) +inline void WisDescriptorStorageWriteConstantBuffer(WisDescriptorStorage self, uint32_t set_index, uint32_t binding, WisBuffer buffer, uint32_t size, uint32_t offset) { - DX12DescriptorStorageWriteConstantBuffer(self, index, buffer, size, offset); + DX12DescriptorStorageWriteConstantBuffer(self, set_index, binding, buffer, size, offset); } /** * @brief Writes the texture to the shader resource descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of shader resources to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of shader resources to fill. * @param resource The shader resource to write. 
* */ -inline void WisDescriptorStorageWriteTexture(WisDescriptorStorage self, uint32_t index, WisShaderResource resource) +inline void WisDescriptorStorageWriteTexture(WisDescriptorStorage self, uint32_t set_index, uint32_t binding, WisShaderResource resource) { - DX12DescriptorStorageWriteTexture(self, index, resource); + DX12DescriptorStorageWriteTexture(self, set_index, binding, resource); } // WisRootSignature methods -- @@ -5216,6 +5717,7 @@ typedef VKDebugMessenger WisDebugMessenger; typedef VKRenderTarget WisRenderTarget; typedef VKSampler WisSampler; typedef VKShaderResource WisShaderResource; +typedef VKUnorderedAccessTexture WisUnorderedAccessTexture; typedef VKFenceView WisFenceView; typedef VKBufferView WisBufferView; typedef VKTextureView WisTextureView; @@ -5226,7 +5728,9 @@ typedef VKRootSignatureView WisRootSignatureView; typedef VKBufferBarrier2 WisBufferBarrier2; typedef VKTextureBarrier2 WisTextureBarrier2; typedef VKGraphicsShaderStages WisGraphicsShaderStages; +typedef VKRaytracingPipeineDesc WisRaytracingPipeineDesc; typedef VKGraphicsPipelineDesc WisGraphicsPipelineDesc; +typedef VKComputePipelineDesc WisComputePipelineDesc; typedef VKRenderPassRenderTargetDesc WisRenderPassRenderTargetDesc; typedef VKRenderPassDepthStencilDesc WisRenderPassDepthStencilDesc; typedef VKRenderPassDesc WisRenderPassDesc; @@ -5420,46 +5924,44 @@ inline WisResult WisDeviceCreateGraphicsPipeline(WisDevice self, const WisGraphi } /** - * @brief Creates a root signature object for use with DescriptorStorage. + * @brief Creates a compute pipeline state object. * @param self valid handle to the Device - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... 
: register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. - * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If WisDevice is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @param signature WisRootSignature on success (StatusOk). + * @param desc The description of the compute pipeline to create. + * @param pipeline WisPipelineState on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -inline WisResult WisDeviceCreateRootSignature(WisDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t descriptors_count, uint32_t space_overlap_count, WisRootSignature* signature) +inline WisResult WisDeviceCreateComputePipeline(WisDevice self, const WisComputePipelineDesc* desc, WisPipelineState* pipeline) { - return VKDeviceCreateRootSignature(self, push_constants, constants_count, push_descriptors, descriptors_count, space_overlap_count, signature); + return VKDeviceCreateComputePipeline(self, desc, pipeline); } /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. 
+ * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param self valid handle to the Device * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @param signature WisRootSignature on success (StatusOk). 
* @return Result with StatusOk on success. * Error in WisResult::error otherwise. * */ -inline WisResult WisDeviceCreateRootSignature2(WisDevice self, const WisPushConstant* push_constants, uint32_t constants_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptors_count, const WisDescriptorSpacing* descriptor_spacing, WisRootSignature* signature) +inline WisResult WisDeviceCreateRootSignature(WisDevice self, const WisPushConstant* push_constants, uint32_t push_constant_count, const WisPushDescriptor* push_descriptors, uint32_t push_descriptor_count, const WisDescriptorBindingDesc* bindings, uint32_t binding_count, WisRootSignature* signature) { - return VKDeviceCreateRootSignature2(self, push_constants, constants_count, push_descriptors, push_descriptors_count, descriptor_spacing, signature); + return VKDeviceCreateRootSignature(self, push_constants, push_constant_count, push_descriptors, push_descriptor_count, bindings, binding_count, signature); } /** @@ -5550,14 +6052,16 @@ inline WisResult WisDeviceCreateShaderResource(WisDevice self, WisTexture textur * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. * @param self valid handle to the Device - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @param storage WisDescriptorStorage on success (StatusOk). * @return Result with StatusOk on success. * Error in WisResult::error otherwise. 
* */ -inline WisResult WisDeviceCreateDescriptorStorage(WisDevice self, const WisDescriptorStorageDesc* desc, WisDescriptorStorage* storage) +inline WisResult WisDeviceCreateDescriptorStorage(WisDevice self, const WisDescriptorBindingDesc* bindings, uint32_t bindings_count, WisDescriptorMemory memory, WisDescriptorStorage* storage) { - return VKDeviceCreateDescriptorStorage(self, desc, storage); + return VKDeviceCreateDescriptorStorage(self, bindings, bindings_count, memory, storage); } /** @@ -5882,6 +6386,19 @@ inline void WisCommandListCopyTextureToBuffer(WisCommandList self, WisTexture so VKCommandListCopyTextureToBuffer(self, source, destination, regions, region_count); } +/** + * @brief Copies data from one texture to another. + * @param self valid handle to the CommandList + * @param source The source texture to copy from. + * @param destination The destination texture to copy to. + * @param regions The regions to copy. + * @param region_count The number of regions to copy. + * */ +inline void WisCommandListCopyTexture(WisCommandList self, WisTexture source, WisTexture destination, const WisTextureCopyRegion* regions, uint32_t region_count) +{ + VKCommandListCopyTexture(self, source, destination, regions, region_count); +} + /** * @brief Sets the barrier on the buffer. * @param self valid handle to the CommandList @@ -5955,6 +6472,17 @@ inline void WisCommandListSetRootSignature(WisCommandList self, WisRootSignature VKCommandListSetRootSignature(self, root_signature); } +/** + * @brief Sets the pipeline signature object to compute pipeline. Used to determine how to pick descriptors from descriptor buffer. + * May only work with compute pipelines. + * @param self valid handle to the CommandList + * @param root_signature The root signature to set. 
+ * */ +inline void WisCommandListSetComputeRootSignature(WisCommandList self, WisRootSignature root_signature) +{ + VKCommandListSetComputeRootSignature(self, root_signature); +} + /** * @brief Sets the primitive topology. Detemines how vertices shall be processed. * @param self valid handle to the CommandList @@ -6075,6 +6603,18 @@ inline void WisCommandListDrawInstanced(WisCommandList self, uint32_t vertex_cou VKCommandListDrawInstanced(self, vertex_count_per_instance, instance_count, start_vertex, start_instance); } +/** + * @brief Dispatches compute shader. + * @param self valid handle to the CommandList + * @param group_count_x The number of groups to dispatch in X dimension. + * @param group_count_y The number of groups to dispatch in Y dimension. Default is 1. + * @param group_count_z The number of groups to dispatch in Z dimension. Default is 1. + * */ +inline void WisCommandListDispatch(WisCommandList self, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) +{ + VKCommandListDispatch(self, group_count_x, group_count_y, group_count_z); +} + /** * @brief Sets the root constants for the shader. * @param self valid handle to the CommandList @@ -6088,6 +6628,18 @@ inline void WisCommandListSetPushConstants(WisCommandList self, void* data, uint VKCommandListSetPushConstants(self, data, size_4bytes, offset_4bytes, stage); } +/** + * @brief Sets the root constants for the compute or raytracing shader. + * @param self valid handle to the CommandList + * @param data The data to set the root constants with. + * @param size_4bytes The size of the data in 4-byte units. + * @param offset_4bytes The offset in the data in 4-byte units. + * */ +inline void WisCommandListSetComputePushConstants(WisCommandList self, void* data, uint32_t size_4bytes, uint32_t offset_4bytes) +{ + VKCommandListSetComputePushConstants(self, data, size_4bytes, offset_4bytes); +} + /** * @brief Pushes descriptor directly to the command list, without putting it to the table. 
* Works only with buffer bindings. @@ -6103,6 +6655,42 @@ inline void WisCommandListPushDescriptor(WisCommandList self, WisDescriptorType VKCommandListPushDescriptor(self, type, root_index, buffer, offset); } +/** + * @brief Pushes descriptor directly to the command list, without putting it to the table. + * Works only with buffer bindings. + * Works with compute or raytracing pipelines. + * Buffer is always bound with full size. + * @param self valid handle to the CommandList + * @param type The type of the descriptor to set. + * @param root_index The index of the root descriptor to set. + * @param buffer The buffer to set. + * @param offset The offset in the descriptor table to set the descriptor to. + * */ +inline void WisCommandListPushDescriptorCompute(WisCommandList self, WisDescriptorType type, uint32_t root_index, WisBuffer buffer, uint32_t offset) +{ + VKCommandListPushDescriptorCompute(self, type, root_index, buffer, offset); +} + +/** + * @brief Sets the descriptor storage object for graphics pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +inline void WisCommandListSetDescriptorStorage(WisCommandList self, WisDescriptorStorage storage) +{ + VKCommandListSetDescriptorStorage(self, storage); +} + +/** + * @brief Sets the descriptor storage object for compute pipeline. + * @param self valid handle to the CommandList + * @param storage The descriptor storage to set. + * */ +inline void WisCommandListSetComputeDescriptorStorage(WisCommandList self, WisDescriptorStorage storage) +{ + VKCommandListSetComputeDescriptorStorage(self, storage); +} + // WisSwapChain methods -- /** * @brief Destroys the WisSwapChain. @@ -6219,6 +6807,16 @@ inline void WisBufferUnmap(WisBuffer self) VKBufferUnmap(self); } +/** + * @brief Returns the address of the resource in GPU memory. + * @param self valid handle to the Buffer + * @return The address of the resource in GPU memory. 
+ * */ +inline uint64_t WisBufferGetGPUAddress(WisBuffer self) +{ + return VKBufferGetGPUAddress(self); +} + // WisTexture methods -- /** * @brief Destroys the WisTexture. @@ -6242,37 +6840,40 @@ inline void WisDescriptorStorageDestroy(WisDescriptorStorage self) /** * @brief Writes the sampler to the sampler descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of samplers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of samplers to fill. * @param sampler The sampler to write. * */ -inline void WisDescriptorStorageWriteSampler(WisDescriptorStorage self, uint32_t index, WisSampler sampler) +inline void WisDescriptorStorageWriteSampler(WisDescriptorStorage self, uint32_t set_index, uint32_t binding, WisSampler sampler) { - VKDescriptorStorageWriteSampler(self, index, sampler); + VKDescriptorStorageWriteSampler(self, set_index, binding, sampler); } /** * @brief Writes the constant buffer to the constant buffer descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of constant buffers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of constant buffers to fill. * @param buffer The buffer to write. * @param size The size of the constant buffer in bytes. * @param offset The offset in the buffer to write the constant buffer to. * size + offset must be less or equal the overall size of the bound buffer. 
* */ -inline void WisDescriptorStorageWriteConstantBuffer(WisDescriptorStorage self, uint32_t index, WisBuffer buffer, uint32_t size, uint32_t offset) +inline void WisDescriptorStorageWriteConstantBuffer(WisDescriptorStorage self, uint32_t set_index, uint32_t binding, WisBuffer buffer, uint32_t size, uint32_t offset) { - VKDescriptorStorageWriteConstantBuffer(self, index, buffer, size, offset); + VKDescriptorStorageWriteConstantBuffer(self, set_index, binding, buffer, size, offset); } /** * @brief Writes the texture to the shader resource descriptor storage. * @param self valid handle to the DescriptorStorage - * @param index Index in array of shader resources to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of shader resources to fill. * @param resource The shader resource to write. * */ -inline void WisDescriptorStorageWriteTexture(WisDescriptorStorage self, uint32_t index, WisShaderResource resource) +inline void WisDescriptorStorageWriteTexture(WisDescriptorStorage self, uint32_t set_index, uint32_t binding, WisShaderResource resource) { - VKDescriptorStorageWriteTexture(self, index, resource); + VKDescriptorStorageWriteTexture(self, set_index, binding, resource); } // WisRootSignature methods -- diff --git a/cmake/functions.cmake b/cmake/functions.cmake index 2e8b705e..26ad0ddf 100644 --- a/cmake/functions.cmake +++ b/cmake/functions.cmake @@ -92,7 +92,7 @@ endfunction() function(wis_compile_shader) set(options ) set(oneValueArgs TARGET ENTRY SHADER OUTPUT TYPE SHADER_MODEL) - set(multiValueArgs INCLUDE_DIRS DEFINITIONS) + set(multiValueArgs INCLUDE_DIRS DEFINITIONS FLAGS) cmake_parse_arguments(wis_compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -152,7 +152,14 @@ function(wis_compile_shader) list(APPEND DEFINES "-D${DEFINITION} ") endforeach() + foreach(FLAG ${wis_compile_shader_FLAGS}) + list(APPEND FLAGS "${FLAG} ") + endforeach() + 
#remove trailing space + string(STRIP "${INCLUDES}" INCLUDES) + string(STRIP "${DEFINES}" DEFINES) + string(STRIP "${FLAGS}" FLAGS) set(SHADER ${wis_compile_shader_SHADER}) set(TARGET ${wis_compile_shader_TARGET}) @@ -178,7 +185,7 @@ function(wis_compile_shader) if(WISDOM_WINDOWS) add_custom_command(TARGET ${TARGET} - COMMAND "${dxc_EXECUTABLE}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -Wno-ignored-attributes ${INCLUDES} ${DEFINES} -DDXIL=1 -Fo${OUTPUT_DXIL} -Fd${OUTPUT_PDB} ${SHADER} + COMMAND "${dxc_EXECUTABLE}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -Wno-ignored-attributes ${FLAGS} ${INCLUDES} ${DEFINES} -DDXIL=1 -Fo${OUTPUT_DXIL} -Fd${OUTPUT_PDB} ${SHADER} MAIN_DEPENDENCY ${SHADER} COMMENT "HLSL ${SHADER}" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} @@ -186,7 +193,7 @@ function(wis_compile_shader) endif() add_custom_command(TARGET ${TARGET} - COMMAND "${dxc_EXECUTABLE}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -spirv -Wno-ignored-attributes -fspv-target-env=vulkan1.3 ${INCLUDES} ${DEFINES} -DSPIRV=1 -Fo${OUTPUT_SPV} ${SHADER} + COMMAND "${dxc_EXECUTABLE}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -spirv -Wno-ignored-attributes ${FLAGS} -fspv-target-env=vulkan1.3 ${INCLUDES} ${DEFINES} -DSPIRV=1 -Fo${OUTPUT_SPV} ${SHADER} MAIN_DEPENDENCY ${SHADER} COMMENT "SPV ${SHADER}" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/cmake/wisdom-deps.cmake.in b/cmake/wisdom-deps.cmake.in index e889b51a..abb8b52a 100644 --- a/cmake/wisdom-deps.cmake.in +++ b/cmake/wisdom-deps.cmake.in @@ -85,7 +85,7 @@ endfunction() function(wis_compile_shader) set(options ) set(oneValueArgs DXC TARGET ENTRY SHADER OUTPUT TYPE SHADER_MODEL) - set(multiValueArgs INCLUDE_DIRS DEFINITIONS) + set(multiValueArgs INCLUDE_DIRS DEFINITIONS FLAGS) cmake_parse_arguments(wis_compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -145,6 +145,14 @@ function(wis_compile_shader) list(APPEND DEFINES "-D${DEFINITION} ") 
endforeach() + foreach(FLAG ${wis_compile_shader_FLAGS}) + list(APPEND FLAGS "${FLAG} ") + endforeach() + + #remove trailing space + string(STRIP "${INCLUDES}" INCLUDES) + string(STRIP "${DEFINES}" DEFINES) + string(STRIP "${FLAGS}" FLAGS) set(SHADER ${wis_compile_shader_SHADER}) @@ -171,7 +179,7 @@ function(wis_compile_shader) if(WIN32) add_custom_command(TARGET ${TARGET} - COMMAND "${wis_compile_shader_DXC}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -Wno-ignored-attributes ${INCLUDES} ${DEFINES} -DDXIL=1 -Fo${OUTPUT_DXIL} -Fd${OUTPUT_PDB} ${SHADER} + COMMAND "${wis_compile_shader_DXC}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -Wno-ignored-attributes ${FLAGS} ${INCLUDES} ${DEFINES} -DDXIL=1 -Fo${OUTPUT_DXIL} -Fd${OUTPUT_PDB} ${SHADER} MAIN_DEPENDENCY ${SHADER} COMMENT "HLSL ${SHADER}" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} @@ -179,7 +187,7 @@ function(wis_compile_shader) endif() add_custom_command(TARGET ${TARGET} - COMMAND "${wis_compile_shader_DXC}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -spirv -Wno-ignored-attributes -fspv-target-env=vulkan1.3 ${INCLUDES} ${DEFINES} -DSPIRV=1 -Fo${OUTPUT_SPV} ${SHADER} + COMMAND "${wis_compile_shader_DXC}" -E${ENTRY} -T${TYPE}_${SHADER_MODEL} -Zi $,-Od,-O3> -spirv -Wno-ignored-attributes ${FLAGS} -fspv-target-env=vulkan1.3 ${INCLUDES} ${DEFINES} -DSPIRV=1 -Fo${OUTPUT_SPV} ${SHADER} MAIN_DEPENDENCY ${SHADER} COMMENT "SPV ${SHADER}" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/cmake/wisdom.targets b/cmake/wisdom.targets index 49dc3b4c..a2c1af98 100644 --- a/cmake/wisdom.targets +++ b/cmake/wisdom.targets @@ -20,7 +20,7 @@ - $(MSBuildThisFileDirectory)..\..\lib\wisdom-debug$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-windows$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-extended-allocation$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-dx12$(LP).lib;%(AdditionalDependencies) + 
$(MSBuildThisFileDirectory)..\..\lib\wisdom-debug$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-windows$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-extended-allocation$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-dx12$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-raytracing$(LP).lib;%(AdditionalDependencies) $(MSBuildThisFileDirectory)..\..\lib\VKAllocator$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\wisdom-vk$(LP).lib;%(AdditionalDependencies) $(MSBuildThisFileDirectory)..\..\lib\DX12Allocator$(LP).lib;$(MSBuildThisFileDirectory)..\..\lib\DX12Agility$(LP).lib;dxguid.lib;DXGI.lib;%(AdditionalDependencies) diff --git a/examples/basic/CMakeLists.txt b/examples/basic/CMakeLists.txt index bc52fc4b..dba7117b 100644 --- a/examples/basic/CMakeLists.txt +++ b/examples/basic/CMakeLists.txt @@ -3,3 +3,4 @@ add_subdirectory(shaders) add_example(lut) add_example(multiview) add_example(descriptor_storage) +add_example(raytracing) diff --git a/examples/basic/descriptor_storage/entry_main.cpp b/examples/basic/descriptor_storage/entry_main.cpp index 106b7a8c..fea19f32 100644 --- a/examples/basic/descriptor_storage/entry_main.cpp +++ b/examples/basic/descriptor_storage/entry_main.cpp @@ -43,12 +43,10 @@ class App std::construct_at(&swap, setup.device, std::move(swapx), w, h); cmd_list = setup.CreateLists(); - // Only a single descriptor table with 1 descriptor - wis::DescriptorStorageDesc desc{ - .cbuffer_count = ex::flight_frames * 2, // one cbuffer per frame - .memory = wis::DescriptorMemory::ShaderVisible, // visible to shaders + wis::DescriptorBindingDesc bindings[] = { + { .binding_type = wis::DescriptorType::ConstantBuffer, .binding_space = 1, .binding_count = ex::flight_frames * 2 }, }; - desc_storage = setup.device.CreateDescriptorStorage(result, desc); + desc_storage = setup.device.CreateDescriptorStorage(result, bindings, std::size(bindings)); } public: @@ -189,8 +187,10 @@ class App wis::PushConstant root_constants[]{ { .stage =
wis::ShaderStages::All, .size_bytes = 2 * sizeof(uint32_t) } }; - root = ex::Unwrap(setup.device.CreateRootSignature(root_constants, std::size(root_constants), nullptr, 0, 2)); - // Note the 2 in the CreateRootSignature call. This is the space overlap count, which is 2 in this case. + wis::DescriptorBindingDesc bindings[] = { + { .binding_type = wis::DescriptorType::ConstantBuffer, .binding_space = 1, .space_overlap_count = 2, .binding_count = ex::flight_frames * 2 }, + }; + root = ex::Unwrap(setup.device.CreateRootSignature(root_constants, std::size(root_constants), nullptr, 0, bindings, std::size(bindings))); // Create pipeline { @@ -233,8 +233,8 @@ class App for (size_t i = 0; i < ex::flight_frames; i++) { constant_buffersx[i] = ex::Unwrap(setup.allocator.CreateBuffer(sizeof(float), wis::BufferUsage::CopySrc | wis::BufferUsage::ConstantBuffer, wis::MemoryType::Upload, wis::MemoryFlags::Mapped)); constant_buffersy[i] = ex::Unwrap(setup.allocator.CreateBuffer(sizeof(float), wis::BufferUsage::CopySrc | wis::BufferUsage::ConstantBuffer, wis::MemoryType::Upload, wis::MemoryFlags::Mapped)); - desc_storage.WriteConstantBuffer(i, constant_buffersx[i], sizeof(float)); - desc_storage.WriteConstantBuffer(ex::flight_frames + i, constant_buffersy[i], sizeof(float)); + desc_storage.WriteConstantBuffer(0, i, constant_buffersx[i], sizeof(float)); + desc_storage.WriteConstantBuffer(0, ex::flight_frames + i, constant_buffersy[i], sizeof(float)); constant_datax[i] = static_cast(constant_buffersx[i].Map()); constant_datax[i][0] = 0.0f; diff --git a/examples/basic/multiview/entry_main.cpp b/examples/basic/multiview/entry_main.cpp index c9bb2b0d..403430c9 100644 --- a/examples/basic/multiview/entry_main.cpp +++ b/examples/basic/multiview/entry_main.cpp @@ -57,9 +57,11 @@ class App cmd_list = setup.CreateLists(); cmd_list2 = setup.CreateLists(); - desc_storage = ex::Unwrap(setup.device.CreateDescriptorStorage({ .sampler_count = 1, - .texture_count = ex::flight_frames, - .memory = 
wis::DescriptorMemory::ShaderVisible })); + wis::DescriptorBindingDesc bindings[] = { + { .binding_type = wis::DescriptorType::Texture, .binding_space = 1, .binding_count = ex::flight_frames }, + { .binding_type = wis::DescriptorType::Sampler, .binding_space = 2, .binding_count = 1 }, + }; + desc_storage = setup.device.CreateDescriptorStorage(result, bindings, std::size(bindings)); } public: @@ -287,10 +289,15 @@ class App // Create root signature with { + wis::Result result = wis::success; wis::PushConstant root_constants[]{ { .stage = wis::ShaderStages::Pixel, .size_bytes = sizeof(uint32_t) } }; - root = ex::Unwrap(setup.device.CreateRootSignature(root_constants, 1, nullptr, 0, 2)); + wis::DescriptorBindingDesc bindings[] = { + { .binding_type = wis::DescriptorType::Texture, .binding_space = 1, .binding_count = ex::flight_frames }, // space 0 is for root constants + { .binding_type = wis::DescriptorType::Sampler, .binding_space = 2, .binding_count = 1 }, + }; + root = setup.device.CreateRootSignature(result, root_constants, 1, nullptr, 0, bindings, std::size(bindings)); } // Create pipeline @@ -376,13 +383,13 @@ class App .comparison_op = wis::Compare::None, }; sampler = ex::Unwrap(setup.device.CreateSampler(sample_desc)); - desc_storage.WriteSampler(0, sampler); + desc_storage.WriteSampler(1, 0, sampler); } // fill desc buffer { for (uint32_t i = 0; i < ex::flight_frames; i++) { - desc_storage.WriteTexture(i, srvs[i]); + desc_storage.WriteTexture(0, i, srvs[i]); } } } diff --git a/examples/basic/raytracing/CMakeLists.txt b/examples/basic/raytracing/CMakeLists.txt new file mode 100644 index 00000000..cd3ae835 --- /dev/null +++ b/examples/basic/raytracing/CMakeLists.txt @@ -0,0 +1,17 @@ +project(raytracing-${POSTFIX}) + +set(HEADERS) +set(SOURCES entry_main.cpp) + +add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES}) +target_link_libraries(${PROJECT_NAME} PUBLIC common-${POSTFIX}) +add_dependencies(${PROJECT_NAME} compile_shaders_basic) +set_target_properties( + 
${PROJECT_NAME} PROPERTIES CXX_STANDARD 20 RUNTIME_OUTPUT_DIRECTORY + ${EXAMPLE_BIN_OUTPUT}) + +if(POSTFIX STREQUAL "dx12") + wis_make_exports_dx(${PROJECT_NAME}) # install the d3d12 agility sdk, for + # examples this is enough + # for the main project, use wis_installdeps(${PROJECT_NAME}) instead +endif() diff --git a/examples/basic/raytracing/entry_main.cpp b/examples/basic/raytracing/entry_main.cpp new file mode 100644 index 00000000..457c9209 --- /dev/null +++ b/examples/basic/raytracing/entry_main.cpp @@ -0,0 +1,456 @@ +#include +#include +#include +#include +#include + +#include + +// In order to render with multiview, we need to have texture with array layers. +// In case of stereo rendering, we need to have 2 array layers. +// Stereo example is not implemented here, but it is possible to implement it by +// creating a swapchain with stereo=true. Stereo rendering is not supported by all graphics cards. +// You can check if stereo is supported by calling swap.StereoSupported(). + +// In this example, we will render to the texture with 2 array layers, imitating stereo rendering. +// One layer will be rendered with red color, and the other with blue color. 
+ +class App +{ + // Standard setup + ex::Window window; + ex::ExampleSetup setup; + ex::Swapchain swap; + ex::FramedCommandList cmd_list; + ex::FramedCommandList copy_cmd_list; + + // Resources + wis::Raytracing raytracing_extension; + + wis::Texture uav_texture; + wis::Shader raygen_shader; + + wis::Buffer vertex_buffer; + wis::Buffer index_buffer; + + wis::Buffer rtas_buffer; + wis::Buffer rtas_scratch_buffer; + wis::Buffer rtas_update_buffer; + wis::Buffer rtas_instance_buffer; + wis::Buffer sbt_buffer; + + wis::AccelerationStructure top_rtas; + wis::AccelerationStructure bottom_rtas; + + wis::UnorderedAccessTexture uav_output; + + wis::RootSignature rt_root_signature; + wis::DescriptorStorage rt_descriptor_storage; + wis::RaytracingPipeline rt_pipeline; + + wis::RaytracingDispatchDesc rt_dispatch_desc; // just for convenience + +public: + App() + : window("Raytracing", 800, 600) + { + wis::Result result = wis::success; + wis::DeviceExtension* device_exts[] = { &raytracing_extension }; + setup.InitDefault(window.GetPlatformExtension(), device_exts); + auto [w, h] = window.PixelSize(); + std::construct_at(&swap, setup.device, window.CreateSwapchain(result, setup, ex::swapchain_format), w, h); + cmd_list = setup.CreateLists(); + + // Load shaders + auto buf = ex::LoadShader("shaders/raytracing.lib"); + raygen_shader = setup.device.CreateShader(result, buf.data(), uint32_t(buf.size())); + + // Create resources + CreatePipeline(); + CreateSizeDependentResources(w, h); + + // Create ... 
+ CreatePrimitives(); + CreateAccelerationStructures(); + } + +public: + void Run() + { + while (true) { + if (!ProcessEvents()) { + break; + } + + Frame(); + } + } + // Process window events + bool ProcessEvents() + { + // Window events + SDL_Event e; + while (SDL_PollEvent(&e)) { + switch (e.type) { + case SDL_EVENT_WINDOW_RESIZED: { + auto [w, h] = window.PixelSize(); + swap.Resize(setup.device, w, h); + CreateSizeDependentResources(w, h); + break; + } + case SDL_EVENT_QUIT: + swap.Throttle(); // wait for GPU to finish, then exit + return false; + case SDL_EVENT_KEY_DOWN: + OnKeyPressed(e); + break; + case SDL_EVENT_MOUSE_BUTTON_DOWN: + OnMouseButtonDown(e); + break; + case SDL_EVENT_MOUSE_MOTION: + OnMouseMove(e); + break; + default: + break; + } + } + return true; + } + void OnKeyPressed(const SDL_Event& event) + { + // Keyboard events + } + void OnMouseButtonDown(const SDL_Event& event) + { + // Mouse events + } + void OnMouseMove(const SDL_Event& event) + { + // Mouse events + } + + // Render a frame + void Frame() + { + uint32_t frame_index = swap.CurrentFrame(); + auto& swap_texture = swap.GetTexture(frame_index); + + auto& cmd = cmd_list[frame_index]; + std::ignore = cmd.Reset(); + + // Root signature is set as if it was a compute pipeline + cmd.SetComputeRootSignature(rt_root_signature); + raytracing_extension.SetDescriptorStorage(cmd, rt_descriptor_storage); + raytracing_extension.SetPipelineState(cmd, rt_pipeline); + raytracing_extension.DispatchRays(cmd, rt_dispatch_desc); + // barrier for UAV texture + + wis::TextureBarrier2 before[] = { + { .barrier = { .sync_before = wis::BarrierSync::Compute, + .sync_after = wis::BarrierSync::Copy, + .access_before = wis::ResourceAccess::UnorderedAccess, + .access_after = wis::ResourceAccess::CopySource, + .state_before = wis::TextureState::UnorderedAccess, + .state_after = wis::TextureState::CopySource }, + .texture = uav_texture }, + // swapchain + { .barrier = { .sync_before = wis::BarrierSync::None, + 
.sync_after = wis::BarrierSync::Copy, + .access_before = wis::ResourceAccess::NoAccess, + .access_after = wis::ResourceAccess::CopyDest, + .state_before = wis::TextureState::Present, + .state_after = wis::TextureState::CopyDest }, + .texture = swap_texture }, + }; + + cmd.TextureBarriers(before, std::size(before)); + + wis::TextureCopyRegion region{ + .src = { + .size = { rt_dispatch_desc.width, rt_dispatch_desc.height, 1 }, + .format = ex::swapchain_format, + }, + .dst = { + .size = { rt_dispatch_desc.width, rt_dispatch_desc.height, 1 }, + .format = ex::swapchain_format, + }, + }; + cmd.CopyTexture(uav_texture, swap_texture, &region, 1); + + wis::TextureBarrier2 after[] = { + { .barrier = { .sync_before = wis::BarrierSync::Copy, + .sync_after = wis::BarrierSync::Compute, + .access_before = wis::ResourceAccess::CopySource, + .access_after = wis::ResourceAccess::UnorderedAccess, + .state_before = wis::TextureState::CopySource, + .state_after = wis::TextureState::UnorderedAccess }, + .texture = uav_texture }, + // swapchain + { .barrier = { .sync_before = wis::BarrierSync::Copy, + .sync_after = wis::BarrierSync::None, + .access_before = wis::ResourceAccess::CopyDest, + .access_after = wis::ResourceAccess::NoAccess, + .state_before = wis::TextureState::CopyDest, + .state_after = wis::TextureState::Present }, + .texture = swap_texture }, + }; + cmd.TextureBarriers(after, std::size(after)); + + std::ignore = cmd.Close(); + + wis::CommandListView lists[] = { cmd }; + setup.queue.ExecuteCommandLists(lists, std::size(lists)); + swap.Present(setup.queue); + } + +private: + void CreateSizeDependentResources(uint32_t width, uint32_t height) + { + using namespace wis; // for flag operators + wis::Result result = wis::success; + + // Create UAV texture + wis::TextureDesc desc{ + .format = ex::swapchain_format, + .size = { width, height, 1 }, + .usage = wis::TextureUsage::CopySrc | wis::TextureUsage::UnorderedAccess, + }; + uav_texture = setup.allocator.CreateTexture(result, desc);
+ + // Create UAV output + wis::UnorderedAccessDesc uav_desc{ + .format = ex::swapchain_format, + .view_type = wis::TextureViewType::Texture2D, + .subresource_range = { 0, 1, 0, 1 }, + }; + uav_output = setup.device.CreateUnorderedAccessTexture(result, uav_texture, uav_desc); + + // Write to descriptor storage + rt_descriptor_storage.WriteRWTexture(0, 0, uav_output); + + // Update dispatch desc + rt_dispatch_desc.width = width; + rt_dispatch_desc.height = height; + rt_dispatch_desc.depth = 1; + + MakeTransitions(); + } + void CreatePrimitives() + { + // clang-format off + constexpr static float vertices[] = { + 1.0f, 1.0f, 0.0f, + -1.0f, 1.0f, 0.0f, + 0.0f, -1.0f, 0.0f, + }; + // clang-format on + constexpr static uint16_t indices[] = { 0, 1, 2 }; + + wis::Result result = wis::success; + vertex_buffer = setup.allocator.CreateBuffer(result, sizeof(vertices), wis::BufferUsage::AccelerationStructureInput, wis::MemoryType::Upload, wis::MemoryFlags::Mapped); + index_buffer = setup.allocator.CreateBuffer(result, sizeof(indices), wis::BufferUsage::AccelerationStructureInput, wis::MemoryType::Upload, wis::MemoryFlags::Mapped); + + auto memory = vertex_buffer.Map(); + std::copy_n(vertices, std::size(vertices), memory); + std::span vertices_span(memory, std::size(vertices)); + + vertex_buffer.Unmap(); + + auto memory2 = index_buffer.Map(); + std::copy_n(indices, std::size(indices), memory2); + index_buffer.Unmap(); + } + + void MakeTransitions() + { + auto& cmd = cmd_list[0]; + std::ignore = cmd.Reset(); + // Transition UAV texture to UAV state + cmd.TextureBarrier({ .sync_before = wis::BarrierSync::None, + .sync_after = wis::BarrierSync::None, + .access_before = wis::ResourceAccess::NoAccess, + .access_after = wis::ResourceAccess::NoAccess, + .state_before = wis::TextureState::Undefined, + .state_after = wis::TextureState::UnorderedAccess }, + uav_texture); + cmd.Close(); + + wis::CommandListView lists[] = { cmd }; + setup.queue.ExecuteCommandLists(lists, std::size(lists)); 
+ setup.WaitForGPU(); + } + /* Sizes, allocates and builds one bottom-level AS (a single indexed triangle) and one top-level AS (a single instance), submits the build on cmd_list[0], waits for the GPU, then writes the top-level AS into rt_descriptor_storage. */ void CreateAccelerationStructures() + { + using namespace wis; // for flag operators + wis::Result result = wis::success; /* NOTE(review): result is passed to every Create* call below but is never inspected in this method */ + + rtas_instance_buffer = setup.allocator.CreateBuffer(result, sizeof(wis::AccelerationInstance), wis::BufferUsage::AccelerationStructureInput, wis::MemoryType::Upload, wis::MemoryFlags::Mapped); + + // get tlas size + wis::TopLevelASBuildDesc build_desc{ + .flags = wis::AccelerationStructureFlags::AllowUpdate, + .instance_count = 1, + .gpu_address = rtas_instance_buffer.GetGPUAddress(), + .indirect = false, + .update = false, + }; + auto as_size = raytracing_extension.GetTopLevelASSize(build_desc); + + // get blas size + wis::AcceleratedGeometryInput geometry_input{ + .geometry_type = wis::ASGeometryType::Triangles, + .flags = wis::ASGeometryFlags::Opaque, + .vertex_or_aabb_buffer_address = vertex_buffer.GetGPUAddress(), + .vertex_or_aabb_buffer_stride = sizeof(float[3]), + .index_buffer_address = index_buffer.GetGPUAddress(), + .transform_matrix_address = 0, + .vertex_count = 3, + .triangle_or_aabb_count = 1, + .vertex_format = wis::DataFormat::RGB32Float, + .index_format = wis::IndexType::UInt16, + }; + wis::AcceleratedGeometryDesc geometry_desc = wis::CreateGeometryDesc(geometry_input); + wis::BottomLevelASBuildDesc blas_desc{ + .flags = wis::AccelerationStructureFlags::None, + .geometry_count = 1, + .geometry_array = &geometry_desc, + .update = false, + }; + auto blas_size = raytracing_extension.GetBottomLevelASSize(blas_desc); + + rtas_buffer = setup.allocator.CreateBuffer(result, as_size.result_size + blas_size.result_size, wis::BufferUsage::AccelerationStructureBuffer); /* one buffer holds both structures: top-level at offset 0, bottom-level right after it */ + rtas_scratch_buffer = setup.allocator.CreateBuffer(result, as_size.scratch_size + blas_size.scratch_size, wis::BufferUsage::StorageBuffer); /* one scratch buffer, split the same way */ + rtas_update_buffer = setup.allocator.CreateBuffer(result, as_size.update_size, wis::BufferUsage::StorageBuffer); + + top_rtas = raytracing_extension.CreateAccelerationStructure(result, rtas_buffer, 0, 
as_size.result_size, wis::ASLevel::Top); + bottom_rtas = raytracing_extension.CreateAccelerationStructure(result, rtas_buffer, as_size.result_size, blas_size.result_size, wis::ASLevel::Bottom); + + // Fill instance buffer + rtas_instance_buffer.Map()[0] = { + .transform = { + { 1.0f, 0.0f, 0.0f, 0.0f }, + { 0.0f, 1.0f, 0.0f, 0.0f }, + { 0.0f, 0.0f, 1.0f, 0.0f }, + }, + .instance_id = 0, + .mask = 0xFF, + .instance_offset = 0, + .flags = uint32_t(wis::ASInstanceFlags::TriangleCullDisable), + .acceleration_structure_handle = raytracing_extension.GetAccelerationStructureDeviceAddress(bottom_rtas), + }; + rtas_instance_buffer.Unmap(); + + // Build acceleration structures + auto& cmd = cmd_list[0]; + std::ignore = cmd.Reset(); + raytracing_extension.BuildBottomLevelAS(cmd, blas_desc, bottom_rtas, rtas_scratch_buffer.GetGPUAddress() + as_size.scratch_size); /* bottom-level scratch starts after the top-level scratch region */ + // Add a barrier to make sure the BLAS is built before the TLAS build + cmd.BufferBarrier({ .sync_before = wis::BarrierSync::BuildRTAS, + .sync_after = wis::BarrierSync::BuildRTAS, + .access_before = wis::ResourceAccess::AccelerationStructureWrite, + .access_after = wis::ResourceAccess::AccelerationStructureRead | wis::ResourceAccess::AccelerationStructureWrite }, + rtas_buffer); + + raytracing_extension.BuildTopLevelAS(cmd, build_desc, top_rtas, rtas_scratch_buffer.GetGPUAddress()); + cmd.Close(); + + wis::CommandListView lists[] = { cmd }; + setup.queue.ExecuteCommandLists(lists, std::size(lists)); + setup.WaitForGPU(); + + // Write acceleration structure to descriptor storage + raytracing_extension.WriteAccelerationStructure(rt_descriptor_storage, 1, 0, top_rtas); /* binding 1, element 0: the AccelerationStructure entry of bindings[] in CreatePipeline(). NOTE(review): rt_descriptor_storage is assigned in CreatePipeline(); presumably that runs first - confirm call order */ + } + + /* Creates the descriptor storage and root signature, the raytracing pipeline (raygen, miss and closest-hit exports plus one triangle hit group) and a shader binding table with one record per table, then fills rt_dispatch_desc with the table addresses, sizes and strides. */ void CreatePipeline() + { + wis::Result result = wis::success; + wis::DescriptorBindingDesc bindings[] = { + { .binding_type = wis::DescriptorType::RWTexture, .binding_space = 0, .binding_count = ex::flight_frames }, + { .binding_type = wis::DescriptorType::AccelerationStructure, .binding_space = 1, .binding_count = 1 }, + }; + 
rt_descriptor_storage = setup.device.CreateDescriptorStorage(result, bindings, std::size(bindings)); + rt_root_signature = setup.device.CreateRootSignature(result, nullptr, 0, nullptr, 0, bindings, std::size(bindings)); + + // Create pipeline + wis::ShaderView shaders[]{ + raygen_shader, raygen_shader + }; /* the same shader view twice; shader_array_index in exports[] indexes this array */ + wis::ShaderExport exports[]{ + { .entry_point = "RayGeneration", .shader_type = wis::RaytracingShaderType::Raygen, .shader_array_index = 1 }, + { .entry_point = "Miss", .shader_type = wis::RaytracingShaderType::Miss, .shader_array_index = 0 }, + { .entry_point = "ClosestHit", .shader_type = wis::RaytracingShaderType::ClosestHit, .shader_array_index = 1 }, + }; + wis::HitGroupDesc hit_groups[]{ + { .type = wis::HitGroupType::Triangles, .closest_hit_export_index = 2 }, /* 2 = position of the ClosestHit entry in exports[] */ + }; + wis::RaytracingPipelineDesc rt_pipeline_desc{ + .root_signature = rt_root_signature, + .shaders = shaders, + .shader_count = std::size(shaders), + .exports = exports, + .export_count = std::size(exports), + .hit_groups = hit_groups, + .hit_group_count = std::size(hit_groups), + .max_recursion_depth = 1, + .max_payload_size = 24, + .max_attribute_size = 8, + }; + rt_pipeline = raytracing_extension.CreateRaytracingPipeline(result, rt_pipeline_desc); + + // Create shader binding table + wis::ShaderBindingTableInfo sbt_info = raytracing_extension.GetShaderBindingTableInfo(); + + const uint8_t* shader_ident = rt_pipeline.GetShaderIdentifiers(); + + // 1 raygen, 1 miss, 1 hit group + sbt_buffer = setup.allocator.CreateBuffer(result, 1024, wis::BufferUsage::ShaderBindingTable, wis::MemoryType::Upload, wis::MemoryFlags::Mapped); /* NOTE(review): the fixed 1024 bytes must cover 3 * table_increment - confirm for the backend's entry size and alignment */ + auto memory = sbt_buffer.Map(); + + // raygen + uint32_t table_increment = wis::detail::aligned_size(sbt_info.entry_size, sbt_info.table_start_alignment); // not real, just for demonstration + + // copies should have size of entry_size, only the last one should have the size aligned to table_start_alignment + std::memcpy(memory, shader_ident, sbt_info.entry_size); + memory 
+= table_increment; + + // miss + std::memcpy(memory, shader_ident + sbt_info.entry_size, sbt_info.entry_size); + memory += table_increment; + + // hit group + std::memcpy(memory, shader_ident + sbt_info.entry_size * 2, sbt_info.entry_size); + memory += table_increment; + sbt_buffer.Unmap(); + + auto gpu_address = sbt_buffer.GetGPUAddress(); /* the three tables sit table_increment bytes apart, in the order raygen, miss, hit group */ + + rt_dispatch_desc.ray_gen_shader_table_address = gpu_address; + rt_dispatch_desc.miss_shader_table_address = gpu_address + table_increment; + rt_dispatch_desc.hit_group_table_address = gpu_address + table_increment * 2; + rt_dispatch_desc.callable_shader_table_address = 0; + rt_dispatch_desc.ray_gen_shader_table_size = sbt_info.entry_size; + rt_dispatch_desc.miss_shader_table_size = sbt_info.entry_size; + rt_dispatch_desc.hit_group_table_size = sbt_info.entry_size; + rt_dispatch_desc.callable_shader_table_size = 0; + rt_dispatch_desc.miss_shader_table_stride = sbt_info.entry_size; + rt_dispatch_desc.hit_group_table_stride = sbt_info.entry_size; + rt_dispatch_desc.callable_shader_table_stride = 0; + } + +private: + void CopyTextureToSwapchain() /* empty in this revision */ + { + } +}; + +/* Runs the sample application; the message of any std::exception is printed to stderr and the process exits with 1, otherwise 0. */ int main(int argc, char** argv) +{ + try { + App{}.Run(); + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + return 1; + } + return 0; +} diff --git a/examples/basic/shaders/CMakeLists.txt b/examples/basic/shaders/CMakeLists.txt index abed68bd..e9b52d96 100644 --- a/examples/basic/shaders/CMakeLists.txt +++ b/examples/basic/shaders/CMakeLists.txt @@ -19,3 +19,19 @@ foreach(FILE ${vs} ${ps} ${ds} ${hs} ${gs}) wis_compile_shader(TARGET compile_shaders_basic SHADER ${FILE} OUTPUT ${SHADER_DIR}/${FILE_WE}) endforeach(FILE) + +file(GLOB_RECURSE lib "${CMAKE_CURRENT_SOURCE_DIR}/*/*.lib.hlsl") +foreach(FILE ${lib}) + get_filename_component(FILE_WE ${FILE} NAME_WLE) + wis_compile_shader( + TARGET + compile_shaders_basic + SHADER + ${FILE} + OUTPUT + ${SHADER_DIR}/${FILE_WE} + TYPE + "lib" + SHADER_MODEL + "6.3") +endforeach(FILE) diff --git 
a/examples/basic/shaders/descriptor_storage/desc_storage.vs.hlsl b/examples/basic/shaders/descriptor_storage/desc_storage.vs.hlsl index b94121ba..a5a590d2 100644 --- a/examples/basic/shaders/descriptor_storage/desc_storage.vs.hlsl +++ b/examples/basic/shaders/descriptor_storage/desc_storage.vs.hlsl @@ -15,13 +15,8 @@ struct OffsetY { float offset; }; -// binding 0, space 1 is used for samplers -// binding 0, space 2 is used for constant buffers -[[vk::binding(0, 2)]] ConstantBuffer offsetsx[] : register(b0, space3); -[[vk::binding(0, 2)]] ConstantBuffer offsetsy[] : register(b0, space4); //overlap with space3 -// Note: different register, for DX this has to be different register -// for Vulkan it can be the same register without offset. -// That means we have to subtract the descriptor offset in the shader for DX compilation. +[[vk::binding(0, 1)]] ConstantBuffer offsetsx[] : register(b0, space1); +[[vk::binding(0, 1)]] ConstantBuffer offsetsy[] : register(b0, space2); //overlap with space1 PSInput main(float3 position : POSITION) { diff --git a/examples/basic/shaders/multiview/multiview_screen.ps.hlsl b/examples/basic/shaders/multiview/multiview_screen.ps.hlsl index ee87711c..7a267879 100644 --- a/examples/basic/shaders/multiview/multiview_screen.ps.hlsl +++ b/examples/basic/shaders/multiview/multiview_screen.ps.hlsl @@ -11,9 +11,9 @@ struct PushConstants { }; [[vk::push_constant]] ConstantBuffer pushConstants : register(b0); -// binding 0, space 1 is used for samplers -[[vk::binding(0, 1)]] SamplerState sampler_point[] : register(s0, space1); -[[vk::binding(0, 3)]] Texture2DArray tex[] : register(t0, space5); + +[[vk::binding(0, 1)]] Texture2DArray tex[] : register(t0, space1); +[[vk::binding(0, 2)]] SamplerState sampler_point[] : register(s0, space2); PSOutput main(PSInput ps_in) { diff --git a/examples/basic/shaders/raytracing/raytracing.lib.hlsl b/examples/basic/shaders/raytracing/raytracing.lib.hlsl new file mode 100644 index 00000000..d585cfc5 --- /dev/null 
+++ b/examples/basic/shaders/raytracing/raytracing.lib.hlsl @@ -0,0 +1,59 @@ +struct Payload /* ray payload passed through TraceRay; RayGeneration reads back only color */ +{ + float3 color; + bool allowReflection; + bool missed; +}; + +[[vk::binding(0,1)]] RWTexture2D uav[] : register(u0); +[[vk::binding(0,2)]] RaytracingAccelerationStructure scene[] : register(t0, space1); + + +static const float3 camera = float3(0, 0, -5); +static const float3 light = float3(0, 200, 0); +static const float3 skyTop = float3(0.24, 0.44, 0.72); +static const float3 skyBottom = float3(0.75, 0.86, 0.93); + +[shader("raygeneration")] +void RayGeneration() /* maps the launch index to [-1, 1] in x and y, traces one ray into scene[0] and stores the payload color in uav[0] */ +{ + uint2 idx = DispatchRaysIndex().xy; + float2 size = DispatchRaysDimensions().xy; + + float2 uv = idx / size; + uv = uv * 2 - 1; + + + RayDesc ray; + ray.Origin = float3(uv.x, uv.y, camera.z); + ray.Direction = float3(uv.x, uv.y, 1); + ray.TMin = 0.001; + ray.TMax = 1000; + + Payload payload; /* color is not initialised here; it is assigned by Miss or ClosestHit */ + payload.allowReflection = true; + payload.missed = false; + + TraceRay(scene[0], RAY_FLAG_NONE, 0xFF, 0, 0, 0, ray, payload); + + uav[0][idx] = float4(payload.color, 1); + +} + +[shader("miss")] +void Miss(inout Payload payload) /* sky gradient: blends skyBottom to skyTop by the Y of the normalised ray direction */ +{ + + float slope = normalize(WorldRayDirection()).y; + float t = saturate(slope * 5 + 0.5); + payload.color = lerp(skyBottom, skyTop, t); + + payload.missed = true; +} + +[shader("closesthit")] +void ClosestHit(inout Payload payload, + BuiltInTriangleIntersectionAttributes attrib) /* every hit gets the constant color (1, 1, 0); attrib is unused */ +{ + payload.color = float3(1, 1, 0); +} diff --git a/examples/common/CMakeLists.txt b/examples/common/CMakeLists.txt index 3fdff2c9..1ec9e371 100644 --- a/examples/common/CMakeLists.txt +++ b/examples/common/CMakeLists.txt @@ -12,8 +12,14 @@ if(POSTFIX STREQUAL "vk") endif() target_link_libraries( - ${PROJECT_NAME} PUBLIC wis::debug wis::wisdom wis::descriptor-buffer - wis::platform SDL3::SDL3 glm::glm) + ${PROJECT_NAME} + PUBLIC wis::debug + wis::wisdom + wis::descriptor-buffer + wis::raytracing + wis::platform + SDL3::SDL3 + glm::glm) target_include_directories(${PROJECT_NAME} PUBLIC ${fpng_SOURCE_DIR}/src) 
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/examples/common/mouse.h b/examples/common/mouse.h index 0be244a8..2910a2e6 100644 --- a/examples/common/mouse.h +++ b/examples/common/mouse.h @@ -138,8 +138,8 @@ class Mouse static constexpr unsigned int buffer_size = 16u; private: - int x; - int y; + int x = 0; + int y = 0; bool left_is_pressed = false; bool right_is_pressed = false; bool is_in_window = false; diff --git a/examples/common/wis_helper.h b/examples/common/wis_helper.h index 3a7ffd2c..3dce2745 100644 --- a/examples/common/wis_helper.h +++ b/examples/common/wis_helper.h @@ -48,9 +48,12 @@ struct FramedCommandList { FramedCommandList() = default; FramedCommandList(wis::Device& device, wis::QueueType type = wis::QueueType::Graphics) { + wis::Result result = wis::success; for (size_t i = 0; i < flight_frames; i++) { - cmd_list[i] = Unwrap(device.CreateCommandList(type)); + cmd_list[i] = device.CreateCommandList(result, type); } + + CheckResult(result); } public: diff --git a/examples/common/wis_swapchain.h b/examples/common/wis_swapchain.h index 4ec0805f..1926bff2 100644 --- a/examples/common/wis_swapchain.h +++ b/examples/common/wis_swapchain.h @@ -23,8 +23,9 @@ class Swapchain } ~Swapchain() { - if (swap) + if (swap) { Throttle(); + } } public: @@ -103,9 +104,9 @@ class Swapchain std::span textures; std::array render_targets; - wis::DataFormat format; - uint32_t width; - uint32_t height; - bool stereo; + wis::DataFormat format = ex::swapchain_format; + uint32_t width = 0; + uint32_t height = 0; + bool stereo = false; }; } // namespace ex diff --git a/generator/generator.cpp b/generator/generator.cpp index 797cd708..d7e7e4c8 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -427,6 +427,9 @@ constexpr decltype(auto) get(ResultValue& rv) noexcept "\n#include " "\n#include \n\nnamespace wis{\n"; for (auto i : variants) { + if (i->implemented_for == Language::Hidden) { + continue; + } if 
(i->this_type.empty()) { dxapi += MakeCPPVariant(*i, ImplementedFor::DX12); } @@ -450,6 +453,9 @@ constexpr decltype(auto) get(ResultValue& rv) noexcept "\n#include " "\n\nnamespace wis{\n"; for (auto i : variants) { + if (i->implemented_for == Language::Hidden) { + continue; + } if (i->this_type.empty()) { vkapi += MakeCPPVariant(*i, ImplementedFor::Vulkan); } @@ -1291,6 +1297,8 @@ void Generator::ParseVariant(tinyxml2::XMLElement& type, std::string_view extens ref.implemented_for = Language::CPP; } else if (std::string_view(mod->Value()) == "c-only") { ref.implemented_for = Language::C; + } else if (std::string_view(mod->Value()) == "hidden") { + ref.implemented_for = Language::Hidden; } } @@ -1475,7 +1483,7 @@ std::pair Generator::MakeCVariant(const WisVariant& s) { using namespace std::string_literals; - if (s.implemented_for == Language::CPP) { + if (s.implemented_for == Language::CPP || s.implemented_for == Language::Hidden) { return {}; } diff --git a/generator/generator.h b/generator/generator.h index 25cd43ed..91a4c890 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -85,7 +85,8 @@ struct WisBitmask { enum class Language { None, C, - CPP + CPP, + Hidden, // Special case, cancels generation }; struct WisStructMember { diff --git a/wisdom/extensions/CMakeLists.txt b/wisdom/extensions/CMakeLists.txt index 35c21ac9..c835604a 100644 --- a/wisdom/extensions/CMakeLists.txt +++ b/wisdom/extensions/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(wis::extension ALIAS wisdom-extension) add_subdirectory(debug_info) add_subdirectory(extended_allocation) add_subdirectory(descriptor_buffer) +add_subdirectory(raytracing) install( TARGETS wisdom-extension wisdom-extension-headers diff --git a/wisdom/extensions/raytracing/CMakeLists.txt b/wisdom/extensions/raytracing/CMakeLists.txt new file mode 100644 index 00000000..66d1fcca --- /dev/null +++ b/wisdom/extensions/raytracing/CMakeLists.txt @@ -0,0 +1,87 @@ +project(wisdom-raytracing) + 
+add_library(wisdom-raytracing-headers INTERFACE) +add_library(wis::raytracing-headers ALIAS wisdom-raytracing-headers) + +target_link_libraries(wisdom-raytracing-headers INTERFACE wisdom-headers) + +target_include_directories( + wisdom-raytracing-headers + INTERFACE $ + $) + +if(WISDOM_BUILD_BINARIES) + add_library( + wisdom-raytracing STATIC + "wisdom/wisdom_raytracing.hpp" + "wisdom/impl/impl.vk.cpp" + "wisdom/vk_rtas.h" + "wisdom/impl.dx12.h" + "wisdom/impl.vk.h" + "wisdom/impl.h" + "wisdom/dx12_raytracing_pipeline.h" + "wisdom/impl/impl.dx12.cpp" + "wisdom/vk_raytracing_pipeline.h") + add_library(wis::raytracing ALIAS wisdom-raytracing) + target_link_libraries(wisdom-raytracing PUBLIC wis::wisdom) + target_include_directories( + wisdom-raytracing PUBLIC $ + $) +else() + add_library(wisdom-raytracing INTERFACE) + add_library(wis::raytracing ALIAS wisdom-raytracing) + target_link_libraries(wisdom-raytracing INTERFACE wisdom-raytracing-headers) +endif() + +# link the raytracing library to the main wisdom extension library +target_link_libraries(wisdom-extension INTERFACE wisdom-raytracing) +target_link_libraries(wisdom-extension-headers + INTERFACE wisdom-raytracing-headers) + +install( + TARGETS wisdom-raytracing wisdom-raytracing-headers + EXPORT wisdom-targets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wisdom) + +if(WISDOM_GENERATE_FUNCTIONS) + if(CMAKE_HOST_SYSTEM_NAME MATCHES "Linux") + # Generate a shell script for Linux + set(LAUNCHER_SCRIPT + "#!/bin/sh\nexec \"\$\" -l -i ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/gen/vk_functions.in -o ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/generated/vk_functions.hpp \"$@\"\n" + ) + file( + GENERATE + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/wisdom/gen/generate_functions.sh" + CONTENT 
"${LAUNCHER_SCRIPT}") + execute_process( + COMMAND chmod +x + ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/gen/generate_functions.sh) + elseif(CMAKE_HOST_SYSTEM_NAME MATCHES "Windows") + # Generate a batch file for Windows + set(LAUNCHER_SCRIPT + "@echo off\nstart /B \"\" \"\$\" -l -i ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/gen/vk_functions.in -o ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/generated/vk_functions.hpp %*\n" + ) + file( + GENERATE + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/wisdom/gen/generate_functions.bat" + CONTENT "${LAUNCHER_SCRIPT}") + endif() + + if(WISDOM_GENERATE_API) #[[ regenerates this extension's vk_functions.hpp; the generator target is attached to this extension's own header target ]] + add_custom_target( + GenerateFunctions_${PROJECT_NAME} + COMMAND + $ -l -i + ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/gen/vk_functions.in -o + ${CMAKE_CURRENT_SOURCE_DIR}/wisdom/generated/vk_functions.hpp) + add_dependencies(wisdom-raytracing-headers + GenerateFunctions_${PROJECT_NAME}) + endif() +endif() diff --git a/wisdom/extensions/raytracing/wisdom/dx12_raytracing_pipeline.h b/wisdom/extensions/raytracing/wisdom/dx12_raytracing_pipeline.h new file mode 100644 index 00000000..662fcf76 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/dx12_raytracing_pipeline.h @@ -0,0 +1,37 @@ +#ifndef WIS_DX12_RAYTRACING_PIPELINE_H +#define WIS_DX12_RAYTRACING_PIPELINE_H +#include +#include +#include +#include + +namespace wis { +class DX12RaytracingPipeline; + +using DX12RaytracingPipelineView = std::tuple; + +template<> +struct Internal { + wis::com_ptr state_object; + std::unique_ptr shader_identifiers; +}; + +class DX12RaytracingPipeline : public QueryInternal /* pipeline handle: converts to a view of state_object and exposes the raw shader-identifier bytes; tests true when state_object is set */ +{ +public: + DX12RaytracingPipeline() noexcept = default; + operator DX12RaytracingPipelineView() noexcept + { + return { state_object.get() }; + } + const uint8_t* GetShaderIdentifiers() const noexcept + { + return shader_identifiers.get(); + } + operator bool() const noexcept + { + return bool(state_object); + } +}; +} // namespace wis +#endif // !WIS_DX12_RAYTRACING_PIPELINE_H diff --git a/wisdom/extensions/raytracing/wisdom/gen/vk_functions.in 
b/wisdom/extensions/raytracing/wisdom/gen/vk_functions.in new file mode 100644 index 00000000..c166c8ec --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/gen/vk_functions.in @@ -0,0 +1,11 @@ +[VKRaytracingExt] + +# Device table +vkGetAccelerationStructureBuildSizesKHR +vkCreateAccelerationStructureKHR +vkDestroyAccelerationStructureKHR +vkCmdBuildAccelerationStructuresKHR +vkGetAccelerationStructureDeviceAddressKHR +vkCreateRayTracingPipelinesKHR +vkGetRayTracingShaderGroupHandlesKHR +vkCmdTraceRaysKHR diff --git a/wisdom/extensions/raytracing/wisdom/generated/vk_functions.hpp b/wisdom/extensions/raytracing/wisdom/generated/vk_functions.hpp new file mode 100644 index 00000000..7866d5a6 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/generated/vk_functions.hpp @@ -0,0 +1,75 @@ +#pragma once +#include +#include +#include + +namespace wis { + +#if !(defined(VK_KHR_ray_tracing_pipeline)) +#if defined(VK_NV_ray_tracing) +using PFN_vkGetRayTracingShaderGroupHandlesKHR = PFN_vkGetRayTracingShaderGroupHandlesNV; +#endif +#endif + +struct VKRaytracingExtDevice { /* table of the device-level Vulkan raytracing entry points; the members have no initialisers and are assigned by Init() */ + PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR; + PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR; + PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR; + PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR; + PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR; + PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR; + PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR; + PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR; + +public: + bool Init(VkDevice device, PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr) noexcept /* resolves each entry point through vkGetDeviceProcAddr and returns false at the first one that comes back null, leaving the remaining members unassigned; returns true once all are resolved */ + { + vkCreateRayTracingPipelinesKHR = (PFN_vkCreateRayTracingPipelinesKHR)vkGetDeviceProcAddr(device, "vkCreateRayTracingPipelinesKHR"); + if (vkCreateRayTracingPipelinesKHR == nullptr) { + return false; + } + 
vkCmdTraceRaysKHR = (PFN_vkCmdTraceRaysKHR)vkGetDeviceProcAddr(device, "vkCmdTraceRaysKHR"); + if (vkCmdTraceRaysKHR == nullptr) { + return false; + } + vkCreateAccelerationStructureKHR = (PFN_vkCreateAccelerationStructureKHR)vkGetDeviceProcAddr(device, "vkCreateAccelerationStructureKHR"); + if (vkCreateAccelerationStructureKHR == nullptr) { + return false; + } + vkGetAccelerationStructureBuildSizesKHR = (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetDeviceProcAddr(device, "vkGetAccelerationStructureBuildSizesKHR"); + if (vkGetAccelerationStructureBuildSizesKHR == nullptr) { + return false; + } + vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetDeviceProcAddr(device, "vkDestroyAccelerationStructureKHR"); + if (vkDestroyAccelerationStructureKHR == nullptr) { + return false; + } + vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetDeviceProcAddr(device, "vkCmdBuildAccelerationStructuresKHR"); + if (vkCmdBuildAccelerationStructuresKHR == nullptr) { + return false; + } + vkGetAccelerationStructureDeviceAddressKHR = (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetDeviceProcAddr(device, "vkGetAccelerationStructureDeviceAddressKHR"); + if (vkGetAccelerationStructureDeviceAddressKHR == nullptr) { + return false; + } + static constexpr std::array vkGetRayTracingShaderGroupHandlesKHR_strings{ /* candidate names in lookup order: the KHR name first, then the NV name, each only when its extension macro is defined */ +#if defined(VK_KHR_ray_tracing_pipeline) + "vkGetRayTracingShaderGroupHandlesKHR", +#endif +#if defined(VK_NV_ray_tracing) + "vkGetRayTracingShaderGroupHandlesNV", +#endif + }; + for (auto vkGetRayTracingShaderGroupHandlesKHR_it : vkGetRayTracingShaderGroupHandlesKHR_strings) { /* the first name that resolves wins */ + if ((vkGetRayTracingShaderGroupHandlesKHR = (PFN_vkGetRayTracingShaderGroupHandlesKHR)vkGetDeviceProcAddr(device, vkGetRayTracingShaderGroupHandlesKHR_it))) { + break; + } + } + if (vkGetRayTracingShaderGroupHandlesKHR == nullptr) { + return false; + } + return true; + } +}; + +} // namespace wis diff --git 
a/wisdom/extensions/raytracing/wisdom/impl.dx12.h b/wisdom/extensions/raytracing/wisdom/impl.dx12.h new file mode 100644 index 00000000..6bab6f43 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/impl.dx12.h @@ -0,0 +1,239 @@ +#ifndef WISDOM_RAYTRACING_DX12_HPP +#define WISDOM_RAYTRACING_DX12_HPP +#if defined(WISDOM_DX12) +#include +#include +#include +#include + +namespace wis { +class DX12Raytracing; + +using DX12AccelerationStructure = D3D12_GPU_VIRTUAL_ADDRESS; +using DX12AccelerationStructureView = std::tuple; + +template<> +struct Internal { + wis::com_ptr shared_device; + + bool supports_raytracing = false; +}; + +class ImplDX12Raytracing : public QueryInternalExtension /* D3D12 backend of the raytracing extension: acceleration-structure sizing and builds, ray dispatch and acceleration-structure descriptor writes */ +{ +protected: + virtual wis::Result Init(const wis::DX12Device& instance) noexcept override /* stores the device and records whether D3D12_OPTIONS5 reports a raytracing tier; returns wis::success in every case */ + { + shared_device = instance.GetInternal().device; + D3D12_FEATURE_DATA_D3D12_OPTIONS5 featureSupportData = {}; + supports_raytracing = wis::succeeded(shared_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &featureSupportData, sizeof(featureSupportData))) && + featureSupportData.RaytracingTier != D3D12_RAYTRACING_TIER_NOT_SUPPORTED; + return wis::success; + } + +public: + virtual bool Supported() const noexcept override + { + return supports_raytracing; + } + +public: + [[nodiscard]] wis::ASAllocationInfo + GetTopLevelASSize(const wis::TopLevelASBuildDesc& tlas_desc) const noexcept /* returns the scratch, result and update-scratch sizes of a top-level build, each rounded up to D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT */ + { + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs{ + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL, + .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS(convert_dx(tlas_desc.flags) | (tlas_desc.update ? D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE : 0)), + .NumDescs = tlas_desc.instance_count, + .DescsLayout = tlas_desc.indirect ? 
D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS : D3D12_ELEMENTS_LAYOUT_ARRAY, + .InstanceDescs = tlas_desc.gpu_address + }; + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuild_info = {}; + shared_device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuild_info); /* NOTE(review): the sizes are cast to uint32_t below, which truncates anything above 4 GiB - confirm this is intended */ + return { wis::detail::aligned_size(uint32_t(prebuild_info.ScratchDataSizeInBytes), uint32_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)), + wis::detail::aligned_size(uint32_t(prebuild_info.ResultDataMaxSizeInBytes), uint32_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)), + wis::detail::aligned_size(uint32_t(prebuild_info.UpdateScratchDataSizeInBytes), uint32_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)) }; + } + + [[nodiscard]] wis::ASAllocationInfo + GetBottomLevelASSize(const wis::DX12BottomLevelASBuildDesc& tlas_desc) const noexcept /* same as GetTopLevelASSize for a bottom-level build; the parameter is named tlas_desc but describes the bottom-level geometry. A non-null geometry_array selects the plain-array layout, otherwise geometry_indirect is used as an array of pointers */ + { + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs{ + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL, + .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS(convert_dx(tlas_desc.flags) | (tlas_desc.update ? D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE : 0)), + .NumDescs = tlas_desc.geometry_count, + .DescsLayout = tlas_desc.geometry_array ? 
D3D12_ELEMENTS_LAYOUT_ARRAY : D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS, + }; + if (tlas_desc.geometry_array) { + inputs.pGeometryDescs = tlas_desc.geometry_array; + } else { + inputs.ppGeometryDescs = tlas_desc.geometry_indirect; + } + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuild_info = {}; + shared_device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuild_info); + return { wis::detail::aligned_size(uint32_t(prebuild_info.ScratchDataSizeInBytes), uint32_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)), + wis::detail::aligned_size(uint32_t(prebuild_info.ResultDataMaxSizeInBytes), uint32_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)), + wis::detail::aligned_size(uint32_t(prebuild_info.UpdateScratchDataSizeInBytes), uint32_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)) }; + } + + [[nodiscard]] wis::DX12AccelerationStructure + CreateAccelerationStructure(wis::Result& result, wis::DX12BufferView buffer, uint64_t buffer_offset, [[maybe_unused]] uint64_t structure_size, [[maybe_unused]] ASLevel structure_level) const noexcept /* on D3D12 the structure is just the GPU address of the buffer plus buffer_offset; result is not written */ + { + return std::get<0>(buffer)->GetGPUVirtualAddress() + buffer_offset; + } + + [[nodiscard]] uint64_t GetAccelerationStructureDeviceAddress(wis::DX12AccelerationStructureView as) const noexcept + { + return std::get<0>(as); + } + + [[nodiscard]] WIS_INLINE wis::DX12RaytracingPipeline + CreateRaytracingPipeline(wis::Result& result, const wis::DX12RaytracingPipeineDesc& desc) const noexcept; + + [[nodiscard]] static constexpr wis::ShaderBindingTableInfo GetShaderBindingTableInfo() noexcept + { + return wis::ShaderBindingTableInfo{ D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT }; + } + +public: + void BuildBottomLevelAS(wis::DX12CommandListView cmd_list, + const wis::DX12BottomLevelASBuildDesc& blas_desc, + wis::DX12AccelerationStructureView dst_acceleration_structure, + uint64_t scratch_buffer_gpu_address, + wis::DX12AccelerationStructureView 
src_acceleration_structure = {}) const noexcept /* records a bottom-level build (or update, when blas_desc.update is set) into cmd_list */ + { + auto* cmd_list_i = static_cast(std::get<0>(cmd_list)); + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC build_desc{ + .DestAccelerationStructureData = std::get<0>(dst_acceleration_structure), + .Inputs = { + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL, + .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS(convert_dx(blas_desc.flags) | (blas_desc.update ? D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE : 0)), + .NumDescs = blas_desc.geometry_count, + .DescsLayout = blas_desc.geometry_array ? D3D12_ELEMENTS_LAYOUT_ARRAY : D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS, + }, + .SourceAccelerationStructureData = std::get<0>(src_acceleration_structure), + .ScratchAccelerationStructureData = { scratch_buffer_gpu_address }, + }; + if (blas_desc.geometry_array) { + build_desc.Inputs.pGeometryDescs = blas_desc.geometry_array; + } else { + build_desc.Inputs.ppGeometryDescs = blas_desc.geometry_indirect; + } + cmd_list_i->BuildRaytracingAccelerationStructure(&build_desc, 0, nullptr); + } + + void BuildTopLevelAS(wis::DX12CommandListView cmd_list, + const wis::TopLevelASBuildDesc& tlas_desc, + wis::DX12AccelerationStructureView dst_acceleration_structure, + uint64_t scratch_buffer_gpu_address, + wis::DX12AccelerationStructureView src_acceleration_structure = {}) const noexcept /* records a top-level build (or update, when tlas_desc.update is set) into cmd_list */ + { + auto* cmd_list_i = static_cast(std::get<0>(cmd_list)); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC build_desc{ + .DestAccelerationStructureData = std::get<0>(dst_acceleration_structure), + .Inputs = { + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL, + .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS(convert_dx(tlas_desc.flags) | (tlas_desc.update ? D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE : 0)), + .NumDescs = tlas_desc.instance_count, + .DescsLayout = tlas_desc.indirect ? 
D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS : D3D12_ELEMENTS_LAYOUT_ARRAY, + .InstanceDescs = tlas_desc.gpu_address }, + .SourceAccelerationStructureData = std::get<0>(src_acceleration_structure), + .ScratchAccelerationStructureData = { scratch_buffer_gpu_address } + }; + + cmd_list_i->BuildRaytracingAccelerationStructure(&build_desc, 0, nullptr); + } + + void SetPipelineState(wis::DX12CommandListView cmd_list, wis::DX12RaytracingPipelineView pipeline) const noexcept + { + auto* cmd_list_i = static_cast(std::get<0>(cmd_list)); + auto* pipeline_i = std::get<0>(pipeline); + cmd_list_i->SetPipelineState1(pipeline_i); + } + + void SetDescriptorStorage(wis::DX12CommandList& cmd_list, wis::DX12DescriptorStorageView desc_storage) const noexcept /* forwards to the command list's compute descriptor-storage binding */ + { + cmd_list.SetComputeDescriptorStorage(desc_storage); + } + + void PushDescriptor(wis::DX12CommandList& cmd_list, wis::DescriptorType type, uint32_t root_index, wis::DX12BufferView buffer, uint32_t offset) const noexcept /* forwards to the command list's compute push-descriptor call */ + { + cmd_list.PushDescriptorCompute(type, root_index, buffer, offset); + } + + void DispatchRays(wis::DX12CommandListView cmd_list, const wis::RaytracingDispatchDesc& desc) const noexcept /* copies desc field by field into D3D12_DISPATCH_RAYS_DESC and records the dispatch */ + { + auto* cmd_list_i = static_cast(std::get<0>(cmd_list)); + + D3D12_DISPATCH_RAYS_DESC dispatch_desc{ + .RayGenerationShaderRecord = { desc.ray_gen_shader_table_address, desc.ray_gen_shader_table_size }, + .MissShaderTable = { desc.miss_shader_table_address, desc.miss_shader_table_size, desc.miss_shader_table_stride }, + .HitGroupTable = { desc.hit_group_table_address, desc.hit_group_table_size, desc.hit_group_table_stride }, + .CallableShaderTable = { desc.callable_shader_table_address, desc.callable_shader_table_size, desc.callable_shader_table_stride }, + .Width = desc.width, + .Height = desc.height, + .Depth = desc.depth, + }; + cmd_list_i->DispatchRays(&dispatch_desc); + } + + void WriteAccelerationStructure(wis::DX12DescriptorStorageView storage, uint32_t binding_set, uint32_t index, wis::DX12AccelerationStructureView 
as) noexcept /* writes an acceleration-structure SRV for `as` into element `index` of binding `binding_set` of the storage */ + { + auto& internal = std::get<0>(storage)->GetInternal(); + D3D12_SHADER_RESOURCE_VIEW_DESC desc{ + .Format = DXGI_FORMAT_UNKNOWN, + .ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .RaytracingAccelerationStructure = { std::get<0>(as) } + }; + auto handle = D3D12_CPU_DESCRIPTOR_HANDLE(internal.heap_cpu_starts[0].ptr + internal.heap_offsets[binding_set].offset_in_bytes + SIZE_T(index) * shared_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); /* step to element `index` inside the binding's range; an SRV is created in heap 0, so the CBV/SRV/UAV increment applies. Previously `index` was ignored and element 0 was always overwritten */ + shared_device->CreateShaderResourceView(nullptr, &desc, handle); + } +}; + +[[nodiscard]] inline constexpr wis::DX12AcceleratedGeometryDesc +DX12CreateGeometryDesc(const wis::AcceleratedGeometryInput& desc) noexcept /* converts the API-neutral geometry input into a D3D12 geometry description; geometry types other than Triangles and AABBs leave the payload zero-initialised */ +{ + D3D12_RAYTRACING_GEOMETRY_DESC geometry{ + .Type = convert_dx(desc.geometry_type), + .Flags = convert_dx(desc.flags), + }; + switch (desc.geometry_type) { + case wis::ASGeometryType::Triangles: + geometry.Triangles = { + .Transform3x4 = desc.transform_matrix_address, + .IndexFormat = convert_dx(desc.index_format), + .VertexFormat = convert_dx(desc.vertex_format), + .IndexCount = desc.triangle_or_aabb_count * 3, /* NOTE(review): set even when index_buffer_address is 0 - confirm non-indexed triangles are not expected here */ + .VertexCount = desc.vertex_count, + .IndexBuffer = desc.index_buffer_address, + .VertexBuffer = { + .StartAddress = desc.vertex_or_aabb_buffer_address, + .StrideInBytes = desc.vertex_or_aabb_buffer_stride } + }; + break; + case wis::ASGeometryType::AABBs: + geometry.AABBs = { + .AABBCount = desc.triangle_or_aabb_count, + .AABBs = { + .StartAddress = desc.vertex_or_aabb_buffer_address, + .StrideInBytes = desc.vertex_or_aabb_buffer_stride } + }; + break; + default: + break; + } + return geometry; +} + +#pragma region DX12Raytracing +#pragma endregion DX12Raytracing +} // namespace wis + +// #ifndef WISDOM_BUILD_BINARIES +// #include "impl/impl.dx12.cpp" +// #endif // !WISDOM_PLATFORM_HEADER_ONLY +#endif // WISDOM_DX12 +#endif // !WISDOM_RAYTRACING_DX12_HPP diff --git a/wisdom/extensions/raytracing/wisdom/impl.h b/wisdom/extensions/raytracing/wisdom/impl.h new file 
mode 100644 index 00000000..4ecd1834 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/impl.h @@ -0,0 +1,14 @@ +#ifndef WIS_RAYTRACING_IMPL_H +#define WIS_RAYTRACING_IMPL_H +#include +namespace wis { +/* One instance record: a 3x4 float transform, a 24-bit instance id packed with an 8-bit mask, a 24-bit instance offset packed with 8-bit flags, and a 64-bit acceleration structure handle. NOTE(review): presumably laid out to match the native D3D12/Vulkan instance descriptors - confirm. */ struct AccelerationInstance { + float transform[3][4]; + uint32_t instance_id : 24; + uint32_t mask : 8; + uint32_t instance_offset : 24; + uint32_t flags : 8; + uint64_t acceleration_structure_handle; +}; +} // namespace wis +#endif diff --git a/wisdom/extensions/raytracing/wisdom/impl.vk.h b/wisdom/extensions/raytracing/wisdom/impl.vk.h new file mode 100644 index 00000000..00d67e7c --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/impl.vk.h @@ -0,0 +1,207 @@ +#ifndef WISDOM_RAYTRACING_VK_HPP +#define WISDOM_RAYTRACING_VK_HPP +#if defined(WISDOM_VULKAN) +#include +#include +#include "generated/vk_functions.hpp" +#include "vk_raytracing_pipeline.h" + +namespace wis { +class VKRaytracing; + +/* Internal state of the Vulkan raytracing extension: the shared device, the extension function table, the shader binding table info filled in by Init, and the raw (unpadded) shader group handle size. */ template<> +struct Internal { + wis::SharedDevice device; + wis::VKRaytracingExtDevice table; + + wis::ShaderBindingTableInfo sbt_info; + uint32_t compressed_handle_size = 0; +}; + +/* Vulkan implementation of the raytracing extension: acceleration structure sizing/creation/build, raytracing pipeline creation and ray dispatch. */ class ImplVKRaytracing : public QueryInternalExtension +{ +protected: + virtual WIS_INLINE bool + GetExtensionInfo(const std::unordered_map>& available_extensions, + std::unordered_set& ext_name_set, + std::unordered_map& structure_map, + std::unordered_map& property_map) noexcept override; + + virtual WIS_INLINE wis::Result + Init(const wis::VKDevice& instance, + const std::unordered_map& structure_map, + const std::unordered_map& property_map) noexcept override; + +public: + /* Reports support by testing whether vkGetAccelerationStructureBuildSizesKHR was loaded into the function table. */ virtual bool Supported() const noexcept override + { + return table.vkGetAccelerationStructureBuildSizesKHR; + } + +public: + /* NOTE(review): unlike GetBottomLevelASSize below, this overload is declared without const noexcept. */ [[nodiscard]] WIS_INLINE wis::ASAllocationInfo + GetTopLevelASSize(const wis::TopLevelASBuildDesc& tlas_desc); + + [[nodiscard]] WIS_INLINE wis::ASAllocationInfo + GetBottomLevelASSize(const wis::VKBottomLevelASBuildDesc& blas_desc) const noexcept; + + [[nodiscard]] wis::VKAccelerationStructure 
+ /* Creates a top- or bottom-level acceleration structure object placed at buffer_offset inside the given buffer, structure_size bytes long. On failure the Vulkan error is written to result and the returned object has no device or destroy function attached. */ CreateAccelerationStructure(wis::Result& result, wis::VKBufferView buffer, uint64_t buffer_offset, uint64_t structure_size, ASLevel structure_level) const noexcept + { + VkAccelerationStructureCreateInfoKHR create_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, + .buffer = std::get<0>(buffer), + .offset = buffer_offset, + .size = structure_size, + .type = structure_level == ASLevel::Top ? VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR + }; + VKAccelerationStructure as; + auto& as_i = as.GetMutableInternal(); + + auto res = table.vkCreateAccelerationStructureKHR(device.get(), &create_info, nullptr, &as_i.handle); + if (res != VK_SUCCESS) { + result = wis::make_result(res); + return as; + } + /* the structure stores the device and the destroy entry point it was created with */ as_i.device = device; + as_i.vkDestroyAccelerationStructureKHR = table.vkDestroyAccelerationStructureKHR; + return as; + } + + /* Returns the value of vkGetAccelerationStructureDeviceAddressKHR for the given acceleration structure. */ [[nodiscard]] uint64_t GetAccelerationStructureDeviceAddress(wis::VKAccelerationStructureView as) const noexcept + { + VkAccelerationStructureDeviceAddressInfoKHR info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, + .accelerationStructure = std::get<0>(as) + }; + return table.vkGetAccelerationStructureDeviceAddressKHR(device.get(), &info); + } + + [[nodiscard]] WIS_INLINE wis::VKRaytracingPipeline + CreateRaytracingPipeline(wis::Result& result, const wis::VKRaytracingPipeineDesc& rt_pipeline_desc) const noexcept; + + /* Returns the shader binding table info computed in Init. */ [[nodiscard]] WIS_INLINE wis::ShaderBindingTableInfo GetShaderBindingTableInfo() const noexcept + { + return sbt_info; + } + +public: + void + BuildBottomLevelAS(wis::VKCommandListView cmd_buffer, + const wis::VKBottomLevelASBuildDesc& blas_desc, + wis::VKAccelerationStructureView dst_acceleration_structure, + uint64_t scratch_buffer_gpu_address, + wis::VKAccelerationStructureView src_acceleration_structure = {}) const noexcept; + + void BuildTopLevelAS(wis::VKCommandListView cmd_buffer, + const wis::TopLevelASBuildDesc& tlas_desc, + 
wis::VKAccelerationStructureView dst_acceleration_structure, + uint64_t scratch_buffer_gpu_address, + wis::VKAccelerationStructureView src_acceleration_structure = {}) const noexcept; + + /* Binds the raytracing pipeline on the ray tracing bind point of the command list. */ void SetPipelineState(wis::VKCommandListView cmd_list, wis::VKRaytracingPipelineView pipeline) const noexcept + { + device.table().vkCmdBindPipeline(std::get<0>(cmd_list), VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, std::get<0>(pipeline)); + } + /* Forwards to the command list's descriptor storage binding, using the ray tracing bind point. */ void SetDescriptorStorage(wis::VKCommandList& cmd_list, wis::VKDescriptorStorageView desc_storage) const noexcept + { + cmd_list.VKSetDescriptorStorage(desc_storage, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + } + /* Forwards to the command list's push descriptor, using the ray tracing bind point. */ void PushDescriptor(wis::VKCommandList& cmd_list, wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset) const noexcept + { + cmd_list.VKPushDescriptor(type, binding, view, offset, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + } + + /* Records vkCmdTraceRaysKHR with four shader table regions (raygen, miss, hit, callable) taken from desc and a width x height x depth launch. */ void DispatchRays(wis::VKCommandListView cmd_list, const wis::RaytracingDispatchDesc& desc) const noexcept + { + auto* cmd_list_i = std::get<0>(cmd_list); + /* the raygen region uses its size as its stride, i.e. it is treated as a single record */ VkStridedDeviceAddressRegionKHR raygen{ + .deviceAddress = desc.ray_gen_shader_table_address, + .stride = desc.ray_gen_shader_table_size, + .size = desc.ray_gen_shader_table_size, + }; + VkStridedDeviceAddressRegionKHR miss{ + .deviceAddress = desc.miss_shader_table_address, + .stride = desc.miss_shader_table_stride, + .size = desc.miss_shader_table_size + }; + VkStridedDeviceAddressRegionKHR hit{ + .deviceAddress = desc.hit_group_table_address, + .stride = desc.hit_group_table_stride, + .size = desc.hit_group_table_size + }; + VkStridedDeviceAddressRegionKHR callable{ + .deviceAddress = desc.callable_shader_table_address, + .stride = desc.callable_shader_table_stride, + .size = desc.callable_shader_table_size + }; + table.vkCmdTraceRaysKHR(cmd_list_i, &raygen, &miss, &hit, &callable, desc.width, desc.height, desc.depth); + } + + void WriteAccelerationStructure(wis::VKDescriptorStorageView storage, uint32_t binding_set, 
uint32_t index, wis::VKAccelerationStructureView as) noexcept + { + VkWriteDescriptorSetAccelerationStructureKHR as_info{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, + .accelerationStructureCount = 1, + .pAccelerationStructures = &std::get<0>(as), + }; + VkWriteDescriptorSet write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = &as_info, + .dstSet = std::get<0>(storage)[binding_set], + .dstBinding = 0, + .dstArrayElement = index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, + }; + device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); + } +}; + +/* Translates an API-neutral AcceleratedGeometryInput into the Vulkan pair (geometry description, build range). The range's primitiveCount is the triangle or AABB count; triangles fill geometry.triangles, AABBs fill geometry.aabbs, any other type leaves the geometry union untouched. */ [[nodiscard]] inline constexpr wis::VKAcceleratedGeometryDesc +VKCreateGeometryDesc(const wis::AcceleratedGeometryInput& desc) noexcept +{ + wis::VKAcceleratedGeometryDesc out; + out.first = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = convert_vk(desc.geometry_type), + .flags = convert_vk(desc.flags) + }; + out.second = { + .primitiveCount = desc.triangle_or_aabb_count, + }; + switch (desc.geometry_type) { + case wis::ASGeometryType::Triangles: + out.first.geometry.triangles = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, + .vertexFormat = convert_vk(desc.vertex_format), + .vertexData = { .deviceAddress = desc.vertex_or_aabb_buffer_address }, + .vertexStride = desc.vertex_or_aabb_buffer_stride, + /* Vulkan defines maxVertex as the highest vertex index (number of vertices minus one), not the vertex count; guard against underflow for an empty input */ .maxVertex = desc.vertex_count ? desc.vertex_count - 1u : 0u, + .indexType = convert_vk(desc.index_format), + .indexData = { .deviceAddress = desc.index_buffer_address }, + .transformData = { .deviceAddress = desc.transform_matrix_address } + }; + break; + case wis::ASGeometryType::AABBs: + /* for AABB geometry the shared address/stride fields describe the AABB buffer */ out.first.geometry.aabbs = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, + .data = { .deviceAddress = desc.vertex_or_aabb_buffer_address }, + .stride = desc.vertex_or_aabb_buffer_stride + }; + break; + default: + break; + } + return out; +} + +#pragma 
region VKRaytracing +#pragma endregion VKRaytracing +} // namespace wis + +#ifndef WISDOM_BUILD_BINARIES +#include "impl/impl.vk.cpp" +#endif // !WISDOM_PLATFORM_HEADER_ONLY +#endif // WISDOM_VULKAN +#endif // !WISDOM_RAYTRACING_HPP diff --git a/wisdom/extensions/raytracing/wisdom/impl/impl.dx12.cpp b/wisdom/extensions/raytracing/wisdom/impl/impl.dx12.cpp new file mode 100644 index 00000000..bc06d610 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/impl/impl.dx12.cpp @@ -0,0 +1,268 @@ +#ifndef WISDOM_RAYTRACING_DX12_CPP +#define WISDOM_RAYTRACING_DX12_CPP +#include + +#if defined(WISDOM_DX12) +wis::DX12RaytracingPipeline +wis::ImplDX12Raytracing::CreateRaytracingPipeline(wis::Result& result, const wis::DX12RaytracingPipeineDesc& desc) const noexcept +{ + wis::DX12RaytracingPipeline out_pipeline; + auto& pipe_i = out_pipeline.GetMutableInternal(); + + uint32_t num_raygen = 0; + uint32_t num_miss = 0; + uint32_t num_callable = 0; + + constexpr static std::wstring_view hit_group_exa = L"H|A|C|I|00000000"; + static auto precalc_wchspace = [](const char* data) { + return MultiByteToWideChar(CP_UTF8, 0, data, strlen(data) + 1, nullptr, 0); + }; + static auto convert_to_wch = [](const char* data, wchar_t** wch) { + size_t szc = strlen(data) + 1; + auto wch_spc = MultiByteToWideChar(CP_UTF8, 0, data, szc, nullptr, 0); + MultiByteToWideChar(CP_UTF8, 0, data, szc, *wch, wch_spc); + auto* prev = *wch; + *wch += wch_spc; + return prev; + }; + static auto rename_export = [](const wchar_t* name, wchar_t** wch, uint32_t index) { + size_t szc = wcslen(name); + memcpy(*wch, name, szc * sizeof(wchar_t)); + // append index + auto* prev = *wch; + *wch += szc; + auto offset = swprintf_s(*wch, 10, L"|%08x", index); + *wch += offset + 1; + return prev; + }; + static auto name_hit_group = [](D3D12_HIT_GROUP_DESC& hg, wchar_t** wch, uint32_t index) { + // format: H|A|C|I|00000000 + hg.HitGroupExport = *wch; + **wch = L'H'; + (*wch)++; + **wch = L'|'; + (*wch)++; + **wch = 
hg.AnyHitShaderImport ? L'A' : L'0'; + (*wch)++; + **wch = L'|'; + (*wch)++; + **wch = hg.ClosestHitShaderImport ? L'C' : L'0'; + (*wch)++; + **wch = L'|'; + (*wch)++; + **wch = hg.IntersectionShaderImport ? L'I' : L'0'; + (*wch)++; + **wch = L'|'; + (*wch)++; + auto offset = swprintf_s(*wch, 9, L"%08x", index); + *wch += offset + 1; + }; + + // count all the string space + size_t wchspace = 0; + for (uint32_t i = 0; i < desc.export_count; ++i) { + auto sz = precalc_wchspace(desc.exports[i].entry_point); + if (sz == 0) { + result = wis::make_result(E_INVALIDARG); + return out_pipeline; + } + wchspace += sz; + } + + for (uint32_t i = 0; i < desc.export_count; ++i) { + num_raygen += desc.exports[i].shader_type == wis::RaytracingShaderType::Raygen; + num_miss += desc.exports[i].shader_type == wis::RaytracingShaderType::Miss; + num_callable += desc.exports[i].shader_type == wis::RaytracingShaderType::Callable; + } + + uint32_t num_subobjects = desc.shader_count + desc.hit_group_count + 3; // root signature and max recursion depth + size_t string_offset = num_subobjects * sizeof(D3D12_STATE_SUBOBJECT) + + desc.shader_count * sizeof(D3D12_DXIL_LIBRARY_DESC) + + desc.export_count * sizeof(D3D12_EXPORT_DESC) + + desc.hit_group_count * sizeof(D3D12_HIT_GROUP_DESC) + + (num_callable + num_miss + num_raygen) * sizeof(wchar_t*); + + size_t allocation_size = string_offset + // callable, miss, raygen + // string names + wchspace * sizeof(wchar_t) * 2u + // entry points + desc.export_count * sizeof(wchar_t) * 9u + // unique names + entry points + desc.hit_group_count * sizeof(wchar_t) * (hit_group_exa.size() + 1u) // hit group names format: H|A|C|I|00000000 + ; + std::unique_ptr subobjects = wis::detail::make_unique_for_overwrite(allocation_size); + + // burn shader bytecodes + std::span subobjects_span(reinterpret_cast(subobjects.get()), num_subobjects); + std::span dxil_library_span(reinterpret_cast(subobjects_span.data() + num_subobjects), desc.shader_count); + std::span 
export_span(reinterpret_cast(dxil_library_span.data() + desc.shader_count), desc.export_count); + std::span hit_group_span(reinterpret_cast(export_span.data() + desc.export_count), desc.hit_group_count); + std::span wch_span(reinterpret_cast(subobjects.get() + string_offset), wchspace); + + for (uint32_t i = 0; i < desc.shader_count; ++i) { + dxil_library_span[i] = { + .DXILLibrary = { .pShaderBytecode = std::get<0>(desc.shaders[i]), .BytecodeLength = std::get<1>(desc.shaders[i]) } + }; + subobjects_span[i] = { + .Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY, + .pDesc = &dxil_library_span[i] + }; + } + + // burn exports with bucket sort O(n) + for (uint32_t i = 0; i < desc.export_count; ++i) { + dxil_library_span[desc.exports[i].shader_array_index].NumExports++; + } + + // place exports in reverse order + wchar_t* names = wch_span.data(); // pointer is incremented in convert_to_wch + wchar_t* renames = names + wchspace; + + wchar_t** callable_span = reinterpret_cast(subobjects.get() + string_offset); // callable pointers in reverse order + wchar_t** miss_span = callable_span - num_callable; + wchar_t** raygen_span = miss_span - num_miss; + + dxil_library_span[0].pExports = export_span.data() + dxil_library_span[0].NumExports; + for (uint32_t i = 1; i < desc.shader_count; ++i) { + dxil_library_span[i].pExports = dxil_library_span[i - 1].pExports + dxil_library_span[i].NumExports; + } + for (int32_t i = desc.export_count - 1; i >= 0; --i) { + uint32_t index = desc.exports[i].shader_array_index; + auto x = --dxil_library_span[index].pExports; + auto& _export = const_cast(*x); + + auto* name = convert_to_wch(desc.exports[i].entry_point, &names); + auto* rename = rename_export(name, &renames, i); + + switch (desc.exports[i].shader_type) { + case wis::RaytracingShaderType::Raygen: + --raygen_span; + *(raygen_span) = rename; + break; + case wis::RaytracingShaderType::Miss: + --miss_span; + *(miss_span) = rename; + break; + case wis::RaytracingShaderType::Callable: + 
--callable_span; + *(callable_span) = rename; + break; + default: + break; + } + + _export = { + .Name = rename, + .ExportToRename = name, + .Flags = D3D12_EXPORT_FLAG_NONE + }; + } + + // hit groups + wchar_t* hg_names = renames; + for (uint32_t i = 0; i < desc.hit_group_count; ++i) { + auto& hg = desc.hit_groups[i]; + hit_group_span[i] = { + .Type = convert_dx(desc.hit_groups[i].type), + .AnyHitShaderImport = hg.any_hit_export_index == UINT32_MAX ? nullptr : export_span[hg.any_hit_export_index].Name, + .ClosestHitShaderImport = hg.closest_hit_export_index == UINT32_MAX ? nullptr : export_span[hg.closest_hit_export_index].Name, + .IntersectionShaderImport = hg.intersection_export_index == UINT32_MAX ? nullptr : export_span[hg.intersection_export_index].Name + }; + name_hit_group(hit_group_span[i], &renames, i); + subobjects_span[desc.shader_count + i] = { + .Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP, + .pDesc = &hit_group_span[i] + }; + } + + // shader config + D3D12_RAYTRACING_SHADER_CONFIG shader_config{ + .MaxPayloadSizeInBytes = desc.max_payload_size, + .MaxAttributeSizeInBytes = desc.max_attribute_size + }; + subobjects_span[num_subobjects - 3] = { + .Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG, + .pDesc = &shader_config + }; + + // root signature + D3D12_GLOBAL_ROOT_SIGNATURE global_root_signature{ + .pGlobalRootSignature = std::get<0>(desc.root_signature) + }; + subobjects_span[num_subobjects - 2] = { + .Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE, + .pDesc = &global_root_signature // place root signature here + }; + + // max recursion depth + D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config{ + .MaxTraceRecursionDepth = desc.max_recursion_depth, + .Flags = D3D12_RAYTRACING_PIPELINE_FLAG_NONE + }; + subobjects_span[num_subobjects - 1] = { + .Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG1, + .pDesc = &pipeline_config + }; + + // pipeline + D3D12_STATE_OBJECT_DESC pipeline_desc{ + .Type = 
D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE, + .NumSubobjects = num_subobjects, + .pSubobjects = subobjects_span.data() + }; + + // get shader identifiers + pipe_i.shader_identifiers = wis::detail::make_unique_for_overwrite( + (num_raygen + + num_miss + + num_callable + + desc.hit_group_count) * + D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + + if (!pipe_i.shader_identifiers) { + result = wis::make_result(E_OUTOFMEMORY); + return out_pipeline; + } + + auto hr = shared_device->CreateStateObject(&pipeline_desc, pipe_i.state_object.iid(), pipe_i.state_object.put_void()); + if (!wis::succeeded(hr)) { + result = wis::make_result(hr); + return out_pipeline; + } + + // get shader handles raygen, miss, hit, callable + wis::com_ptr state_object_props; + std::ignore = pipe_i.state_object.as(&state_object_props); + + uint8_t* raygen_ids = pipe_i.shader_identifiers.get(); + uint8_t* miss_ids = raygen_ids + num_raygen * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + uint8_t* hit_ids = miss_ids + num_miss * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + uint8_t* callable_ids = hit_ids + desc.hit_group_count * D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + + for (uint32_t i = 0; i < num_raygen; ++i) { + auto id = state_object_props->GetShaderIdentifier(raygen_span[i]); + memcpy(raygen_ids, id, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + raygen_ids += D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + } + + for (uint32_t i = 0; i < num_miss; ++i) { + auto id = state_object_props->GetShaderIdentifier(miss_span[i]); + memcpy(miss_ids, id, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + miss_ids += D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + } + + for (uint32_t i = 0; i < desc.hit_group_count; ++i) { + auto id = state_object_props->GetShaderIdentifier(hit_group_span[i].HitGroupExport); + memcpy(hit_ids, id, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + hit_ids += D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + } + + for (uint32_t i = 0; i < num_callable; ++i) { + auto id = state_object_props->GetShaderIdentifier(callable_span[i]); + 
memcpy(callable_ids, id, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + callable_ids += D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES; + } + + return out_pipeline; +} + +#endif // WISDOM_DX12 +#endif // WISDOM_RAYTRACING_DX12_CPP diff --git a/wisdom/extensions/raytracing/wisdom/impl/impl.vk.cpp b/wisdom/extensions/raytracing/wisdom/impl/impl.vk.cpp new file mode 100644 index 00000000..f52d34b8 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/impl/impl.vk.cpp @@ -0,0 +1,339 @@ +#ifndef WISDOM_RAYTRACING_VK_CPP +#define WISDOM_RAYTRACING_VK_CPP +#include + +#if defined(WISDOM_VULKAN) +bool wis::ImplVKRaytracing::GetExtensionInfo(const std::unordered_map>& available_extensions, + std::unordered_set& ext_name_set, + std::unordered_map& structure_map, + std::unordered_map& property_map) noexcept +{ + if (!available_extensions.contains(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + return false; + } + if (!available_extensions.contains(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + return false; + } + if (!available_extensions.contains(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME)) { + return false; + } + if (!available_extensions.contains(VK_KHR_RAY_QUERY_EXTENSION_NAME)) { + return false; + } + + ext_name_set.emplace(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME); + structure_map[VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR] = sizeof(VkPhysicalDeviceRayTracingPipelineFeaturesKHR); + property_map[VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR] = sizeof(VkPhysicalDeviceRayTracingPipelinePropertiesKHR); + + ext_name_set.emplace(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + structure_map[VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR] = sizeof(VkPhysicalDeviceAccelerationStructureFeaturesKHR); + property_map[VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR] = sizeof(VkPhysicalDeviceAccelerationStructurePropertiesKHR); + + 
ext_name_set.emplace(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME); + ext_name_set.emplace(VK_KHR_RAY_QUERY_EXTENSION_NAME); + structure_map[VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR] = sizeof(VkPhysicalDeviceRayQueryFeaturesKHR); + return true; +} + +/* Stores the shared device, loads the raytracing function table, and derives the shader binding table info from the ray tracing pipeline properties: the entry size is the shader group handle size rounded up to the handle alignment, paired with the group base alignment. */ wis::Result wis::ImplVKRaytracing::Init(const wis::VKDevice& instance, + const std::unordered_map& structure_map, + const std::unordered_map& property_map) noexcept +{ + device = instance.GetInternal().device; + table.Init(device.get(), instance.GetInternal().device.gtable().vkGetDeviceProcAddr); + + /* NOTE(review): at() is called inside a noexcept function; it presumably relies on GetExtensionInfo having registered this key - confirm */ auto props = reinterpret_cast(property_map.at(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR)); + + /* raw handle size as reported by the driver, before alignment padding */ compressed_handle_size = props->shaderGroupHandleSize; + sbt_info = { wis::detail::aligned_size(compressed_handle_size, props->shaderGroupHandleAlignment), props->shaderGroupBaseAlignment }; + return wis::success; +} + +/* Queries the scratch and storage sizes needed for a top-level acceleration structure with tlas_desc.instance_count instances; returns { build scratch size, structure size, update scratch size }. */ wis::ASAllocationInfo wis::ImplVKRaytracing::GetTopLevelASSize(const wis::TopLevelASBuildDesc& tlas_desc) +{ + VkAccelerationStructureGeometryKHR geometry{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR, + .geometry = { + .instances = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, + .arrayOfPointers = tlas_desc.indirect, + .data = { .deviceAddress = tlas_desc.gpu_address } } }, + }; + VkAccelerationStructureBuildGeometryInfoKHR build_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, + .flags = convert_vk(tlas_desc.flags), + .mode = tlas_desc.update ? 
VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .geometryCount = 1u, + .pGeometries = &geometry, + }; + VkAccelerationStructureBuildSizesInfoKHR build_sizes_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, + }; + + uint32_t max_instance_count = tlas_desc.instance_count; + table.vkGetAccelerationStructureBuildSizesKHR(device.get(), + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &build_info, + &max_instance_count, + &build_sizes_info); + return { build_sizes_info.buildScratchSize, build_sizes_info.accelerationStructureSize, build_sizes_info.updateScratchSize }; +} + +/* Queries the scratch and storage sizes needed for a bottom-level acceleration structure; returns { build scratch size, structure size, update scratch size }. Geometry is read either from blas_desc.geometry_array (direct) or from the pointer array blas_desc.geometry_indirect. */ wis::ASAllocationInfo wis::ImplVKRaytracing::GetBottomLevelASSize(const wis::VKBottomLevelASBuildDesc& blas_desc) const noexcept +{ + // one is ppGeometries, the other is maxPrimitiveCount + /* direct is 1 when a contiguous geometry array was supplied, 0 when only the pointer array is used */ uint32_t direct = bool(blas_desc.geometry_array); + size_t num_bytes = blas_desc.geometry_count * (sizeof(uint32_t) + direct * sizeof(VkAccelerationStructureGeometryKHR*)); + + /* NOTE(review): this constant is not referenced by any visible token; it was presumably a template argument of limited_allocator before extraction stripped it - confirm */ constexpr size_t initial_geometry_guess = 64; + wis::detail::limited_allocator allocator{ uint32_t(num_bytes), true }; + auto* data = allocator.data(); + + /* scratch layout: [geometry pointers (direct mode only)][per-geometry primitive counts] */ const VkAccelerationStructureGeometryKHR** pp_geometries = reinterpret_cast(data); + uint32_t* max_primitive_count = reinterpret_cast(pp_geometries + direct * blas_desc.geometry_count); + + if (direct) { + for (size_t i = 0; i < blas_desc.geometry_count; ++i) { + pp_geometries[i] = &blas_desc.geometry_array[i].first; + max_primitive_count[i] = blas_desc.geometry_array[i].second.primitiveCount; + } + } else { + for (size_t i = 0; i < blas_desc.geometry_count; ++i) { + max_primitive_count[i] = blas_desc.geometry_indirect[i]->second.primitiveCount; + } + } + + VkAccelerationStructureBuildGeometryInfoKHR build_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .flags = convert_vk(blas_desc.flags), + .mode = 
blas_desc.update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .geometryCount = blas_desc.geometry_count, + .ppGeometries = direct + ? reinterpret_cast(data) + : reinterpret_cast(blas_desc.geometry_indirect), + }; + VkAccelerationStructureBuildSizesInfoKHR build_sizes_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, + }; + table.vkGetAccelerationStructureBuildSizesKHR(device.get(), + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &build_info, + max_primitive_count, + &build_sizes_info); + return { build_sizes_info.buildScratchSize, build_sizes_info.accelerationStructureSize, build_sizes_info.updateScratchSize }; +} + +wis::VKRaytracingPipeline +wis::ImplVKRaytracing::CreateRaytracingPipeline(wis::Result& result, const wis::VKRaytracingPipeineDesc& rt_pipeline_desc) const noexcept +{ + wis::VKRaytracingPipeline pipeline; + auto& pipe_i = pipeline.GetMutableInternal(); + + uint32_t raygen_count = 0; + uint32_t miss_count = 0; + uint32_t callable_count = 0; + for (size_t i = 0; i < rt_pipeline_desc.export_count; ++i) { + raygen_count += rt_pipeline_desc.exports[i].shader_type == wis::RaytracingShaderType::Raygen; + miss_count += rt_pipeline_desc.exports[i].shader_type == wis::RaytracingShaderType::Miss; + callable_count += rt_pipeline_desc.exports[i].shader_type == wis::RaytracingShaderType::Callable; + } + + // initialize shader stages + std::unique_ptr stages = wis::detail::make_unique_for_overwrite( + rt_pipeline_desc.export_count * sizeof(VkPipelineShaderStageCreateInfo) + + raygen_count * sizeof(VkRayTracingShaderGroupCreateInfoKHR) + + miss_count * sizeof(VkRayTracingShaderGroupCreateInfoKHR) + + rt_pipeline_desc.hit_group_count * sizeof(VkRayTracingShaderGroupCreateInfoKHR) + + callable_count * sizeof(VkRayTracingShaderGroupCreateInfoKHR)); + if (!stages) { + result = wis::make_result(VK_ERROR_OUT_OF_HOST_MEMORY); + return pipeline; + } + + // create memory for shader 
group handles + uint32_t handle_count = raygen_count + miss_count + rt_pipeline_desc.hit_group_count + callable_count; + uint32_t handle_size = handle_count * sbt_info.entry_size; + pipe_i.shader_identifiers = wis::detail::make_unique_for_overwrite(handle_size); // expanded size for all shader groups + auto* handles = pipe_i.shader_identifiers.get(); + std::memset(handles, 0, handle_size); + + // initialize shader stages + raygen + miss shader groups + std::span stages_span{ reinterpret_cast(stages.get()), rt_pipeline_desc.export_count }; + std::span raygen_span{ reinterpret_cast(stages_span.data() + rt_pipeline_desc.export_count), raygen_count }; + std::span miss_span{ raygen_span.data() + raygen_count, miss_count }; + std::span hit_group_span{ miss_span.data() + miss_count, rt_pipeline_desc.hit_group_count }; + std::span callable_span{ hit_group_span.data() + rt_pipeline_desc.hit_group_count, callable_count }; + + raygen_count = 0; + miss_count = 0; + callable_count = 0; + for (uint32_t i = 0; i < rt_pipeline_desc.export_count; ++i) { + auto& _export = rt_pipeline_desc.exports[i]; // export is a keyword + stages_span[i] = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = convert_vk(_export.shader_type), + .module = std::get<0>(rt_pipeline_desc.shaders[_export.shader_array_index]), + .pName = _export.entry_point, + }; + + switch (_export.shader_type) { + case wis::RaytracingShaderType::Raygen: + raygen_span[raygen_count++] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = i, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR, + }; + break; + case wis::RaytracingShaderType::Miss: + miss_span[miss_count++] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = i, + 
.closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR, + }; + break; + case wis::RaytracingShaderType::Callable: + callable_span[callable_count++] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = i, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR, + }; + break; + } + } + + // initialize hit groups + for (uint32_t i = 0; i < rt_pipeline_desc.hit_group_count; ++i) { + auto& hg = rt_pipeline_desc.hit_groups[i]; + hit_group_span[i] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = convert_vk(hg.type), + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = hg.closest_hit_export_index == UINT32_MAX ? VK_SHADER_UNUSED_KHR : hg.closest_hit_export_index, + .anyHitShader = hg.any_hit_export_index == UINT32_MAX ? VK_SHADER_UNUSED_KHR : hg.any_hit_export_index, + .intersectionShader = hg.intersection_export_index == UINT32_MAX ? 
VK_SHADER_UNUSED_KHR : hg.intersection_export_index + }; + } + + VkRayTracingPipelineInterfaceCreateInfoKHR interface_info{ + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR, + .maxPipelineRayPayloadSize = rt_pipeline_desc.max_payload_size, + .maxPipelineRayHitAttributeSize = rt_pipeline_desc.max_attribute_size, + }; + + VkRayTracingPipelineCreateInfoKHR pipeline_info{ + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, + .stageCount = uint32_t(stages_span.size()), + .pStages = stages_span.data(), + .groupCount = raygen_count + miss_count + rt_pipeline_desc.hit_group_count, + .pGroups = raygen_span.data(), + .maxPipelineRayRecursionDepth = rt_pipeline_desc.max_recursion_depth, // user defined + .layout = std::get<0>(rt_pipeline_desc.root_signature), + }; + + auto vr = table.vkCreateRayTracingPipelinesKHR(device.get(), VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &pipeline_info, nullptr, pipe_i.state_object.put(device, device.table().vkDestroyPipeline)); + if (!wis::succeeded(vr)) { + result = wis::make_result(vr); + } + + // retrieve and uncompress shader group handles + table.vkGetRayTracingShaderGroupHandlesKHR(device.get(), pipe_i.state_object.get(), 0, handle_count, handle_size, handles); + if (sbt_info.entry_size == compressed_handle_size) { + return pipeline; + } + + // uncompress shader group handles in-place + uint8_t* uncompressed_handles = handles + handle_size; + uint8_t* compressed_handles = handles + handle_count * compressed_handle_size; + while (compressed_handles != handles) { + compressed_handles -= compressed_handle_size; + uncompressed_handles -= sbt_info.entry_size; + std::memmove(uncompressed_handles, compressed_handles, sbt_info.entry_size); + std::memset(compressed_handles, 0, size_t(uncompressed_handles - compressed_handles)); + } + + return pipeline; +} + +void wis::ImplVKRaytracing::BuildBottomLevelAS(wis::VKCommandListView cmd_buffer, const wis::VKBottomLevelASBuildDesc& blas_desc, 
wis::VKAccelerationStructureView dst_acceleration_structure, uint64_t scratch_buffer_gpu_address, wis::VKAccelerationStructureView src_acceleration_structure) const noexcept +{ + // one is ppGeometries, the other is the array of build range info pointers + /* Records a bottom-level acceleration structure build (or update, when blas_desc.update is set) into cmd_buffer. direct is 1 when a contiguous geometry array was supplied, 0 when only the pointer array is used. */ uint32_t direct = bool(blas_desc.geometry_array); + size_t num_bytes = blas_desc.geometry_count * (sizeof(VkAccelerationStructureBuildRangeInfoKHR*) + direct * sizeof(VkAccelerationStructureGeometryKHR*)); + + /* NOTE(review): this constant is not referenced by any visible token; it was presumably a template argument of limited_allocator before extraction stripped it - confirm */ constexpr size_t initial_geometry_guess = 64; + wis::detail::limited_allocator allocator{ uint32_t(num_bytes), true }; + auto* data = allocator.data(); + + /* scratch layout: [geometry pointers (direct mode only)][build range pointers] */ const VkAccelerationStructureGeometryKHR** pp_geometries = reinterpret_cast(data); + const VkAccelerationStructureBuildRangeInfoKHR** pp_ranges = reinterpret_cast(pp_geometries + direct * blas_desc.geometry_count); + + if (direct) { + for (size_t i = 0; i < blas_desc.geometry_count; ++i) { + pp_geometries[i] = &blas_desc.geometry_array[i].first; + pp_ranges[i] = &blas_desc.geometry_array[i].second; + } + } else { + // May be optimized + for (size_t i = 0; i < blas_desc.geometry_count; ++i) { + pp_ranges[i] = &blas_desc.geometry_indirect[i]->second; + } + } + + VkAccelerationStructureBuildGeometryInfoKHR build_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .flags = convert_vk(blas_desc.flags), + .mode = blas_desc.update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .srcAccelerationStructure = std::get<0>(src_acceleration_structure), + .dstAccelerationStructure = std::get<0>(dst_acceleration_structure), + .geometryCount = blas_desc.geometry_count, + .ppGeometries = direct + ? 
reinterpret_cast(data) + : reinterpret_cast(blas_desc.geometry_indirect), + .scratchData = scratch_buffer_gpu_address + }; + table.vkCmdBuildAccelerationStructuresKHR(std::get<0>(cmd_buffer), 1, &build_info, pp_ranges); +} + +/* Records a top-level acceleration structure build (or update, when tlas_desc.update is set) into cmd_buffer, using a single instances geometry read from tlas_desc.gpu_address. */ void wis::ImplVKRaytracing::BuildTopLevelAS(wis::VKCommandListView cmd_buffer, const wis::TopLevelASBuildDesc& tlas_desc, wis::VKAccelerationStructureView dst_acceleration_structure, uint64_t scratch_buffer_gpu_address, wis::VKAccelerationStructureView src_acceleration_structure) const noexcept +{ + VkAccelerationStructureGeometryKHR geometry{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR, + .geometry = { + .instances = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, + .arrayOfPointers = tlas_desc.indirect, + .data = { .deviceAddress = tlas_desc.gpu_address } } }, + }; + VkAccelerationStructureBuildGeometryInfoKHR build_info{ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, + .flags = convert_vk(tlas_desc.flags), + .mode = tlas_desc.update ? 
VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .srcAccelerationStructure = std::get<0>(src_acceleration_structure), + .dstAccelerationStructure = std::get<0>(dst_acceleration_structure), + .geometryCount = 1u, + .pGeometries = &geometry, + .scratchData = scratch_buffer_gpu_address, + }; + VkAccelerationStructureBuildRangeInfoKHR range_info{ + .primitiveCount = tlas_desc.instance_count, + }; + VkAccelerationStructureBuildRangeInfoKHR* p_range_info = &range_info; + table.vkCmdBuildAccelerationStructuresKHR(std::get<0>(cmd_buffer), 1, &build_info, &p_range_info); +} +#endif // WISDOM_VULKAN +#endif // WISDOM_RAYTRACING_VK_CPP diff --git a/wisdom/extensions/raytracing/wisdom/vk_raytracing_pipeline.h b/wisdom/extensions/raytracing/wisdom/vk_raytracing_pipeline.h new file mode 100644 index 00000000..6417e065 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/vk_raytracing_pipeline.h @@ -0,0 +1,35 @@ +#ifndef WIS_VK_RAYTRACING_PIPELINE_H +#define WIS_VK_RAYTRACING_PIPELINE_H +#include +#include + +namespace wis { +class VKRaytracingPipeline; + +using VKRaytracingPipelineView = std::tuple; + +template<> +struct Internal { + wis::managed_handle_ex state_object; + std::unique_ptr shader_identifiers; +}; + +class VKRaytracingPipeline : public QueryInternal +{ +public: + VKRaytracingPipeline() noexcept = default; + operator VKRaytracingPipelineView() noexcept + { + return { state_object.get() }; + } + const uint8_t* GetShaderIdentifiers() const noexcept + { + return shader_identifiers.get(); + } + operator bool() const noexcept + { + return bool(state_object); + } +}; +} // namespace wis +#endif // !WIS_VK_RAYTRACING_PIPELINE_H diff --git a/wisdom/extensions/raytracing/wisdom/vk_rtas.h b/wisdom/extensions/raytracing/wisdom/vk_rtas.h new file mode 100644 index 00000000..5f80e0c9 --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/vk_rtas.h @@ -0,0 +1,59 @@ +#ifndef WIS_VK_RTAS_H +#define WIS_VK_RTAS_H +#include 
+#include + +namespace wis { +class VKAccelerationStructure; + +using VKAccelerationStructureView = std::tuple; + +template<> +struct Internal { + wis::SharedDevice device; + h::VkAccelerationStructureKHR handle; + PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR = nullptr; + +public: + Internal() noexcept = default; + Internal(Internal&&) noexcept = default; + Internal& operator=(Internal&& o) noexcept + { + if (this == &o) { + return *this; + } + Destroy(); + device = std::move(o.device); + handle = std::move(o.handle); + vkDestroyAccelerationStructureKHR = std::move(o.vkDestroyAccelerationStructureKHR); + return *this; + } + ~Internal() noexcept + { + Destroy(); + } + + void Destroy() noexcept + { + if (handle) { + vkDestroyAccelerationStructureKHR(device.get(), handle, nullptr); + handle = VK_NULL_HANDLE; + } + } +}; + +class VKAccelerationStructure : public QueryInternal +{ +public: + VKAccelerationStructure() noexcept = default; + operator bool() const noexcept + { + return handle != VK_NULL_HANDLE; + } + operator VKAccelerationStructureView() const noexcept + { + return { handle }; + } +}; +} // namespace wis +#endif // WIS_VK_RTAS_H diff --git a/wisdom/extensions/raytracing/wisdom/wisdom_raytracing.hpp b/wisdom/extensions/raytracing/wisdom/wisdom_raytracing.hpp new file mode 100644 index 00000000..c559e6aa --- /dev/null +++ b/wisdom/extensions/raytracing/wisdom/wisdom_raytracing.hpp @@ -0,0 +1,35 @@ +#ifndef WISDOM_RAYTRACING_HPP +#define WISDOM_RAYTRACING_HPP +#include "impl.dx12.h" +#include "impl.vk.h" +#include "impl.h" + +namespace wis { +#if defined(WISDOM_DX12) && !defined(WISDOM_FORCE_VULKAN) +using Raytracing = ImplDX12Raytracing; +using AccelerationStructure = DX12AccelerationStructure; +using AcceleratedGeometryDesc = DX12AcceleratedGeometryDesc; +using RaytracingPipelineDesc = DX12RaytracingPipeineDesc; +using RaytracingPipeline = DX12RaytracingPipeline; + +[[nodiscard]] inline constexpr wis::AcceleratedGeometryDesc 
+CreateGeometryDesc(const wis::AcceleratedGeometryInput& desc) noexcept +{ + return DX12CreateGeometryDesc(desc); +} + +#elif defined(WISDOM_VULKAN) +using Raytracing = ImplVKRaytracing; +using AccelerationStructure = VKAccelerationStructure; +using AcceleratedGeometryDesc = VKAcceleratedGeometryDesc; +using RaytracingPipelineDesc = VKRaytracingPipeineDesc; +using RaytracingPipeline = VKRaytracingPipeline; + +[[nodiscard]] inline constexpr wis::VKAcceleratedGeometryDesc +CreateGeometryDesc(const wis::AcceleratedGeometryInput& desc) noexcept +{ + return VKCreateGeometryDesc(desc); +} +#endif // WISDOM_DX12 +} // namespace wis +#endif // !WISDOM_RAYTRACING_HPP diff --git a/wisdom/include/wisdom/dx12/dx12_allocator.h b/wisdom/include/wisdom/dx12/dx12_allocator.h index 0a454aeb..a39834f6 100644 --- a/wisdom/include/wisdom/dx12/dx12_allocator.h +++ b/wisdom/include/wisdom/dx12/dx12_allocator.h @@ -9,7 +9,7 @@ class DX12Device; template<> struct Internal { wis::com_ptr allocator; - ID3D12Device10* device; // ownedby allocator, no need to release + ID3D12Device10* device = nullptr; // ownedby allocator, no need to release }; /// @brief Resource allocator for DX12 @@ -72,6 +72,9 @@ class ImplDX12ResourceAllocator : public QueryInternal [[nodiscard]] WIS_INLINE DX12Buffer DX12CreateResource(wis::Result& result, const D3D12MA::ALLOCATION_DESC& all_desc, const D3D12_RESOURCE_DESC1& res_desc, D3D12_RESOURCE_STATES state) const noexcept; + [[nodiscard]] WIS_INLINE wis::DX12Buffer + DX12CreateResource2(wis::Result& result, const D3D12MA::ALLOCATION_DESC& all_desc, const D3D12_RESOURCE_DESC1& res_desc, D3D12_RESOURCE_STATES state) const noexcept; + WIS_INLINE static void DX12FillBufferDesc(uint64_t size, BufferUsage flags, D3D12_RESOURCE_DESC1& info) noexcept; diff --git a/wisdom/include/wisdom/dx12/dx12_command_list.h b/wisdom/include/wisdom/dx12/dx12_command_list.h index 9be5f50b..62d0fa56 100644 --- a/wisdom/include/wisdom/dx12/dx12_command_list.h +++ 
b/wisdom/include/wisdom/dx12/dx12_command_list.h @@ -46,6 +46,8 @@ class ImplDX12CommandList : public QueryInternal WIS_INLINE void CopyTextureToBuffer(DX12TextureView src_texture, DX12BufferView dest_buffer, const wis::BufferTextureCopyRegion* regions, uint32_t region_count) const noexcept; + WIS_INLINE void CopyTexture(DX12TextureView source, DX12TextureView destination, const wis::TextureCopyRegion* regions, uint32_t region_count) const noexcept; + WIS_INLINE void BufferBarrier(wis::BufferBarrier barrier, DX12BufferView buffer) noexcept; // 8 buffers at once max for efficiency WIS_INLINE void BufferBarriers(const wis::DX12BufferBarrier2* barriers, uint32_t barrier_count) noexcept; @@ -60,6 +62,8 @@ class ImplDX12CommandList : public QueryInternal WIS_INLINE void SetRootSignature(wis::DX12RootSignatureView root_signature) noexcept; + WIS_INLINE void SetComputeRootSignature(wis::DX12RootSignatureView root_signature) noexcept; + WIS_INLINE void SetPipelineState(wis::DX12PipelineView pipeline_state) noexcept; WIS_INLINE void IASetPrimitiveTopology(wis::PrimitiveTopology vp) noexcept; @@ -88,12 +92,18 @@ class ImplDX12CommandList : public QueryInternal uint32_t start_vertex = 0, uint32_t start_instance = 0) noexcept; + WIS_INLINE void Dispatch(uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) noexcept; + WIS_INLINE void SetPushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes, wis::ShaderStages stage) noexcept; + WIS_INLINE void SetComputePushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes) noexcept; WIS_INLINE void PushDescriptor(wis::DescriptorType type, uint32_t binding, wis::DX12BufferView view, uint32_t offset = 0) noexcept; + WIS_INLINE void PushDescriptorCompute(wis::DescriptorType type, uint32_t binding, wis::DX12BufferView view, uint32_t offset = 0) noexcept; WIS_INLINE void SetDescriptorStorage(wis::DX12DescriptorStorageView desc_storage) noexcept; + WIS_INLINE void 
SetComputeDescriptorStorage(wis::DX12DescriptorStorageView desc_storage) noexcept; + protected: bool closed = false; }; @@ -172,6 +182,17 @@ class DX12CommandList : public wis::ImplDX12CommandList { wis::ImplDX12CommandList::CopyTextureToBuffer(std::move(source), std::move(destination), regions, region_count); } + /** + * @brief Copies data from one texture to another. + * @param source The source texture to copy from. + * @param destination The destination texture to copy to. + * @param regions The regions to copy. + * @param region_count The number of regions to copy. + * */ + inline void CopyTexture(wis::DX12TextureView source, wis::DX12TextureView destination, const wis::TextureCopyRegion* regions, uint32_t region_count) noexcept + { + wis::ImplDX12CommandList::CopyTexture(std::move(source), std::move(destination), regions, region_count); + } /** * @brief Sets the barrier on the buffer. * @param barrier The barrier to set. @@ -231,6 +252,15 @@ class DX12CommandList : public wis::ImplDX12CommandList { wis::ImplDX12CommandList::SetRootSignature(std::move(root_signature)); } + /** + * @brief Sets the pipeline signature object to compute pipeline. Used to determine how to pick descriptors from descriptor buffer. + * May only work with compute pipelines. + * @param root_signature The root signature to set. + * */ + inline void SetComputeRootSignature(wis::DX12RootSignatureView root_signature) noexcept + { + wis::ImplDX12CommandList::SetComputeRootSignature(std::move(root_signature)); + } /** * @brief Sets the primitive topology. Detemines how vertices shall be processed. * @param topology The primitive topology to set. @@ -331,6 +361,16 @@ class DX12CommandList : public wis::ImplDX12CommandList { wis::ImplDX12CommandList::DrawInstanced(vertex_count_per_instance, instance_count, start_vertex, start_instance); } + /** + * @brief Dispatches compute shader. + * @param group_count_x The number of groups to dispatch in X dimension. 
+ * @param group_count_y The number of groups to dispatch in Y dimension. Default is 1. + * @param group_count_z The number of groups to dispatch in Z dimension. Default is 1. + * */ + inline void Dispatch(uint32_t group_count_x, uint32_t group_count_y = 1, uint32_t group_count_z = 1) noexcept + { + wis::ImplDX12CommandList::Dispatch(group_count_x, group_count_y, group_count_z); + } /** * @brief Sets the root constants for the shader. * @param data The data to set the root constants with. @@ -342,6 +382,16 @@ class DX12CommandList : public wis::ImplDX12CommandList { wis::ImplDX12CommandList::SetPushConstants(data, size_4bytes, offset_4bytes, stage); } + /** + * @brief Sets the root constants for the compute or raytracing shader. + * @param data The data to set the root constants with. Taken as pointer-to-const to match the implementation signature, so const data can be passed. + * @param size_4bytes The size of the data in 4-byte units. + * @param offset_4bytes The offset in the data in 4-byte units. + * */ + inline void SetComputePushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes) noexcept + { + wis::ImplDX12CommandList::SetComputePushConstants(data, size_4bytes, offset_4bytes); + } /** * @brief Pushes descriptor directly to the command list, without putting it to the table. * Works only with buffer bindings. @@ -355,6 +405,36 @@ class DX12CommandList : public wis::ImplDX12CommandList { wis::ImplDX12CommandList::PushDescriptor(type, root_index, std::move(buffer), offset); } + /** + * @brief Pushes descriptor directly to the command list, without putting it to the table. + * Works only with buffer bindings. + * Works with compute or raytracing pipelines. + * Buffer is always bound with full size. + * @param type The type of the descriptor to set. + * @param root_index The index of the root descriptor to set. + * @param buffer The buffer to set. + * @param offset The offset in the descriptor table to set the descriptor to. 
+ * */ + inline void PushDescriptorCompute(wis::DescriptorType type, uint32_t root_index, wis::DX12BufferView buffer, uint32_t offset) noexcept + { + wis::ImplDX12CommandList::PushDescriptorCompute(type, root_index, std::move(buffer), offset); + } + /** + * @brief Sets the descriptor storage object for graphics pipeline. + * @param storage The descriptor storage to set. + * */ + inline void SetDescriptorStorage(wis::DX12DescriptorStorageView storage) noexcept + { + wis::ImplDX12CommandList::SetDescriptorStorage(std::move(storage)); + } + /** + * @brief Sets the descriptor storage object for compute pipeline. + * @param storage The descriptor storage to set. + * */ + inline void SetComputeDescriptorStorage(wis::DX12DescriptorStorageView storage) noexcept + { + wis::ImplDX12CommandList::SetComputeDescriptorStorage(std::move(storage)); + } }; #pragma endregion DX12CommandList diff --git a/wisdom/include/wisdom/dx12/dx12_descriptor_storage.h b/wisdom/include/wisdom/dx12/dx12_descriptor_storage.h index d6cced46..100972b4 100644 --- a/wisdom/include/wisdom/dx12/dx12_descriptor_storage.h +++ b/wisdom/include/wisdom/dx12/dx12_descriptor_storage.h @@ -12,17 +12,21 @@ class DX12DescriptorStorage; template<> struct Internal { - // sampler, Uniform buffer, storage RW buffer, sampled image, storage RW image. Maybe storage read buffer will be needed. - static constexpr uint32_t max_sets = uint32_t(wis::BindingIndex::Count); + struct OffsetIndicator { + uint32_t offset_in_bytes : 31; + uint32_t sampler : 1; + }; wis::com_ptr device; - wis::com_ptr heap_resource; - wis::com_ptr heap_sampler; + wis::com_ptr heaps[2]; - CD3DX12_CPU_DESCRIPTOR_HANDLE heap_starts[max_sets]{}; CD3DX12_GPU_DESCRIPTOR_HANDLE heap_gpu_starts[2]{}; // 0 - resource, 1 - sampler. Used for SetDescriptorHeaps - uint32_t heap_resource_increment = 0; + CD3DX12_CPU_DESCRIPTOR_HANDLE heap_cpu_starts[2]{}; // 0 - resource, 1 - sampler. 
Used for CopyDescriptorsSimple + + std::unique_ptr heap_offsets; uint32_t heap_sampler_increment = 0; + uint32_t heap_resource_increment = 0; + uint32_t heap_count = 0; }; class ImplDX12DescriptorStorage : public QueryInternal @@ -31,7 +35,7 @@ class ImplDX12DescriptorStorage : public QueryInternal ImplDX12DescriptorStorage() = default; operator bool() const noexcept { - return bool(heap_resource) || bool(heap_sampler); + return bool(heaps[0]) || bool(heaps[1]); } operator DX12DescriptorStorageView() const noexcept @@ -40,29 +44,78 @@ class ImplDX12DescriptorStorage : public QueryInternal } public: - void WriteSampler(uint32_t index, wis::DX12SamplerView sampler) noexcept + void WriteSampler(uint32_t binding, uint32_t index, wis::DX12SamplerView sampler) noexcept { - auto handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(heap_starts[uint32_t(wis::BindingIndex::Sampler) - 1], index, heap_sampler_increment); + auto handle = DX12GetSamplerCPUDescriptorHandle(binding, index); auto& sampler_handle = std::get<0>(sampler); device->CopyDescriptorsSimple(1, handle, sampler_handle, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); } - void WriteConstantBuffer(uint32_t index, wis::DX12BufferView buffer, uint32_t size, uint32_t offset = 0) noexcept + void WriteConstantBuffer(uint32_t binding, uint32_t index, wis::DX12BufferView buffer, uint32_t size, uint32_t offset = 0) noexcept { auto* cbv = std::get<0>(buffer); D3D12_CONSTANT_BUFFER_VIEW_DESC desc{ .BufferLocation = std::get<0>(buffer)->GetGPUVirtualAddress() + offset, - .SizeInBytes = wis::detail::aligned_size(size, uint32_t(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + .SizeInBytes = wis::detail::aligned_size(size, uint32_t(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) // is this correct? 
- auto handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(heap_starts[uint32_t(wis::BindingIndex::ConstantBuffer) - 1], index, heap_resource_increment); + auto handle = DX12GetResourceCPUDescriptorHandle(binding, index); device->CreateConstantBufferView(&desc, handle); } - void WriteTexture(uint32_t index, wis::DX12ShaderResourceView srv) noexcept + void WriteTexture(uint32_t binding, uint32_t index, wis::DX12ShaderResourceView srv) noexcept { - auto handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(heap_starts[uint32_t(wis::BindingIndex::Texture) - 1], index, heap_resource_increment); + auto handle = DX12GetResourceCPUDescriptorHandle(binding, index); auto& srv_handle = std::get<0>(srv); device->CopyDescriptorsSimple(1, handle, srv_handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } + void WriteRWTexture(uint32_t binding, uint32_t index, wis::DX12UnorderedAccessTextureView uav) noexcept + { + auto handle = DX12GetResourceCPUDescriptorHandle(binding, index); + auto& uav_handle = std::get<0>(uav); + + device->CopyDescriptorsSimple(1, handle, uav_handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + void WriteRWStrcturedBuffer(uint32_t binding, uint32_t index, wis::DX12BufferView buffer, uint32_t stride, uint32_t element_count, uint32_t offset_elements = 0) noexcept + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc{ + .Format = DXGI_FORMAT_UNKNOWN, /* structured views must use UNKNOWN; the element size comes from StructureByteStride (R32_TYPELESS is only valid for raw views) */ + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer{ + .FirstElement = offset_elements, + .NumElements = element_count, + .StructureByteStride = stride, + .CounterOffsetInBytes = 0, + .Flags = D3D12_BUFFER_UAV_FLAG_NONE }, + }; + auto handle = DX12GetResourceCPUDescriptorHandle(binding, index); + device->CreateUnorderedAccessView(std::get<0>(buffer), nullptr, &uav_desc, handle); + } + + void WriteStructuredBuffer(uint32_t binding, uint32_t index, wis::DX12BufferView buffer, uint32_t stride, uint32_t element_count, uint32_t offset_elements = 0) noexcept + { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc{ + .Format = DXGI_FORMAT_UNKNOWN, 
+ .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer{ + .FirstElement = offset_elements, + .NumElements = element_count, + .StructureByteStride = stride, + .Flags = D3D12_BUFFER_SRV_FLAG_NONE }, + }; + auto handle = DX12GetResourceCPUDescriptorHandle(binding, index); + device->CreateShaderResourceView(std::get<0>(buffer), &srv_desc, handle); + } + +public: + D3D12_CPU_DESCRIPTOR_HANDLE DX12GetResourceCPUDescriptorHandle(uint32_t binding, uint32_t index) const noexcept + { + return D3D12_CPU_DESCRIPTOR_HANDLE(heap_cpu_starts[0].ptr + heap_offsets[binding].offset_in_bytes + index * heap_resource_increment); + } + D3D12_CPU_DESCRIPTOR_HANDLE DX12GetSamplerCPUDescriptorHandle(uint32_t binding, uint32_t index) const noexcept + { + return D3D12_CPU_DESCRIPTOR_HANDLE(heap_cpu_starts[1].ptr + heap_offsets[binding].offset_in_bytes + index * heap_sampler_increment); + } }; #pragma region DX12DescriptorStorage @@ -78,33 +131,36 @@ class DX12DescriptorStorage : public wis::ImplDX12DescriptorStorage public: /** * @brief Writes the sampler to the sampler descriptor storage. - * @param index Index in array of samplers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of samplers to fill. * @param sampler The sampler to write. * */ - inline void WriteSampler(uint32_t index, wis::DX12SamplerView sampler) noexcept + inline void WriteSampler(uint32_t set_index, uint32_t binding, wis::DX12SamplerView sampler) noexcept { - wis::ImplDX12DescriptorStorage::WriteSampler(index, std::move(sampler)); + wis::ImplDX12DescriptorStorage::WriteSampler(set_index, binding, std::move(sampler)); } /** * @brief Writes the constant buffer to the constant buffer descriptor storage. - * @param index Index in array of constant buffers to fill. 
+ * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of constant buffers to fill. * @param buffer The buffer to write. * @param size The size of the constant buffer in bytes. * @param offset The offset in the buffer to write the constant buffer to. * size + offset must be less or equal the overall size of the bound buffer. * */ - inline void WriteConstantBuffer(uint32_t index, wis::DX12BufferView buffer, uint32_t size, uint32_t offset = 0) noexcept + inline void WriteConstantBuffer(uint32_t set_index, uint32_t binding, wis::DX12BufferView buffer, uint32_t size, uint32_t offset = 0) noexcept { - wis::ImplDX12DescriptorStorage::WriteConstantBuffer(index, std::move(buffer), size, offset); + wis::ImplDX12DescriptorStorage::WriteConstantBuffer(set_index, binding, std::move(buffer), size, offset); } /** * @brief Writes the texture to the shader resource descriptor storage. - * @param index Index in array of shader resources to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of shader resources to fill. * @param resource The shader resource to write. 
* */ - inline void WriteTexture(uint32_t index, wis::DX12ShaderResourceView resource) noexcept + inline void WriteTexture(uint32_t set_index, uint32_t binding, wis::DX12ShaderResourceView resource) noexcept { - wis::ImplDX12DescriptorStorage::WriteTexture(index, std::move(resource)); + wis::ImplDX12DescriptorStorage::WriteTexture(set_index, binding, std::move(resource)); } }; #pragma endregion DX12DescriptorStorage diff --git a/wisdom/include/wisdom/dx12/dx12_device.h b/wisdom/include/wisdom/dx12/dx12_device.h index 73fa291e..f6a1c1b2 100644 --- a/wisdom/include/wisdom/dx12/dx12_device.h +++ b/wisdom/include/wisdom/dx12/dx12_device.h @@ -54,6 +54,9 @@ class ImplDX12Device : public QueryInternal [[nodiscard]] WIS_INLINE wis::DX12PipelineState CreateGraphicsPipeline(wis::Result& result, const wis::DX12GraphicsPipelineDesc& desc) const noexcept; + [[nodiscard]] WIS_INLINE wis::DX12PipelineState + CreateComputePipeline(wis::Result& result, const wis::DX12ComputePipelineDesc& desc) const noexcept; + [[nodiscard]] WIS_INLINE wis::DX12Shader CreateShader(wis::Result& result, void* data, size_t size) const noexcept; @@ -70,30 +73,29 @@ class ImplDX12Device : public QueryInternal CreateSampler(wis::Result& result, const wis::SamplerDesc& desc) const noexcept; [[nodiscard]] WIS_INLINE wis::DX12ShaderResource - CreateShaderResource(wis::Result& result, DX12TextureView texture, wis::ShaderResourceDesc desc) const noexcept; + CreateShaderResource(wis::Result& result, DX12TextureView texture, const wis::ShaderResourceDesc& desc) const noexcept; + + [[nodiscard]] WIS_INLINE wis::DX12UnorderedAccessTexture + CreateUnorderedAccessTexture(wis::Result& result, DX12TextureView buffer, const wis::UnorderedAccessDesc& desc) const noexcept; // returns true only for now [[nodiscard]] WIS_INLINE bool QueryFeatureSupport(wis::DeviceFeature feature) const noexcept; [[nodiscard]] WIS_INLINE wis::DX12DescriptorStorage - CreateDescriptorStorage(wis::Result& result, const 
wis::DescriptorStorageDesc& desc) const noexcept; + CreateDescriptorStorage(wis::Result& result, + const wis::DescriptorBindingDesc* descriptor_bindings = nullptr, + uint32_t descriptor_bindings_count = 0, + wis::DescriptorMemory = wis::DescriptorMemory::ShaderVisible) const noexcept; [[nodiscard]] WIS_INLINE wis::DX12RootSignature CreateRootSignature(wis::Result& result, - const PushConstant* constants = nullptr, - uint32_t constants_size = 0, - const PushDescriptor* push_descriptors = nullptr, - uint32_t push_descriptors_size = 0, - uint32_t space_overlap_count = 1) const noexcept; - - [[nodiscard]] WIS_INLINE wis::DX12RootSignature - CreateRootSignature2(wis::Result& result, - const wis::PushConstant* push_constants = nullptr, - uint32_t constants_count = 0, - const wis::PushDescriptor* push_descriptors = nullptr, - uint32_t push_descriptors_count = 0, - const wis::DescriptorSpacing* descriptor_spacing = nullptr) const noexcept; + const wis::PushConstant* push_constants = nullptr, + uint32_t constants_count = 0, + const wis::PushDescriptor* push_descriptors = nullptr, + uint32_t push_descriptors_count = 0, + const wis::DescriptorBindingDesc* descriptor_bindings = nullptr, + uint32_t descriptor_bindings_count = 0) const noexcept; }; #pragma region DX12Device @@ -200,74 +202,70 @@ class DX12Device : public wis::ImplDX12Device return wis::ResultValue{ &wis::ImplDX12Device::CreateGraphicsPipeline, this, desc }; } /** - * @brief Creates a root signature object for use with DescriptorStorage. - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. 
- * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If wis::DX12Device is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @return wis::DX12RootSignature on success (wis::Status::Ok). + * @brief Creates a compute pipeline state object. + * @param desc The description of the compute pipeline to create. + * @return wis::DX12PipelineState on success (wis::Status::Ok). * */ - [[nodiscard]] inline wis::DX12RootSignature CreateRootSignature(wis::Result& result, const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t descriptors_count = 0, uint32_t space_overlap_count = 1) const noexcept + [[nodiscard]] inline wis::DX12PipelineState CreateComputePipeline(wis::Result& result, const wis::DX12ComputePipelineDesc& desc) const noexcept { - return wis::ImplDX12Device::CreateRootSignature(result, push_constants, constants_count, push_descriptors, descriptors_count, space_overlap_count); + return wis::ImplDX12Device::CreateComputePipeline(result, desc); } /** - * @brief Creates a root signature object for use with DescriptorStorage. - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. 
- * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. - * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If wis::DX12Device is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @return wis::DX12RootSignature on success (wis::Status::Ok). + * @brief Creates a compute pipeline state object. + * @param desc The description of the compute pipeline to create. + * @return wis::DX12PipelineState on success (wis::Status::Ok). * */ - [[nodiscard]] inline wis::ResultValue CreateRootSignature(const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t descriptors_count = 0, uint32_t space_overlap_count = 1) const noexcept + [[nodiscard]] inline wis::ResultValue CreateComputePipeline(const wis::DX12ComputePipelineDesc& desc) const noexcept { - return wis::ResultValue{ &wis::ImplDX12Device::CreateRootSignature, this, push_constants, constants_count, push_descriptors, descriptors_count, space_overlap_count }; + return wis::ResultValue{ &wis::ImplDX12Device::CreateComputePipeline, this, desc }; } /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. 
+ * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. 
Max is 64 - push_constant_count - push_descriptor_count * 2. * @return wis::DX12RootSignature on success (wis::Status::Ok). * */ - [[nodiscard]] inline wis::DX12RootSignature CreateRootSignature2(wis::Result& result, const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptors_count = 0, const wis::DescriptorSpacing* descriptor_spacing = nullptr) const noexcept + [[nodiscard]] inline wis::DX12RootSignature CreateRootSignature(wis::Result& result, const wis::PushConstant* push_constants = nullptr, uint32_t push_constant_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptor_count = 0, const wis::DescriptorBindingDesc* bindings = nullptr, uint32_t binding_count = 0) const noexcept { - return wis::ImplDX12Device::CreateRootSignature2(result, push_constants, constants_count, push_descriptors, push_descriptors_count, descriptor_spacing); + return wis::ImplDX12Device::CreateRootSignature(result, push_constants, push_constant_count, push_descriptors, push_descriptor_count, bindings, binding_count); } /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. 
+ * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @return wis::DX12RootSignature on success (wis::Status::Ok). 
* */ - [[nodiscard]] inline wis::ResultValue CreateRootSignature2(const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptors_count = 0, const wis::DescriptorSpacing* descriptor_spacing = nullptr) const noexcept + [[nodiscard]] inline wis::ResultValue CreateRootSignature(const wis::PushConstant* push_constants = nullptr, uint32_t push_constant_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptor_count = 0, const wis::DescriptorBindingDesc* bindings = nullptr, uint32_t binding_count = 0) const noexcept { - return wis::ResultValue{ &wis::ImplDX12Device::CreateRootSignature2, this, push_constants, constants_count, push_descriptors, push_descriptors_count, descriptor_spacing }; + return wis::ResultValue{ &wis::ImplDX12Device::CreateRootSignature, this, push_constants, push_constant_count, push_descriptors, push_descriptor_count, bindings, binding_count }; } /** * @brief Creates a shader object. @@ -395,22 +393,26 @@ class DX12Device : public wis::ImplDX12Device /** * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @return wis::DX12DescriptorStorage on success (wis::Status::Ok). 
* */ - [[nodiscard]] inline wis::DX12DescriptorStorage CreateDescriptorStorage(wis::Result& result, const wis::DescriptorStorageDesc& desc) const noexcept + [[nodiscard]] inline wis::DX12DescriptorStorage CreateDescriptorStorage(wis::Result& result, const wis::DescriptorBindingDesc* bindings, uint32_t bindings_count, wis::DescriptorMemory memory = wis::DescriptorMemory::ShaderVisible) const noexcept { - return wis::ImplDX12Device::CreateDescriptorStorage(result, desc); + return wis::ImplDX12Device::CreateDescriptorStorage(result, bindings, bindings_count, memory); } /** * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @return wis::DX12DescriptorStorage on success (wis::Status::Ok). * */ - [[nodiscard]] inline wis::ResultValue CreateDescriptorStorage(const wis::DescriptorStorageDesc& desc) const noexcept + [[nodiscard]] inline wis::ResultValue CreateDescriptorStorage(const wis::DescriptorBindingDesc* bindings, uint32_t bindings_count, wis::DescriptorMemory memory = wis::DescriptorMemory::ShaderVisible) const noexcept { - return wis::ResultValue{ &wis::ImplDX12Device::CreateDescriptorStorage, this, desc }; + return wis::ResultValue{ &wis::ImplDX12Device::CreateDescriptorStorage, this, bindings, bindings_count, memory }; } /** * @brief Queries if the device supports the feature. 
diff --git a/wisdom/include/wisdom/dx12/dx12_resource.h b/wisdom/include/wisdom/dx12/dx12_resource.h index 949d2af0..de024eee 100644 --- a/wisdom/include/wisdom/dx12/dx12_resource.h +++ b/wisdom/include/wisdom/dx12/dx12_resource.h @@ -43,6 +43,12 @@ class ImplDX12Buffer : public QueryInternal { resource->Unmap(0, nullptr); } + + [[nodiscard]] + uint64_t GetGPUAddress() const noexcept + { + return resource->GetGPUVirtualAddress(); + } }; using DX12Texture = DX12Buffer; @@ -116,6 +122,8 @@ class DX12ShaderResource : public QueryInternal } }; +using DX12UnorderedAccessTexture = DX12ShaderResource; + #pragma region DX12Buffer /** * @brief Represents buffer object for storing linear data. @@ -141,6 +149,14 @@ class DX12Buffer : public wis::ImplDX12Buffer { wis::ImplDX12Buffer::Unmap(); } + /** + * @brief Returns the address of the resource in GPU memory. + * @return The address of the resource in GPU memory. + * */ + inline uint64_t GetGPUAddress() const noexcept + { + return wis::ImplDX12Buffer::GetGPUAddress(); + } }; #pragma endregion DX12Buffer diff --git a/wisdom/include/wisdom/dx12/dx12_shader.h b/wisdom/include/wisdom/dx12/dx12_shader.h index eb841e7e..706ea46a 100644 --- a/wisdom/include/wisdom/dx12/dx12_shader.h +++ b/wisdom/include/wisdom/dx12/dx12_shader.h @@ -10,7 +10,7 @@ class DX12Shader; template<> struct Internal { std::unique_ptr bytecode; - size_t size; + size_t size = 0; }; class DX12Shader : public QueryInternal diff --git a/wisdom/include/wisdom/dx12/dx12_views.h b/wisdom/include/wisdom/dx12/dx12_views.h index 892334ef..158a3b5e 100644 --- a/wisdom/include/wisdom/dx12/dx12_views.h +++ b/wisdom/include/wisdom/dx12/dx12_views.h @@ -28,9 +28,11 @@ using DX12DescriptorBufferView = std::tuple; using DX12DescriptorBufferGPUView = std::tuple; using DX12SamplerView = std::tuple; using DX12ShaderResourceView = std::tuple; +using DX12UnorderedAccessTextureView = std::tuple; using DX12MemoryView = std::tuple; using DX12PipelineView = std::tuple; using 
DX12DescriptorStorageView = std::tuple; +using DX12AcceleratedGeometryDesc = D3D12_RAYTRACING_GEOMETRY_DESC; } // namespace wis #endif // !WIS_DX12_VIEWS_H diff --git a/wisdom/include/wisdom/dx12/impl/dx12_allocator.cpp b/wisdom/include/wisdom/dx12/impl/dx12_allocator.cpp index d3884a80..0d20f154 100644 --- a/wisdom/include/wisdom/dx12/impl/dx12_allocator.cpp +++ b/wisdom/include/wisdom/dx12/impl/dx12_allocator.cpp @@ -19,7 +19,9 @@ wis::ImplDX12ResourceAllocator::CreateBuffer(wis::Result& result, uint64_t size, .Flags = convert_dx(mem_flags), .HeapType = convert_dx(memory), }; - + if (usage & wis::BufferUsage::AccelerationStructureBuffer) { + return DX12CreateResource2(result, all_desc, buffer_desc, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE); + } return DX12CreateResource(result, all_desc, buffer_desc, D3D12_RESOURCE_STATE_COMMON); } wis::DX12Texture @@ -207,11 +209,31 @@ wis::ImplDX12ResourceAllocator::DX12CreateResource(wis::Result& result, const D3 return buffer; } +wis::DX12Buffer +wis::ImplDX12ResourceAllocator::DX12CreateResource2(wis::Result& result, const D3D12MA::ALLOCATION_DESC& all_desc, const D3D12_RESOURCE_DESC1& res_desc, D3D12_RESOURCE_STATES state) const noexcept +{ + wis::DX12Buffer buffer; + auto& internal = buffer.GetMutableInternal(); + auto& memory_internal = internal.memory.GetMutableInternal(); + + HRESULT hr = allocator->CreateResource3(&all_desc, &res_desc, + D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, + memory_internal.allocation.put(), __uuidof(*internal.resource), internal.resource.put_void()); + + if (!wis::succeeded(hr)) { + result = wis::make_result(hr); + } + + memory_internal.allocator = allocator; // Copy allocator to memory + + return buffer; +} + void wis::ImplDX12ResourceAllocator::DX12FillBufferDesc(uint64_t size, BufferUsage flags, D3D12_RESOURCE_DESC1& info) noexcept { - uint64_t alignment = flags & BufferUsage::ConstantBuffer ? 
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT : 1; + uint64_t alignment = flags & wis::BufferUsage::ConstantBuffer ? D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT : 1; size = wis::detail::aligned_size(size, alignment); - info = CD3DX12_RESOURCE_DESC1::Buffer(size); + info = CD3DX12_RESOURCE_DESC1::Buffer(size, convert_dx(flags)); } void wis::ImplDX12ResourceAllocator::DX12FillTextureDesc(const TextureDesc& desc, D3D12_RESOURCE_DESC1& info) noexcept { diff --git a/wisdom/include/wisdom/dx12/impl/dx12_command_list.cpp b/wisdom/include/wisdom/dx12/impl/dx12_command_list.cpp index 4bf807e1..bd80cc1d 100644 --- a/wisdom/include/wisdom/dx12/impl/dx12_command_list.cpp +++ b/wisdom/include/wisdom/dx12/impl/dx12_command_list.cpp @@ -56,6 +56,44 @@ void wis::ImplDX12CommandList::CopyBufferToTexture(DX12BufferView src_buffer, DX } } +void wis::ImplDX12CommandList::CopyTexture(DX12TextureView source, DX12TextureView destination, const wis::TextureCopyRegion* regions, uint32_t region_count) const noexcept +{ + auto src_texture = std::get<0>(source); + auto dst_texture = std::get<0>(destination); + auto src_desc = src_texture->GetDesc(); + auto dst_desc = dst_texture->GetDesc(); + + wis::com_ptr device; + auto hr = src_texture->GetDevice(__uuidof(*device), device.put_void()); + for (uint32_t i = 0; i < region_count; i++) { + auto& region = regions[i]; + uint32_t num_rows = 0; + uint64_t row_size = 0; + uint64_t required_size = 0; + uint32_t src_subresource = D3D12CalcSubresource(region.src.mip, region.src.array_layer, 0u, src_desc.MipLevels, src_desc.DepthOrArraySize); + D3D12_TEXTURE_COPY_LOCATION src{ + .pResource = src_texture, + .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + .SubresourceIndex = src_subresource + }; + uint32_t dst_subresource = D3D12CalcSubresource(region.dst.mip, region.dst.array_layer, 0u, dst_desc.MipLevels, dst_desc.DepthOrArraySize); + D3D12_TEXTURE_COPY_LOCATION dst{ + .pResource = dst_texture, + .Type = 
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + .SubresourceIndex = dst_subresource + }; + D3D12_BOX box{ + .left = region.src.offset.width, + .top = region.src.offset.height, + .front = region.src.offset.depth_or_layers, + .right = region.src.offset.width + region.src.size.width, + .bottom = region.src.offset.height + region.src.size.height, + .back = region.src.offset.depth_or_layers + region.src.size.depth_or_layers + }; + list->CopyTextureRegion(&dst, region.dst.offset.width, region.dst.offset.height, region.dst.offset.depth_or_layers, &src, &box); /* DstX, DstY, DstZ: the Y offset comes from offset.height, mirroring the source box above */ + } +} + void wis::ImplDX12CommandList::CopyTextureToBuffer(DX12TextureView src_texture, DX12BufferView dest_buffer, const wis::BufferTextureCopyRegion* regions, uint32_t region_count) const noexcept { auto texture = std::get<0>(src_texture); @@ -274,8 +312,9 @@ void wis::ImplDX12CommandList::BeginRenderPass(const wis::DX12RenderPassDesc* pa list->BeginRenderPass(pass_desc->target_count, data, (ds_selector != DSSelect::None) ? &depth_stencil : nullptr, convert_dx(pass_desc->flags)); - if (pass_desc->view_mask) + if (pass_desc->view_mask) { list->SetViewInstanceMask(pass_desc->view_mask); + } } void wis::ImplDX12CommandList::EndRenderPass() noexcept @@ -365,6 +404,14 @@ void wis::ImplDX12CommandList::SetRootSignature(wis::DX12RootSignatureView root_ push_descriptor_count = std::get<3>(root_signature); } +void wis::ImplDX12CommandList::SetComputeRootSignature(wis::DX12RootSignatureView root_signature) noexcept +{ + list->SetComputeRootSignature(std::get<0>(root_signature)); + root_stage_map = std::get<1>(root_signature); + push_constant_count = std::get<2>(root_signature); + push_descriptor_count = std::get<3>(root_signature); +} + void wis::ImplDX12CommandList::DrawIndexedInstanced(uint32_t vertex_count_per_instance, uint32_t instance_count, uint32_t start_index, @@ -382,53 +429,48 @@ void wis::ImplDX12CommandList::DrawInstanced(uint32_t vertex_count_per_instance, list->DrawInstanced(vertex_count_per_instance,
instance_count, base_vertex, start_instance); } +void wis::ImplDX12CommandList::Dispatch(uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) noexcept +{ + list->Dispatch(group_count_x, group_count_y, group_count_z); +} + void wis::ImplDX12CommandList::SetPushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes, wis::ShaderStages stage) noexcept { list->SetGraphicsRoot32BitConstants(uint32_t(root_stage_map[uint32_t(stage)]), size_4bytes, data, offset_4bytes); } +void wis::ImplDX12CommandList::SetComputePushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes) noexcept +{ + list->SetComputeRoot32BitConstants(uint32_t(root_stage_map[0]), size_4bytes, data, offset_4bytes); +} + void wis::ImplDX12CommandList::SetDescriptorStorage(wis::DX12DescriptorStorageView desc_storage) noexcept { auto& storage = std::get<0>(desc_storage)->GetInternal(); - std::array heaps{}; - uint32_t increment = 0; - if (storage.heap_sampler) { - heaps[increment++] = storage.heap_sampler.get(); - } - if (storage.heap_resource) { - heaps[increment++] = storage.heap_resource.get(); + uint32_t table_count = bool(storage.heaps[0]) + bool(storage.heaps[1]); + uint32_t table_offset = !bool(storage.heaps[0]); + list->SetDescriptorHeaps(table_count, reinterpret_cast(storage.heaps + table_offset)); + + for (size_t i = 0; i < storage.heap_count; i++) { + auto& offset = storage.heap_offsets[i]; + auto handle = D3D12_GPU_DESCRIPTOR_HANDLE(storage.heap_gpu_starts[offset.sampler].ptr + offset.offset_in_bytes); + list->SetGraphicsRootDescriptorTable(i + push_constant_count + push_descriptor_count, handle); } +} - list->SetDescriptorHeaps(increment, heaps.data()); +void wis::ImplDX12CommandList::SetComputeDescriptorStorage(wis::DX12DescriptorStorageView desc_storage) noexcept +{ + auto& storage = std::get<0>(desc_storage)->GetInternal(); - if (storage.heap_sampler) { - list->SetGraphicsRootDescriptorTable(push_constant_count + push_descriptor_count, 
storage.heap_gpu_starts[1]); // 0 is reserved for push constants and push descriptors - } - if (storage.heap_resource) { - CD3DX12_GPU_DESCRIPTOR_HANDLE handles[+wis::BindingIndex::Count - 1]{ - // 0 is reserved for sampler heap - storage.heap_gpu_starts[0], - CD3DX12_GPU_DESCRIPTOR_HANDLE(storage.heap_gpu_starts[0], uint32_t(storage.heap_starts[2].ptr - storage.heap_starts[1].ptr)), - CD3DX12_GPU_DESCRIPTOR_HANDLE(storage.heap_gpu_starts[0], uint32_t(storage.heap_starts[3].ptr - storage.heap_starts[1].ptr)), - CD3DX12_GPU_DESCRIPTOR_HANDLE(storage.heap_gpu_starts[0], uint32_t(storage.heap_starts[4].ptr - storage.heap_starts[1].ptr)), - CD3DX12_GPU_DESCRIPTOR_HANDLE(storage.heap_gpu_starts[0], uint32_t(storage.heap_starts[5].ptr - storage.heap_starts[1].ptr)), - }; - if (storage.heap_starts[uint32_t(wis::BindingIndex::ConstantBuffer) - 1].ptr != 0) { - list->SetGraphicsRootDescriptorTable(push_constant_count + push_descriptor_count + uint32_t(wis::BindingIndex::ConstantBuffer) - 1, handles[0]); - } - if (storage.heap_starts[uint32_t(wis::BindingIndex::Texture) - 1].ptr != 0) { - list->SetGraphicsRootDescriptorTable(push_constant_count + push_descriptor_count + uint32_t(wis::BindingIndex::Texture) - 1, handles[1]); - } - if (storage.heap_starts[uint32_t(wis::BindingIndex::RWTexture) - 1].ptr != 0) { - list->SetGraphicsRootDescriptorTable(push_constant_count + push_descriptor_count + uint32_t(wis::BindingIndex::RWTexture) - 1, handles[2]); - } - if (storage.heap_starts[uint32_t(wis::BindingIndex::RWBuffer) - 1].ptr != 0) { - list->SetGraphicsRootDescriptorTable(push_constant_count + push_descriptor_count + uint32_t(wis::BindingIndex::RWBuffer) - 1, handles[3]); - } - if (storage.heap_starts[uint32_t(wis::BindingIndex::Buffer) - 1].ptr != 0) { - list->SetGraphicsRootDescriptorTable(push_constant_count + push_descriptor_count + uint32_t(wis::BindingIndex::Buffer) - 1, handles[4]); - } + uint32_t table_count = bool(storage.heaps[0]) + bool(storage.heaps[1]); + uint32_t 
table_offset = !bool(storage.heaps[0]); + list->SetDescriptorHeaps(table_count, reinterpret_cast(storage.heaps + table_offset)); + + for (size_t i = 0; i < storage.heap_count; i++) { + auto& offset = storage.heap_offsets[i]; + auto handle = D3D12_GPU_DESCRIPTOR_HANDLE(storage.heap_gpu_starts[offset.sampler].ptr + offset.offset_in_bytes); + list->SetComputeRootDescriptorTable(i + push_constant_count + push_descriptor_count, handle); } } @@ -450,4 +492,23 @@ void wis::ImplDX12CommandList::PushDescriptor(wis::DescriptorType type, uint32_t break; } } + +void wis::ImplDX12CommandList::PushDescriptorCompute(wis::DescriptorType type, uint32_t binding, wis::DX12BufferView view, uint32_t offset) noexcept +{ + auto handle = std::get<0>(view)->GetGPUVirtualAddress(); + switch (type) { + case wis::DescriptorType::Buffer: + list->SetComputeRootShaderResourceView(push_constant_count + binding, handle + offset); + break; + case wis::DescriptorType::Texture: + case wis::DescriptorType::RWTexture: + return; + case wis::DescriptorType::ConstantBuffer: + list->SetComputeRootConstantBufferView(push_constant_count + binding, handle + offset); + break; + case wis::DescriptorType::RWBuffer: + list->SetComputeRootUnorderedAccessView(push_constant_count + binding, handle + offset); + break; + } +} #endif // WIS_DX12_COMMAND_LIST_H diff --git a/wisdom/include/wisdom/dx12/impl/dx12_device.cpp b/wisdom/include/wisdom/dx12/impl/dx12_device.cpp index 95548823..e45fc63f 100644 --- a/wisdom/include/wisdom/dx12/impl/dx12_device.cpp +++ b/wisdom/include/wisdom/dx12/impl/dx12_device.cpp @@ -149,7 +149,6 @@ wis::ImplDX12Device::CreateGraphicsPipeline(wis::Result& result, const wis::DX12 convert_dx(desc.topology_type); //--Root signature - pipeline_stream.allocate() = std::get<0>(desc.root_signature); @@ -316,6 +315,33 @@ wis::ImplDX12Device::CreateGraphicsPipeline(wis::Result& result, const wis::DX12 return out_state; } +wis::DX12PipelineState +wis::ImplDX12Device::CreateComputePipeline(wis::Result& 
result, const wis::DX12ComputePipelineDesc& desc) const noexcept +{ + DX12PipelineState out_state; + auto& internal = out_state.GetMutableInternal(); + + //--Shader stages + wis::detail::memory_pool pipeline_stream; + wis::detail::DX12FillShaderStage(pipeline_stream, + desc.shader); + + //--Root signature + pipeline_stream.allocate() = + std::get<0>(desc.root_signature); + + D3D12_PIPELINE_STATE_STREAM_DESC psstream_desc{ + .SizeInBytes = pipeline_stream.size_bytes(), + .pPipelineStateSubobjectStream = pipeline_stream.data(), + }; + + HRESULT hr = device->CreatePipelineState(&psstream_desc, internal.pipeline.iid(), internal.pipeline.put_void()); + if (!wis::succeeded(hr)) { + result = wis::make_result(hr); + } + return out_state; +} + wis::DX12Shader wis::ImplDX12Device::CreateShader(wis::Result& result, void* data, size_t size) const noexcept @@ -541,7 +567,7 @@ wis::ImplDX12Device::CreateSampler(wis::Result& result, const wis::SamplerDesc& } wis::DX12ShaderResource -wis::ImplDX12Device::CreateShaderResource(wis::Result& result, DX12TextureView texture, wis::ShaderResourceDesc desc) const noexcept +wis::ImplDX12Device::CreateShaderResource(wis::Result& result, DX12TextureView texture, const wis::ShaderResourceDesc& desc) const noexcept { DX12ShaderResource out_resource; auto& internal = out_resource.GetMutableInternal(); @@ -638,6 +664,77 @@ wis::ImplDX12Device::CreateShaderResource(wis::Result& result, DX12TextureView t return out_resource; } +wis::DX12UnorderedAccessTexture +wis::ImplDX12Device::CreateUnorderedAccessTexture(wis::Result& result, DX12TextureView texture, const wis::UnorderedAccessDesc& desc) const noexcept +{ + DX12ShaderResource out_resource; + auto& internal = out_resource.GetMutableInternal(); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc{ + .Format = convert_dx(desc.format), + .ViewDimension = D3D12_UAV_DIMENSION(convert_dx(desc.view_type)), + }; + + switch (desc.view_type) { + case wis::TextureViewType::Texture1D: + uav_desc.Texture1D = { + 
.MipSlice = desc.subresource_range.base_mip_level, + }; + break; + case wis::TextureViewType::Texture1DArray: + uav_desc.Texture1DArray = { + .MipSlice = desc.subresource_range.base_mip_level, + .FirstArraySlice = desc.subresource_range.base_array_layer, + .ArraySize = desc.subresource_range.layer_count, + }; + break; + default: + case wis::TextureViewType::Texture2D: + uav_desc.Texture2D = { + .MipSlice = desc.subresource_range.base_mip_level, + .PlaneSlice = 0, + }; + break; + case wis::TextureViewType::TextureCube: + case wis::TextureViewType::TextureCubeArray: + case wis::TextureViewType::Texture2DArray: + uav_desc.Texture2DArray = { + .MipSlice = desc.subresource_range.base_mip_level, + .FirstArraySlice = desc.subresource_range.base_array_layer, + .ArraySize = desc.subresource_range.layer_count, + .PlaneSlice = 0, + }; + break; + case wis::TextureViewType::Texture2DMS: + uav_desc.Texture2DMS = {}; + break; + case wis::TextureViewType::Texture2DMSArray: + uav_desc.Texture2DMSArray = { + .FirstArraySlice = desc.subresource_range.base_array_layer, + .ArraySize = desc.subresource_range.layer_count, + }; + break; + case wis::TextureViewType::Texture3D: + uav_desc.Texture3D = { + .MipSlice = desc.subresource_range.base_mip_level, + .FirstWSlice = desc.subresource_range.base_array_layer, + .WSize = desc.subresource_range.layer_count, + }; + break; + } + + D3D12_DESCRIPTOR_HEAP_DESC heap_desc{ + .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + .NumDescriptors = 1, + .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE + }; + + auto x = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + device->CreateDescriptorHeap(&heap_desc, internal.heap.iid(), internal.heap.put_void()); + device->CreateUnorderedAccessView(std::get<0>(texture), nullptr, &uav_desc, internal.heap->GetCPUDescriptorHandleForHeapStart()); + return out_resource; +} + bool wis::ImplDX12Device::QueryFeatureSupport(wis::DeviceFeature feature) const noexcept { switch (feature) { @@ 
-661,65 +758,92 @@ bool wis::ImplDX12Device::QueryFeatureSupport(wis::DeviceFeature feature) const } wis::DX12DescriptorStorage -wis::ImplDX12Device::CreateDescriptorStorage(wis::Result& result, const wis::DescriptorStorageDesc& desc) const noexcept +wis::ImplDX12Device::CreateDescriptorStorage(wis::Result& result, + const wis::DescriptorBindingDesc* descriptor_bindings, + uint32_t descriptor_bindings_count, + wis::DescriptorMemory memory) const noexcept { DX12DescriptorStorage out_storage; auto& internal = out_storage.GetMutableInternal(); - uint32_t size_resources = desc.cbuffer_count + desc.sbuffer_count + desc.stexture_count + desc.texture_count + desc.rbuffer_count; - uint32_t size_samplers = desc.sampler_count; + // calculate sizes + uint32_t size_resources = 0; + uint32_t size_samplers = 0; - std::array sizes{ - desc.sampler_count, desc.cbuffer_count, desc.texture_count, desc.stexture_count, desc.sbuffer_count, desc.rbuffer_count - }; + for (uint32_t i = 0; i < descriptor_bindings_count; ++i) { + auto& desc = descriptor_bindings[i]; + if (desc.binding_type == wis::DescriptorType::Sampler) { + size_samplers += desc.binding_count; + } else { + size_resources += desc.binding_count; + } + } + // create descriptor heaps D3D12_DESCRIPTOR_HEAP_DESC resource_heap_desc{ .Type = D3D12_DESCRIPTOR_HEAP_TYPE::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, .NumDescriptors = size_resources, - .Flags = convert_dx(desc.memory), + .Flags = convert_dx(memory), .NodeMask = 0u }; D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc{ .Type = D3D12_DESCRIPTOR_HEAP_TYPE::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .NumDescriptors = size_samplers, - .Flags = convert_dx(desc.memory), + .Flags = convert_dx(memory), .NodeMask = 0u }; if (size_resources) { // create resource heap - auto hr = device->CreateDescriptorHeap(&resource_heap_desc, __uuidof(*internal.heap_resource), internal.heap_resource.put_void()); + auto hr = device->CreateDescriptorHeap(&resource_heap_desc, internal.heaps[0].iid(), 
internal.heaps[0].put_void()); if (!wis::succeeded(hr)) { result = wis::make_result(hr); return out_storage; } - internal.heap_resource_increment = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - internal.heap_starts[1] = internal.heap_resource->GetCPUDescriptorHandleForHeapStart(); - internal.heap_gpu_starts[0] = internal.heap_resource->GetGPUDescriptorHandleForHeapStart(); - - for (uint32_t i = 2; i < sizes.size(); i++) { // first one is sampler - internal.heap_starts[i] = - sizes[i] - ? CD3DX12_CPU_DESCRIPTOR_HANDLE(internal.heap_starts[i - 1], sizes[i - 1], internal.heap_resource_increment) - : CD3DX12_CPU_DESCRIPTOR_HANDLE(); - } + internal.heap_cpu_starts[0] = internal.heaps[0]->GetCPUDescriptorHandleForHeapStart(); + internal.heap_gpu_starts[0] = internal.heaps[0]->GetGPUDescriptorHandleForHeapStart(); } if (size_samplers) { // create sampler heap - auto hr = device->CreateDescriptorHeap(&sampler_heap_desc, __uuidof(*internal.heap_sampler), internal.heap_sampler.put_void()); + auto hr = device->CreateDescriptorHeap(&sampler_heap_desc, internal.heaps[1].iid(), internal.heaps[1].put_void()); if (!wis::succeeded(hr)) { result = wis::make_result(hr); return out_storage; } internal.heap_sampler_increment = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - internal.heap_starts[0] = internal.heap_sampler->GetCPUDescriptorHandleForHeapStart(); - internal.heap_gpu_starts[1] = internal.heap_sampler->GetGPUDescriptorHandleForHeapStart(); + internal.heap_cpu_starts[1] = internal.heaps[1]->GetCPUDescriptorHandleForHeapStart(); + internal.heap_gpu_starts[1] = internal.heaps[1]->GetGPUDescriptorHandleForHeapStart(); } internal.device = device; + // allocate descriptors + using OffsetIndicator = Internal::OffsetIndicator; + internal.heap_offsets = wis::detail::make_unique_for_overwrite(descriptor_bindings_count); + internal.heap_count = descriptor_bindings_count; + + if (!internal.heap_offsets) { + result = 
wis::make_result(E_OUTOFMEMORY); + return out_storage; + } + + OffsetIndicator resource_start{}; + OffsetIndicator sampler_start{ .sampler = 1 }; + + for (uint32_t i = 0; i < descriptor_bindings_count; ++i) { + auto& desc = descriptor_bindings[i]; + D3D12_CPU_DESCRIPTOR_HANDLE handle; + if (desc.binding_type == wis::DescriptorType::Sampler) { + internal.heap_offsets[i] = sampler_start; + sampler_start.offset_in_bytes += desc.binding_count * internal.heap_sampler_increment; + } else { + internal.heap_offsets[i] = resource_start; + resource_start.offset_in_bytes += desc.binding_count * internal.heap_resource_increment; + } + } + return out_storage; } @@ -743,42 +867,28 @@ constexpr inline D3D12_ROOT_PARAMETER_TYPE to_dx(wis::DescriptorType type) noexc wis::DX12RootSignature wis::ImplDX12Device::CreateRootSignature(wis::Result& result, - const wis::PushConstant* constants, - uint32_t constants_size, - const PushDescriptor* push_descriptors, - uint32_t push_descriptors_size, - uint32_t space_overlap_count) const noexcept + const wis::PushConstant* push_constants, + uint32_t push_constants_count, + const wis::PushDescriptor* push_descriptors, + uint32_t push_descriptors_count, + const wis::DescriptorBindingDesc* descriptor_bindings, + uint32_t descriptor_bindings_count) const noexcept { - DX12RootSignature out_signature; - auto& internal = out_signature.GetMutableInternal(); + DX12RootSignature out_state; + auto& internal = out_state.GetMutableInternal(); - if (constants_size > wis::max_push_constants) { - result = wis::make_result(E_INVALIDARG); - return out_signature; - } - if (push_descriptors_size > wis::max_push_descriptors) { - result = wis::make_result(E_INVALIDARG); - return out_signature; - } - if (space_overlap_count > wis::max_descriptor_space_overlap) { - result = wis::make_result(E_INVALIDARG); - return out_signature; + D3D12_ROOT_PARAMETER1 root_params[64]{}; // 64 is the maximum number of root parameters + if (push_constants_count + push_descriptors_count 
* 2 + descriptor_bindings_count > 64) { + result = wis::make_result(E_INVALIDARG); + return out_state; } - uint32_t push_constants_count = constants_size; - uint32_t push_descriptors_count = push_descriptors_size; - uint32_t space_overlap = space_overlap_count; - constexpr static uint32_t tables_size = +wis::BindingIndex::Count; - - // max push constants + max push descriptors + max tables - D3D12_ROOT_PARAMETER1 root_params[wis::max_push_constants + wis::max_push_descriptors + tables_size]{}; - // push constants std::array stage_map{}; std::fill(stage_map.begin(), stage_map.end(), -1); for (uint32_t i = 0; i < push_constants_count; ++i) { - auto& constant = constants[i]; + auto& constant = push_constants[i]; root_params[i] = { .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, .Constants = { @@ -794,8 +904,8 @@ wis::ImplDX12Device::CreateRootSignature(wis::Result& result, // push descriptors for (uint32_t i = 0; i < push_descriptors_count; ++i) { auto& descriptor = push_descriptors[i]; - root_params[i + push_constants_count] = { - .ParameterType = detail::to_dx(descriptor.type), + root_params[i + push_constants_count] = D3D12_ROOT_PARAMETER1{ + .ParameterType = wis::detail::to_dx(descriptor.type), .Descriptor = { .ShaderRegister = i, .RegisterSpace = 0, // always 0 for push descriptors @@ -804,179 +914,47 @@ wis::ImplDX12Device::CreateRootSignature(wis::Result& result, }; } - // - D3D12_DESCRIPTOR_RANGE1 memory[+wis::BindingIndex::Count][wis::max_descriptor_space_overlap]{}; - constexpr static D3D12_DESCRIPTOR_RANGE_TYPE types[+wis::BindingIndex::Count]{ - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, // sampler - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_CBV, // cbuffer - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // texture - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, // stexture - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, // sbuffer - 
D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // read only buffer - }; + // descriptor bindings + uint32_t memory_size = 0; + for (uint32_t i = 0; i < descriptor_bindings_count; ++i) { + auto& descriptor = descriptor_bindings[i]; + memory_size += std::max(descriptor.space_overlap_count, 1u); + } - uint32_t spaces = 1; // 0 is reserved for push constants - for (uint32_t i = 0; i < +wis::BindingIndex::Count; ++i) { - for (uint32_t j = 0; j < space_overlap; ++j) { - memory[i][j] = { - .RangeType = types[i], - .NumDescriptors = UINT32_MAX, - .BaseShaderRegister = 0, - .RegisterSpace = spaces++, - .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE, // always volatile for unbounded arrays - .OffsetInDescriptorsFromTableStart = j ? 0 : D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, - }; - } + std::unique_ptr memory = wis::detail::make_unique_for_overwrite(memory_size); + if (!memory) { + result = wis::make_result(E_OUTOFMEMORY); + return out_state; + } + auto* base_memory = memory.get(); + for (uint32_t i = 0; i < descriptor_bindings_count; ++i) { + auto& desc = descriptor_bindings[i]; + uint32_t spaces = std::max(desc.space_overlap_count, 1u); root_params[i + push_constants_count + push_descriptors_count] = { .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, .DescriptorTable = { - .NumDescriptorRanges = space_overlap, - .pDescriptorRanges = memory[i], + .NumDescriptorRanges = spaces, + .pDescriptorRanges = base_memory, }, .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, }; + for (uint32_t j = 0; j < spaces; ++j) { + base_memory[0] = { + .RangeType = convert_dx(desc.binding_type), + .NumDescriptors = UINT32_MAX, + .BaseShaderRegister = 0, + .RegisterSpace = j + desc.binding_space, + .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE, + .OffsetInDescriptorsFromTableStart = j ? 
0 : D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + }; + ++base_memory; + } } D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC desc; - desc.Init_1_1(push_constants_count + push_descriptors_count + tables_size, root_params, 0, nullptr, flags); - - wis::com_ptr signature; - wis::com_ptr error; - HRESULT hr = D3D12SerializeVersionedRootSignature(&desc, signature.put(), error.put()); - - if (!wis::succeeded(hr)) { - result = wis::make_result(hr); - return out_signature; - } - hr = device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), - internal.root.iid(), internal.root.put_void()); - - if (!wis::succeeded(hr)) { - result = wis::make_result(hr); - return out_signature; - } - internal.stage_map = stage_map; - internal.push_constant_count = push_constants_count; - internal.push_descriptor_count = push_descriptors_count; - - return out_signature; -} - -namespace wis::detail { -inline constexpr uint32_t allocate_space( - D3D12_ROOT_PARAMETER1* root_params, - D3D12_DESCRIPTOR_RANGE1* memory, - uint32_t spaces, - uint32_t base_space) -{ - spaces = std::min(spaces, 1u); - for (uint32_t j = 0; j < spaces; ++j) { - memory[j] = { - .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, - .NumDescriptors = UINT32_MAX, - .BaseShaderRegister = 0, - .RegisterSpace = base_space + j, - .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE, - .OffsetInDescriptorsFromTableStart = j ? 
0 : D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, - }; - } - *root_params = { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, - .DescriptorTable = { - .NumDescriptorRanges = spaces, - .pDescriptorRanges = memory, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, - }; - return spaces; -} -} // namespace wis::detail - -wis::DX12RootSignature -wis::ImplDX12Device::CreateRootSignature2(wis::Result& result, const wis::PushConstant* push_constants, - uint32_t constants_count, - const wis::PushDescriptor* push_descriptors, - uint32_t push_descriptors_count, - const wis::DescriptorSpacing* descriptor_spacing) const noexcept -{ - DX12RootSignature out_state; - auto& internal = out_state.GetMutableInternal(); - - if (constants_count > wis::max_push_constants) { - result = wis::make_result(E_INVALIDARG); - return out_state; - } - if (push_descriptors_count > wis::max_push_descriptors) { - result = wis::make_result(E_INVALIDARG); - return out_state; - } - wis::DescriptorSpacing default_spacing{}; - if (!descriptor_spacing) { - descriptor_spacing = &default_spacing; - } - - uint32_t push_constants_count = constants_count; - constexpr static uint32_t tables_size = +wis::BindingIndex::Count; - - // max push constants + max push descriptors + max tables - D3D12_ROOT_PARAMETER1 root_params[wis::max_push_constants + wis::max_push_descriptors + tables_size]{}; - - // push constants - std::array stage_map{}; - std::fill(stage_map.begin(), stage_map.end(), -1); - - for (uint32_t i = 0; i < push_constants_count; ++i) { - auto& constant = push_constants[i]; - root_params[i] = { - .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, - .Constants = { - .ShaderRegister = constant.bind_register, - .RegisterSpace = 0, - .Num32BitValues = constant.size_bytes / 4, - }, - .ShaderVisibility = D3D12_SHADER_VISIBILITY(constant.stage), - }; - stage_map[+constant.stage] = i; - } - - // push descriptors - for (uint32_t i = 0; i < push_descriptors_count; ++i) { - auto& descriptor = 
push_descriptors[i]; - root_params[i + push_constants_count] = { - .ParameterType = detail::to_dx(descriptor.type), - .Descriptor = { - .ShaderRegister = i, - .RegisterSpace = 0, // always 0 for push descriptors - }, - .ShaderVisibility = convert_dx(descriptor.stage), - }; - } - - // - D3D12_DESCRIPTOR_RANGE1 memory[+wis::BindingIndex::Count][wis::max_descriptor_space_overlap]{}; - constexpr static D3D12_DESCRIPTOR_RANGE_TYPE types[+wis::BindingIndex::Count]{ - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, // sampler - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_CBV, // cbuffer - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // texture - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, // stexture - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, // sbuffer - D3D12_DESCRIPTOR_RANGE_TYPE::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // read only buffer - }; - - uint32_t base_space = 1; // 0 is reserved for push constants - uint32_t allocated_spaces = 0; // 0 is reserved for push constants - allocated_spaces = detail::allocate_space(&root_params[0 + push_constants_count + push_descriptors_count], memory[0], descriptor_spacing->sampler_count, base_space); - allocated_spaces = detail::allocate_space(&root_params[1 + push_constants_count + push_descriptors_count], memory[1], descriptor_spacing->cbuffer_count, base_space += allocated_spaces); - allocated_spaces = detail::allocate_space(&root_params[2 + push_constants_count + push_descriptors_count], memory[2], descriptor_spacing->texture_count, base_space += allocated_spaces); - allocated_spaces = detail::allocate_space(&root_params[3 + push_constants_count + push_descriptors_count], memory[3], descriptor_spacing->stexture_count, base_space += allocated_spaces); - allocated_spaces = detail::allocate_space(&root_params[4 + push_constants_count + push_descriptors_count], memory[4], descriptor_spacing->sbuffer_count, base_space += allocated_spaces); - 
allocated_spaces = detail::allocate_space(&root_params[5 + push_constants_count + push_descriptors_count], memory[5], descriptor_spacing->rbuffer_count, base_space += allocated_spaces); - - D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC desc; - desc.Init_1_1(push_constants_count + push_descriptors_count + tables_size, root_params, 0, nullptr, flags); + desc.Init_1_1(push_constants_count + push_descriptors_count + descriptor_bindings_count, root_params, 0, nullptr, flags); wis::com_ptr signature; wis::com_ptr error; @@ -998,5 +976,4 @@ wis::ImplDX12Device::CreateRootSignature2(wis::Result& result, const wis::PushCo internal.push_descriptor_count = push_descriptors_count; return out_state; } - #endif // !DX12_DEVICE_CPP diff --git a/wisdom/include/wisdom/generated/api/api.hpp b/wisdom/include/wisdom/generated/api/api.hpp index 77f8d5dd..b919ae4d 100644 --- a/wisdom/include/wisdom/generated/api/api.hpp +++ b/wisdom/include/wisdom/generated/api/api.hpp @@ -47,10 +47,18 @@ struct DescriptorTable; struct SamplerDesc; struct ComponentMapping; struct ShaderResourceDesc; +struct UnorderedAccessDesc; struct FactoryExtQuery; struct DeviceExtQuery; -struct DescriptorStorageDesc; -struct DescriptorSpacing; +struct TopLevelASBuildDesc; +struct AcceleratedGeometryInput; +struct ASAllocationInfo; +struct DescriptorBindingDesc; +struct ShaderExport; +struct HitGroupDesc; +struct ShaderBindingTableInfo; +struct RaytracingDispatchDesc; +struct TextureCopyRegion; /** * @brief Shader stages that can be used in the pipeline. @@ -115,12 +123,14 @@ enum class Status : int32_t { }; /** - * @brief Determines the behavior when wait for multiple fences is issued. + * @brief Type of the queue to create. * * */ -enum class MutiWaitFlags : uint32_t { - All = 0, ///< All the fences in the batch are triggered. - Any = 1, ///< At least one of the fences from the batch is triggered. 
+enum class QueueType : uint32_t { + Graphics = 0, ///< Queue is used for graphics operations. + Compute = 2, ///< Queue is used for compute operations. + Copy = 3, ///< Queue is used for copy operations. + VideoDecode = 4, ///< Queue is used for video decoding operations. }; /** @@ -154,17 +164,27 @@ enum class DescriptorType : uint32_t { * May be bigger than constant buffers, but slower. * */ Buffer = 5, + AccelerationStructure = 6, ///< Descriptor is an acceleration structure. }; /** - * @brief Type of the queue to create. + * @brief Determines the behavior when wait for multiple fences is issued. * * */ -enum class QueueType : uint32_t { - Graphics = 0, ///< Queue is used for graphics operations. - Compute = 2, ///< Queue is used for compute operations. - Copy = 3, ///< Queue is used for copy operations. - VideoDecode = 4, ///< Queue is used for video decoding operations. +enum class MutiWaitFlags : uint32_t { + All = 0, ///< All the fences in the batch are triggered. + Any = 1, ///< At least one of the fences from the batch is triggered. +}; + +/** + * @brief Type of the geometry in the Acceleration Structure. + * + * Translates to VkGeometryTypeKHR for vk implementation. + * Translates to D3D12_RAYTRACING_GEOMETRY_TYPE for dx implementation. + * */ +enum class ASGeometryType : uint32_t { + Triangles = 0, ///< Triangles geometry type. Used for triangle meshes. + AABBs = 1, ///< Axis Aligned Bounding Boxes geometry type. Used for bounding volume hierarchies. }; /** @@ -187,6 +207,20 @@ enum class AdapterPreference { Performance = 2, }; +/** + * @brief Shader stages that can be used in the raytracing pipeline. + * + * Translates to VkShaderStageFlagBits for vk implementation. + * */ +enum class RaytracingShaderType : uint32_t { + Raygen = 0, ///< Ray generation shader stage. + Miss = 1, ///< Miss shader stage. + ClosestHit = 2, ///< Closest hit shader stage. + AnyHit = 3, ///< Any hit shader stage. + Intersection = 4, ///< Intersection shader stage. 
+ Callable = 5, ///< Callable shader stage. +}; + /** * @brief Log message severity. * Used with wis::DebugCallback and internal library logging. @@ -213,6 +247,26 @@ enum class Severity { Critical = 5, }; +/** + * @brief Level of the Raytracing Acceleration Structure. Used to create Acceleration structures. + * + * */ +enum class ASLevel : uint32_t { + Bottom = 0, ///< Bottom level Acceleration Structure. Contains geometry data. + Top = 1, ///< Top level Acceleration Structure. Contains instance data. +}; + +/** + * @brief Type of the hit group in the raytracing pipeline. + * + * Translates to VkRayTracingShaderGroupTypeKHR for vk implementation. + * Translates to D3D12_HIT_GROUP_TYPE for dx implementation. + * */ +enum class HitGroupType : uint32_t { + Triangles = 0, ///< Hit group for triangles. + Procedural = 1, ///< Hit group for procedural geometry. +}; + /** * @brief Input classification for vertex buffer data. * @@ -913,11 +967,12 @@ enum class LogicOp : uint32_t { * Translates to VkMemoryPropertyFlags for vk implementation. * */ enum class MemoryType : uint32_t { + Default = 0, ///< Default memory type. Alias for wis::MemoryType::DeviceLocal /** * @brief Default memory type. * Local device memory, most efficient for rendering. * */ - Default = 0, + DeviceLocal = 0, /** * @brief Upload memory type. * Used for data that is uploaded to the GPU Local memory using copy operations. @@ -996,32 +1051,6 @@ enum class TextureLayout : uint32_t { Texture3D = 8, ///< Texture is 3D volume. }; -/** - * @brief Binding index for resources. - * Used in wis::DescriptorStorage to determine which descriptor type goes where when binding. - * Same values are used for HLSL side to pick descriptors up. - * Space 0 and set 0 are reserved for push descriptors and push constants. - * - * */ -enum class BindingIndex : uint32_t { - /** - * @brief No binding index set.Results in [[vk::binding(*,0)]] and register(*). - * This space is reserved for push constants and push descriptors. 
- * */ - None = 0, - Sampler = 1, ///< Binding index for sampler descriptors. Results in [[vk::binding(0,1)]] and register(s0, space1). - ConstantBuffer = 2, ///< Binding index for constant buffer descriptors. Results in [[vk::binding(0,2)]] and register(b0, space2). - Texture = 3, ///< Binding index for texture descriptors. Results in [[vk::binding(0,3)]] and register(t0, space3). - RWTexture = 4, ///< Binding index for read-write texture descriptors. Results in [[vk::binding(0,4)]] and register(u0, space4). - RWBuffer = 5, ///< Binding index for read-write buffer descriptors. Results in [[vk::binding(0,5)]] and register(u0, space5). - /** - * @brief Binding index for read buffer descriptors. Results in [[vk::binding(0,6)]] and register(t0, space6). - * Can't be merged with Texture because of Vulkan. - * */ - Buffer = 6, - Count = 6, ///< Number of binding indices. Used for array sizes. -}; - /** * @brief Descriptor heap type. * @@ -1250,14 +1279,21 @@ enum class ColorComponents { * @brief Buffer usage flags. * Determine how the buffer can be used throughout its lifetime. * + * Translates to VkBufferUsageFlags for vk implementation. + * Translates to D3D12_RESOURCE_FLAGS for dx implementation. * */ enum class BufferUsage { None = 0x0, ///< No flags set. Buffer is not used. CopySrc = 1 << 0, ///< Buffer is used as a source for copy operations. CopyDst = 1 << 1, ///< Buffer is used as a destination for copy operations. - ConstantBuffer = 1 << 4, ///< Buffer is used as a constant buffer. - IndexBuffer = 1 << 6, ///< Buffer is used as an index buffer. - VertexBuffer = 1 << 7, ///< Buffer is used as a vertex buffer or an instance buffer. + ConstantBuffer = 1 << 2, ///< Buffer is used as a constant buffer. + IndexBuffer = 1 << 3, ///< Buffer is used as an index buffer. + VertexBuffer = 1 << 4, ///< Buffer is used as a vertex buffer or an instance buffer. + IndirectBuffer = 1 << 5, ///< Buffer is used as an indirect buffer. 
+ StorageBuffer = 1 << 6, ///< Buffer is used as a storage unordered access buffer. + AccelerationStructureBuffer = 1 << 7, ///< Buffer is used as an acceleration structure buffer. + AccelerationStructureInput = 1 << 8, ///< Buffer is used as a read only acceleration instance input buffer. + ShaderBindingTable = 1 << 9, ///< Buffer is used as a shader binding table buffer. }; /** @@ -1360,8 +1396,8 @@ enum class ResourceAccess { CopyDest = 1 << 10, ///< Copy destination access. CopySource = 1 << 11, ///< Copy source access. ConditionalRendering = 1 << 12, ///< Conditional rendering access. - AccelerationStrucureRead = 1 << 13, ///< Acceleration structure read access. - AccelerationStrucureWrite = 1 << 14, ///< Acceleration structure write access. + AccelerationStructureRead = 1 << 13, ///< Acceleration structure read access. + AccelerationStructureWrite = 1 << 14, ///< Acceleration structure write access. ShadingRate = 1 << 15, ///< Shading rate access. Used in variable shading rate. VideoDecodeRead = 1 << 16, ///< Video decode read access. VideoDecodeWrite = 1 << 17, ///< Video decode write access. @@ -1413,6 +1449,47 @@ enum class PipelineFlags { DescriptorBuffer = 1 << 0, }; +/** + * @brief Geometry flags for additional geometry features + * + * Translates to D3D12_RAYTRACING_GEOMETRY_FLAGS for dx implementation. + * Translates to VkGeometryFlagsKHR for vk implementation. + * */ +enum class ASGeometryFlags { + None = 0x0, ///< No flags set. Geometry is regular. + Opaque = 1 << 0, ///< Geometry is opaque. Used for opaque geometry. + NoDuplicateAnyHitInvocation = 1 << 1, ///< Geometry has no duplicate any hit invocation. +}; + +/** + * @brief Acceleration structure flags for additional acceleration structure features + * + * Translates to D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS for dx implementation. + * Translates to VkBuildAccelerationStructureFlagsKHR for vk implementation. 
+ * */ +enum class AccelerationStructureFlags { + None = 0x0, ///< No flags set. Acceleration structure is regular. + AllowUpdate = 1 << 0, ///< Acceleration structure is allowed to be updated. + AllowCompaction = 1 << 1, ///< Acceleration structure is allowed to be compacted. + PreferFastTrace = 1 << 2, ///< Acceleration structure is preferred to be fast traced. + PreferFastBuild = 1 << 3, ///< Acceleration structure is preferred to be fast built. + MinimizeMemory = 1 << 4, ///< Acceleration structure is minimized for memory usage. +}; + +/** + * @brief Instance flags for additional instance features + * + * Translates to D3D12_RAYTRACING_INSTANCE_FLAGS for dx implementation. + * Translates to VkGeometryInstanceFlagsKHR for vk implementation. + * */ +enum class ASInstanceFlags { + None = 0x0, ///< No flags set. Instance is regular. + TriangleCullDisable = 1 << 0, ///< Triangle cull is disabled. + TriangleFrontCounterClockwise = 1 << 1, ///< Triangle front is counter clockwise. + ForceOpaque = 1 << 2, ///< Force opaque. + ForceNoOpaque = 1 << 3, ///< Force no opaque. +}; + /** * @brief Main source of communication of operation success. * To check for success compare wis::Result::status with wis::Status::Ok. @@ -1786,6 +1863,15 @@ struct ShaderResourceDesc { wis::SubresourceRange subresource_range; ///< Subresource range of the resource. }; +/** + * @brief Unordered access description for RW Texture creation. + * */ +struct UnorderedAccessDesc { + wis::DataFormat format; ///< Resource format. + wis::TextureViewType view_type; ///< Resource view type. + wis::SubresourceRange subresource_range; ///< Subresource range of the resource. +}; + /** * @brief Struct used to query the extensions for C code. * Queried results should not be freed, their lifetime ends with the Factory they were created with. @@ -1819,30 +1905,124 @@ struct DeviceExtQuery { }; /** - * @brief Descriptor storage description for wis::DescriptorStorage creation. 
+ * @brief Top level acceleration structure build description. * */ -struct DescriptorStorageDesc { - uint32_t sampler_count; ///< Count of sampler descriptors to allocate. - uint32_t cbuffer_count; ///< Count of constant buffer descriptors to allocate. - uint32_t sbuffer_count; ///< Count of storage buffer descriptors to allocate. - uint32_t texture_count; ///< Count of texture descriptors to allocate. - uint32_t stexture_count; ///< Count of storage texture descriptors to allocate. - uint32_t rbuffer_count; ///< Count of read only storage buffer descriptors to allocate. - wis::DescriptorMemory memory; ///< Descriptor memory to use. +struct TopLevelASBuildDesc { + wis::AccelerationStructureFlags flags; ///< Build flags. + uint32_t instance_count; ///< Instance count. + uint64_t gpu_address; ///< Address of instances. + bool indirect; ///< If true Buffer under address contains pointers to the instances, rather than instances themselves. + /** + * @brief true means the acceleration structure is being updated. + * flags must have contained wis::AccelerationStructureFlags::AllowUpdate to perform updates. + * */ + bool update; +}; + +/** + * @brief Geometry description for bottom-level acceleration structure. May contain AABBs or Triangles. + * */ +struct AcceleratedGeometryInput { + wis::ASGeometryType geometry_type; ///< Type of the geometry (Triangles/AABB). + wis::ASGeometryFlags flags; ///< Geometry flags. + uint64_t vertex_or_aabb_buffer_address; ///< Buffer address of the buffer containing vertex data or AABB data (float [6]) depending on the geometry type. + uint64_t vertex_or_aabb_buffer_stride; ///< Stride of the vertex buffer in bytes or stride of the AABB buffer in bytes. + uint64_t index_buffer_address; ///< Buffer address of the buffer containing index data. Unused for wis::ASGeometryType::AABBs. + uint64_t transform_matrix_address; ///< GPU address of the buffer containing the transform matrix (float [3][4]). Unused for wis::ASGeometryType::AABBs.
+ uint32_t vertex_count; ///< Vertex count. Unused for wis::ASGeometryType::AABBs. + uint32_t triangle_or_aabb_count; ///< For triangles it is equal to (index_count/3) and count for AABBs. + wis::DataFormat vertex_format; ///< Format of the vertices. Unused for wis::ASGeometryType::AABBs. + wis::IndexType index_format; ///< Format of the indices. Unused for wis::ASGeometryType::AABBs. +}; + +/** + * @brief Acceleration structure allocation info. Used to query sizes for AS build/update buffers. + * */ +struct ASAllocationInfo { + uint64_t scratch_size; ///< Size of the scratch buffer. + uint64_t result_size; ///< Size of the result buffer. + uint64_t update_size; ///< Size of the update buffer. }; /** - * @brief Describes how many types can descriptors be reinterpreted as. - * Minimal amount of spaces for each type is 1, 0 is treated as 1. - * Used for RootSignature. + * @brief Descriptor binding description for RootSignature and Descriptor Storage creation. + * Description place in array determines binding index that this lane maps to. e.g. bindings[1] means on HLSL side this results in [[vk::binding(0,1)]]. + * All the bindings in Descriptor Storage are unbounded, array of these structures determine the presence and order of the bindings. * */ -struct DescriptorSpacing { - uint32_t sampler_count; ///< Count of spaces of sampler descriptors to allocate. - uint32_t cbuffer_count; ///< Count of spaces of constant buffer descriptors to allocate. - uint32_t sbuffer_count; ///< Count of spaces of storage buffer descriptors to allocate. - uint32_t texture_count; ///< Count of spaces of texture descriptors to allocate. - uint32_t stexture_count; ///< Count of spaces of storage texture descriptors to allocate. - uint32_t rbuffer_count; ///< Count of spaces of read only storage buffer descriptors to allocate. +struct DescriptorBindingDesc { + wis::DescriptorType binding_type; ///< Binding type. Must be unique in array. 
+ uint32_t binding_space; ///< Binding space number in HLSL. + /** + * @brief Number of consecutive spaces this binding occupies. + * e.g. for binding_space = 1 and space_overlap_count = 3, HLSL binding will be :register(x0,space1), register(x0,space2), register(x0,space3) + * This is useful for binding multiple resource types to the same register array in HLSL. + * */ + uint32_t space_overlap_count; + /** + * @brief How many bindings should be allocated. + * Affects only the count of descriptors allocated in the descriptor heap, Root Signature always receives unbounded array with max amount of 4096 registers. + * */ + uint32_t binding_count; +}; + +/** + * @brief Defines export shader functions from a library shader. + * */ +struct ShaderExport { + const char* entry_point; ///< Entry point of the shader. + wis::RaytracingShaderType shader_type; ///< Type of the shader. + uint32_t shader_array_index; ///< Index of the shader in the shader array. +}; + +/** + * @brief Hit group description for Raytracing pipeline. + * */ +struct HitGroupDesc { + /** + * @brief Type of the hit group. + * wis::HitGroupType::Triangles - hit group for triangles. Uses closest hit shader and optionally any hit shader for transparency. + * wis::HitGroupType::Procedural - hit group for procedural geometry. Uses intersection shader and optionally any hit shader for transparency. + * */ + wis::HitGroupType type; + uint32_t closest_hit_export_index = UINT32_MAX; ///< Closest hit shader from wis::ShaderExport. + uint32_t any_hit_export_index = UINT32_MAX; ///< Any hit shader. + uint32_t intersection_export_index = UINT32_MAX; ///< Intersection shader. +}; + +/** + * @brief Shader binding table description for Raytracing pipeline. + * */ +struct ShaderBindingTableInfo { + uint32_t entry_size; ///< Size/stride of the entry in bytes. + uint32_t table_start_alignment; ///< Alignment of the table start in bytes. +}; + +/** + * @brief Raytracing dispatch description for wis::CommandList. 
+ * */ +struct RaytracingDispatchDesc { + uint64_t ray_gen_shader_table_address; ///< Address of the ray generation shader table. + uint64_t miss_shader_table_address; ///< Address of the miss shader table. + uint64_t hit_group_table_address; ///< Address of the hit group shader table. + uint64_t callable_shader_table_address; ///< Address of the callable shader table. + uint32_t ray_gen_shader_table_size; ///< Size of the ray generation shader table in bytes. + uint32_t miss_shader_table_size; ///< Size of the miss shader table in bytes. + uint32_t hit_group_table_size; ///< Size of the hit group shader table in bytes. + uint32_t callable_shader_table_size; ///< Size of the callable shader table in bytes. + uint32_t miss_shader_table_stride; ///< Stride of the miss shader table in bytes. + uint32_t hit_group_table_stride; ///< Stride of the hit group shader table in bytes. + uint32_t callable_shader_table_stride; ///< Stride of the callable shader table in bytes. + uint32_t width; ///< Width of the dispatch in number of rays. + uint32_t height; ///< Height of the dispatch in number of rays. + uint32_t depth; ///< Depth of the dispatch in number of rays. +}; + +/** + * @brief Texture to texture copy region. + * */ +struct TextureCopyRegion { + wis::TextureRegion src; ///< Source texture region. + wis::TextureRegion dst; ///< Destination texture region. 
}; //=================================DELEGATES================================= @@ -1892,6 +2072,15 @@ struct is_flag_enum : public std::true_type { template<> struct is_flag_enum : public std::true_type { }; +template<> +struct is_flag_enum : public std::true_type { +}; +template<> +struct is_flag_enum : public std::true_type { +}; +template<> +struct is_flag_enum : public std::true_type { +}; //============================== CONSTS ============================== static inline constexpr Result success{ diff --git a/wisdom/include/wisdom/generated/dx12/dx12_structs.hpp b/wisdom/include/wisdom/generated/dx12/dx12_structs.hpp index d2016974..786db5a7 100644 --- a/wisdom/include/wisdom/generated/dx12/dx12_structs.hpp +++ b/wisdom/include/wisdom/generated/dx12/dx12_structs.hpp @@ -5,6 +5,17 @@ #include namespace wis { +/** + * @brief Bottom level acceleration structure build description. + * */ +struct DX12BottomLevelASBuildDesc { + wis::AccelerationStructureFlags flags; ///< Build flags. + uint32_t geometry_count; ///< Geometry count. + const wis::DX12AcceleratedGeometryDesc* geometry_array; ///< Buffer of geometries. + const wis::DX12AcceleratedGeometryDesc** geometry_indirect; ///< Buffer of pointers to geometry. geometry_array must be NULL for this to be used. + bool update; ///< true If the acceleration structure is being updated. +}; + /** * @brief Variant of BufferBarrier with BufferView. * */ @@ -32,6 +43,27 @@ struct DX12GraphicsShaderStages { wis::DX12ShaderView pixel; ///< Pixel shader. }; +/** + * @brief Raytracing pipeline descriptor for pipeline creation. + * */ +struct DX12RaytracingPipeineDesc { + wis::DX12RootSignatureView root_signature; ///< Root signature. + const wis::DX12ShaderView* shaders; ///< Shader libraries. + uint32_t shader_count; ///< Shader library count. + const wis::ShaderExport* exports; ///< Shader library exports (entry points). + uint32_t export_count; ///< Shader export count. + /** + * @brief Hit group descriptions. 
+ * Note: Raygen and miss shaders don't have their dedicated shader groups, instead groups are defined in order of appearance in . + * And groups for SBTs are exported as raygen:miss:hit. + * */ + const wis::HitGroupDesc* hit_groups; + uint32_t hit_group_count; ///< Hit group count. + uint32_t max_recursion_depth = 1; ///< Max recursion depth. Default is 1. + uint32_t max_payload_size = 0; ///< Max payload size. Default is 0. + uint32_t max_attribute_size = 0; ///< Max attribute size. Default is 0. +}; + /** * @brief Variant of PipelineStateDesc for graphics pipeline. * */ @@ -53,6 +85,14 @@ struct DX12GraphicsPipelineDesc { wis::PipelineFlags flags; ///< Pipeline flags to add options to pipeline creation. }; +/** + * @brief Variant of PipelineStateDesc for compute pipeline. + * */ +struct DX12ComputePipelineDesc { + wis::DX12RootSignatureView root_signature; ///< Root signature. + wis::DX12ShaderView shader; ///< Compute shader. +}; + /** * @brief Variant of RenderPassDesc for render target. 
* */ @@ -111,6 +151,17 @@ inline constexpr D3D12_SHADER_VISIBILITY convert_dx(ShaderStages value) noexcept { return static_cast(value); } +inline constexpr D3D12_HIT_GROUP_TYPE convert_dx(HitGroupType value) noexcept +{ + switch (value) { + default: + return {}; + case HitGroupType::Triangles: + return D3D12_HIT_GROUP_TYPE_TRIANGLES; + case HitGroupType::Procedural: + return D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE; + } +} inline constexpr D3D12_DESCRIPTOR_RANGE_TYPE convert_dx(DescriptorType value) noexcept { switch (value) { @@ -128,6 +179,8 @@ inline constexpr D3D12_DESCRIPTOR_RANGE_TYPE convert_dx(DescriptorType value) no return D3D12_DESCRIPTOR_RANGE_TYPE_UAV; case DescriptorType::Buffer: return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + case DescriptorType::AccelerationStructure: + return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; } } inline constexpr DXGI_FORMAT convert_dx(DataFormat value) noexcept @@ -175,7 +228,7 @@ inline constexpr D3D12_HEAP_TYPE convert_dx(MemoryType value) noexcept switch (value) { default: return {}; - case MemoryType::Default: + case MemoryType::DeviceLocal: return D3D12_HEAP_TYPE_DEFAULT; case MemoryType::Upload: return D3D12_HEAP_TYPE_UPLOAD; @@ -364,6 +417,28 @@ inline constexpr DXGI_FORMAT convert_dx(IndexType value) noexcept return DXGI_FORMAT_R32_UINT; } } +inline constexpr D3D12_RAYTRACING_GEOMETRY_TYPE convert_dx(ASGeometryType value) noexcept +{ + switch (value) { + default: + return {}; + case ASGeometryType::Triangles: + return D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES; + case ASGeometryType::AABBs: + return D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS; + } +} +inline constexpr D3D12_RESOURCE_FLAGS convert_dx(BufferUsage value) noexcept +{ + D3D12_RESOURCE_FLAGS output = {}; + if (value & BufferUsage::StorageBuffer) { + output |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + if (value & BufferUsage::AccelerationStructureBuffer) { + output |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS | 
D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE; + } + return output; +} inline constexpr D3D12MA::ALLOCATION_FLAGS convert_dx(MemoryFlags value) noexcept { D3D12MA::ALLOCATION_FLAGS output = {}; @@ -479,10 +554,10 @@ inline constexpr D3D12_BARRIER_ACCESS convert_dx(ResourceAccess value) noexcept if (value & ResourceAccess::ConditionalRendering) { output |= D3D12_BARRIER_ACCESS_PREDICATION; } - if (value & ResourceAccess::AccelerationStrucureRead) { + if (value & ResourceAccess::AccelerationStructureRead) { output |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; } - if (value & ResourceAccess::AccelerationStrucureWrite) { + if (value & ResourceAccess::AccelerationStructureWrite) { output |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE; } if (value & ResourceAccess::ShadingRate) { @@ -555,4 +630,52 @@ inline constexpr D3D12_PIPELINE_STATE_FLAGS convert_dx(PipelineFlags value) noex D3D12_PIPELINE_STATE_FLAGS output = {}; return output; } +inline constexpr D3D12_RAYTRACING_GEOMETRY_FLAGS convert_dx(ASGeometryFlags value) noexcept +{ + D3D12_RAYTRACING_GEOMETRY_FLAGS output = {}; + if (value & ASGeometryFlags::Opaque) { + output |= D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE; + } + if (value & ASGeometryFlags::NoDuplicateAnyHitInvocation) { + output |= D3D12_RAYTRACING_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION; + } + return output; +} +inline constexpr D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS convert_dx(AccelerationStructureFlags value) noexcept +{ + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS output = {}; + if (value & AccelerationStructureFlags::AllowUpdate) { + output |= D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE; + } + if (value & AccelerationStructureFlags::AllowCompaction) { + output |= D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION; + } + if (value & AccelerationStructureFlags::PreferFastTrace) { + output |= 
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE; + } + if (value & AccelerationStructureFlags::PreferFastBuild) { + output |= D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD; + } + if (value & AccelerationStructureFlags::MinimizeMemory) { + output |= D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY; + } + return output; +} +inline constexpr D3D12_RAYTRACING_INSTANCE_FLAGS convert_dx(ASInstanceFlags value) noexcept +{ + D3D12_RAYTRACING_INSTANCE_FLAGS output = {}; + if (value & ASInstanceFlags::TriangleCullDisable) { + output |= D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE; + } + if (value & ASInstanceFlags::TriangleFrontCounterClockwise) { + output |= D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE; + } + if (value & ASInstanceFlags::ForceOpaque) { + output |= D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OPAQUE; + } + if (value & ASInstanceFlags::ForceNoOpaque) { + output |= D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_NON_OPAQUE; + } + return output; +} } // namespace wis diff --git a/wisdom/include/wisdom/generated/vulkan/vk_functions.hpp b/wisdom/include/wisdom/generated/vulkan/vk_functions.hpp index 72820219..6528365d 100644 --- a/wisdom/include/wisdom/generated/vulkan/vk_functions.hpp +++ b/wisdom/include/wisdom/generated/vulkan/vk_functions.hpp @@ -84,6 +84,11 @@ using PFN_vkGetBufferDeviceAddress = PFN_vkGetBufferDeviceAddressEXT; #endif #if !(defined(VK_VERSION_1_3)) #if defined(VK_KHR_copy_commands2) +using PFN_vkCmdCopyImage2 = PFN_vkCmdCopyImage2KHR; +#endif +#endif +#if !(defined(VK_VERSION_1_3)) +#if defined(VK_KHR_copy_commands2) using PFN_vkCmdCopyBufferToImage2 = PFN_vkCmdCopyBufferToImage2KHR; #endif #endif @@ -129,20 +134,25 @@ struct VKMainGlobal { bool Init(LibTokenView token) noexcept { vkGetInstanceProcAddr = token.GetProcAddress("vkGetInstanceProcAddr"); - if (vkGetInstanceProcAddr == nullptr) + if (vkGetInstanceProcAddr == nullptr) { return false; + } vkGetDeviceProcAddr = 
token.GetProcAddress("vkGetDeviceProcAddr"); - if (vkGetDeviceProcAddr == nullptr) + if (vkGetDeviceProcAddr == nullptr) { return false; + } vkEnumerateInstanceLayerProperties = token.GetProcAddress("vkEnumerateInstanceLayerProperties"); - if (vkEnumerateInstanceLayerProperties == nullptr) + if (vkEnumerateInstanceLayerProperties == nullptr) { return false; + } vkEnumerateInstanceExtensionProperties = token.GetProcAddress("vkEnumerateInstanceExtensionProperties"); - if (vkEnumerateInstanceExtensionProperties == nullptr) + if (vkEnumerateInstanceExtensionProperties == nullptr) { return false; + } vkCreateInstance = token.GetProcAddress("vkCreateInstance"); - if (vkCreateInstance == nullptr) + if (vkCreateInstance == nullptr) { return false; + } vkEnumerateInstanceVersion = token.GetProcAddress("vkEnumerateInstanceVersion"); return true; } @@ -178,26 +188,33 @@ struct VKMainInstance { bool Init(VkInstance instance, PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) noexcept { vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceMemoryProperties"); - if (vkGetPhysicalDeviceMemoryProperties == nullptr) + if (vkGetPhysicalDeviceMemoryProperties == nullptr) { return false; + } vkDestroyInstance = (PFN_vkDestroyInstance)vkGetInstanceProcAddr(instance, "vkDestroyInstance"); - if (vkDestroyInstance == nullptr) + if (vkDestroyInstance == nullptr) { return false; + } vkCreateDevice = (PFN_vkCreateDevice)vkGetInstanceProcAddr(instance, "vkCreateDevice"); - if (vkCreateDevice == nullptr) + if (vkCreateDevice == nullptr) { return false; + } vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties"); - if (vkGetPhysicalDeviceProperties == nullptr) + if (vkGetPhysicalDeviceProperties == nullptr) { return false; + } vkEnumeratePhysicalDevices = (PFN_vkEnumeratePhysicalDevices)vkGetInstanceProcAddr(instance, 
"vkEnumeratePhysicalDevices"); - if (vkEnumeratePhysicalDevices == nullptr) + if (vkEnumeratePhysicalDevices == nullptr) { return false; + } vkGetPhysicalDeviceQueueFamilyProperties = (PFN_vkGetPhysicalDeviceQueueFamilyProperties)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceQueueFamilyProperties"); - if (vkGetPhysicalDeviceQueueFamilyProperties == nullptr) + if (vkGetPhysicalDeviceQueueFamilyProperties == nullptr) { return false; + } vkEnumerateDeviceExtensionProperties = (PFN_vkEnumerateDeviceExtensionProperties)vkGetInstanceProcAddr(instance, "vkEnumerateDeviceExtensionProperties"); - if (vkEnumerateDeviceExtensionProperties == nullptr) + if (vkEnumerateDeviceExtensionProperties == nullptr) { return false; + } static constexpr std::array vkGetPhysicalDeviceFeatures2_strings{ #if defined(VK_VERSION_1_1) "vkGetPhysicalDeviceFeatures2", @@ -206,11 +223,14 @@ struct VKMainInstance { "vkGetPhysicalDeviceFeatures2KHR", #endif }; - for (auto vkGetPhysicalDeviceFeatures2_it : vkGetPhysicalDeviceFeatures2_strings) - if ((vkGetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(instance, vkGetPhysicalDeviceFeatures2_it))) + for (auto vkGetPhysicalDeviceFeatures2_it : vkGetPhysicalDeviceFeatures2_strings) { + if ((vkGetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(instance, vkGetPhysicalDeviceFeatures2_it))) { break; - if (vkGetPhysicalDeviceFeatures2 == nullptr) + } + } + if (vkGetPhysicalDeviceFeatures2 == nullptr) { return false; + } static constexpr std::array vkGetPhysicalDeviceProperties2_strings{ #if defined(VK_VERSION_1_1) "vkGetPhysicalDeviceProperties2", @@ -219,11 +239,14 @@ struct VKMainInstance { "vkGetPhysicalDeviceProperties2KHR", #endif }; - for (auto vkGetPhysicalDeviceProperties2_it : vkGetPhysicalDeviceProperties2_strings) - if ((vkGetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(instance, vkGetPhysicalDeviceProperties2_it))) + for (auto 
vkGetPhysicalDeviceProperties2_it : vkGetPhysicalDeviceProperties2_strings) { + if ((vkGetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(instance, vkGetPhysicalDeviceProperties2_it))) { break; - if (vkGetPhysicalDeviceProperties2 == nullptr) + } + } + if (vkGetPhysicalDeviceProperties2 == nullptr) { return false; + } static constexpr std::array vkGetPhysicalDeviceMemoryProperties2_strings{ #if defined(VK_VERSION_1_1) "vkGetPhysicalDeviceMemoryProperties2", @@ -232,30 +255,38 @@ struct VKMainInstance { "vkGetPhysicalDeviceMemoryProperties2KHR", #endif }; - for (auto vkGetPhysicalDeviceMemoryProperties2_it : vkGetPhysicalDeviceMemoryProperties2_strings) - if ((vkGetPhysicalDeviceMemoryProperties2 = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetInstanceProcAddr(instance, vkGetPhysicalDeviceMemoryProperties2_it))) + for (auto vkGetPhysicalDeviceMemoryProperties2_it : vkGetPhysicalDeviceMemoryProperties2_strings) { + if ((vkGetPhysicalDeviceMemoryProperties2 = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetInstanceProcAddr(instance, vkGetPhysicalDeviceMemoryProperties2_it))) { break; + } + } vkGetPhysicalDeviceSurfaceCapabilities2KHR = (PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceCapabilities2KHR"); vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR)vkGetInstanceProcAddr(instance, "vkDestroySurfaceKHR"); - if (vkDestroySurfaceKHR == nullptr) + if (vkDestroySurfaceKHR == nullptr) { return false; + } vkGetPhysicalDeviceSurfaceSupportKHR = (PFN_vkGetPhysicalDeviceSurfaceSupportKHR)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceSupportKHR"); - if (vkGetPhysicalDeviceSurfaceSupportKHR == nullptr) + if (vkGetPhysicalDeviceSurfaceSupportKHR == nullptr) { return false; + } vkGetPhysicalDeviceSurfaceCapabilitiesKHR = (PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); - if 
(vkGetPhysicalDeviceSurfaceCapabilitiesKHR == nullptr) + if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR == nullptr) { return false; + } vkGetPhysicalDeviceSurfaceFormatsKHR = (PFN_vkGetPhysicalDeviceSurfaceFormatsKHR)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceFormatsKHR"); - if (vkGetPhysicalDeviceSurfaceFormatsKHR == nullptr) + if (vkGetPhysicalDeviceSurfaceFormatsKHR == nullptr) { return false; + } vkGetPhysicalDeviceSurfacePresentModesKHR = (PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfacePresentModesKHR"); - if (vkGetPhysicalDeviceSurfacePresentModesKHR == nullptr) + if (vkGetPhysicalDeviceSurfacePresentModesKHR == nullptr) { return false; + } return true; } }; struct VKMainDevice { + PFN_vkCmdCopyImage2 vkCmdCopyImage2; PFN_vkCmdCopyBufferToImage2 vkCmdCopyBufferToImage2; PFN_vkCmdCopyImageToBuffer2 vkCmdCopyImageToBuffer2; PFN_vkUnmapMemory vkUnmapMemory; @@ -297,6 +328,7 @@ struct VKMainDevice { PFN_vkCreateShaderModule vkCreateShaderModule; PFN_vkDestroyShaderModule vkDestroyShaderModule; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; + PFN_vkCreateComputePipelines vkCreateComputePipelines; PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; PFN_vkResetCommandBuffer vkResetCommandBuffer; PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; @@ -311,6 +343,7 @@ struct VKMainDevice { PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; PFN_vkCmdDraw vkCmdDraw; PFN_vkCmdDrawIndexed vkCmdDrawIndexed; + PFN_vkCmdDispatch vkCmdDispatch; PFN_vkCmdCopyBuffer vkCmdCopyBuffer; PFN_vkGetDeviceBufferMemoryRequirements vkGetDeviceBufferMemoryRequirements; PFN_vkGetDeviceImageMemoryRequirements vkGetDeviceImageMemoryRequirements; @@ -343,9 +376,9 @@ struct VKMainDevice { PFN_vkWaitSemaphores vkWaitSemaphores; PFN_vkSignalSemaphore vkSignalSemaphore; PFN_vkGetSemaphoreCounterValue vkGetSemaphoreCounterValue; + PFN_vkGetBufferDeviceAddress vkGetBufferDeviceAddress; PFN_vkCmdPipelineBarrier2 
vkCmdPipelineBarrier2; PFN_vkQueueSubmit2 vkQueueSubmit2; - PFN_vkGetBufferDeviceAddress vkGetBufferDeviceAddress; PFN_vkCmdBeginRendering vkCmdBeginRendering; PFN_vkCmdEndRendering vkCmdEndRendering; PFN_vkCmdSetPrimitiveTopology vkCmdSetPrimitiveTopology; @@ -369,6 +402,22 @@ struct VKMainDevice { public: bool Init(VkDevice device, PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr) noexcept { + static constexpr std::array vkCmdCopyImage2_strings{ +#if defined(VK_VERSION_1_3) + "vkCmdCopyImage2", +#endif +#if defined(VK_KHR_copy_commands2) + "vkCmdCopyImage2KHR", +#endif + }; + for (auto vkCmdCopyImage2_it : vkCmdCopyImage2_strings) { + if ((vkCmdCopyImage2 = (PFN_vkCmdCopyImage2)vkGetDeviceProcAddr(device, vkCmdCopyImage2_it))) { + break; + } + } + if (vkCmdCopyImage2 == nullptr) { + return false; + } static constexpr std::array vkCmdCopyBufferToImage2_strings{ #if defined(VK_VERSION_1_3) "vkCmdCopyBufferToImage2", @@ -377,11 +426,14 @@ struct VKMainDevice { "vkCmdCopyBufferToImage2KHR", #endif }; - for (auto vkCmdCopyBufferToImage2_it : vkCmdCopyBufferToImage2_strings) - if ((vkCmdCopyBufferToImage2 = (PFN_vkCmdCopyBufferToImage2)vkGetDeviceProcAddr(device, vkCmdCopyBufferToImage2_it))) + for (auto vkCmdCopyBufferToImage2_it : vkCmdCopyBufferToImage2_strings) { + if ((vkCmdCopyBufferToImage2 = (PFN_vkCmdCopyBufferToImage2)vkGetDeviceProcAddr(device, vkCmdCopyBufferToImage2_it))) { break; - if (vkCmdCopyBufferToImage2 == nullptr) + } + } + if (vkCmdCopyBufferToImage2 == nullptr) { return false; + } static constexpr std::array vkCmdCopyImageToBuffer2_strings{ #if defined(VK_VERSION_1_3) "vkCmdCopyImageToBuffer2", @@ -390,173 +442,238 @@ struct VKMainDevice { "vkCmdCopyImageToBuffer2KHR", #endif }; - for (auto vkCmdCopyImageToBuffer2_it : vkCmdCopyImageToBuffer2_strings) - if ((vkCmdCopyImageToBuffer2 = (PFN_vkCmdCopyImageToBuffer2)vkGetDeviceProcAddr(device, vkCmdCopyImageToBuffer2_it))) + for (auto vkCmdCopyImageToBuffer2_it : vkCmdCopyImageToBuffer2_strings) { + if 
((vkCmdCopyImageToBuffer2 = (PFN_vkCmdCopyImageToBuffer2)vkGetDeviceProcAddr(device, vkCmdCopyImageToBuffer2_it))) { break; - if (vkCmdCopyImageToBuffer2 == nullptr) + } + } + if (vkCmdCopyImageToBuffer2 == nullptr) { return false; + } vkUnmapMemory = (PFN_vkUnmapMemory)vkGetDeviceProcAddr(device, "vkUnmapMemory"); - if (vkUnmapMemory == nullptr) + if (vkUnmapMemory == nullptr) { return false; + } vkDestroyDevice = (PFN_vkDestroyDevice)vkGetDeviceProcAddr(device, "vkDestroyDevice"); - if (vkDestroyDevice == nullptr) + if (vkDestroyDevice == nullptr) { return false; + } vkBeginCommandBuffer = (PFN_vkBeginCommandBuffer)vkGetDeviceProcAddr(device, "vkBeginCommandBuffer"); - if (vkBeginCommandBuffer == nullptr) + if (vkBeginCommandBuffer == nullptr) { return false; + } vkQueueSubmit = (PFN_vkQueueSubmit)vkGetDeviceProcAddr(device, "vkQueueSubmit"); - if (vkQueueSubmit == nullptr) + if (vkQueueSubmit == nullptr) { return false; + } vkCreateSampler = (PFN_vkCreateSampler)vkGetDeviceProcAddr(device, "vkCreateSampler"); - if (vkCreateSampler == nullptr) + if (vkCreateSampler == nullptr) { return false; + } vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkGetDeviceProcAddr(device, "vkFlushMappedMemoryRanges"); - if (vkFlushMappedMemoryRanges == nullptr) + if (vkFlushMappedMemoryRanges == nullptr) { return false; + } vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory"); - if (vkAllocateMemory == nullptr) + if (vkAllocateMemory == nullptr) { return false; + } vkDestroyPipeline = (PFN_vkDestroyPipeline)vkGetDeviceProcAddr(device, "vkDestroyPipeline"); - if (vkDestroyPipeline == nullptr) + if (vkDestroyPipeline == nullptr) { return false; + } vkQueueWaitIdle = (PFN_vkQueueWaitIdle)vkGetDeviceProcAddr(device, "vkQueueWaitIdle"); - if (vkQueueWaitIdle == nullptr) + if (vkQueueWaitIdle == nullptr) { return false; + } vkCreateImage = (PFN_vkCreateImage)vkGetDeviceProcAddr(device, "vkCreateImage"); - if (vkCreateImage == nullptr) + if 
(vkCreateImage == nullptr) { return false; + } vkCreateDescriptorSetLayout = (PFN_vkCreateDescriptorSetLayout)vkGetDeviceProcAddr(device, "vkCreateDescriptorSetLayout"); - if (vkCreateDescriptorSetLayout == nullptr) + if (vkCreateDescriptorSetLayout == nullptr) { return false; + } vkFreeMemory = (PFN_vkFreeMemory)vkGetDeviceProcAddr(device, "vkFreeMemory"); - if (vkFreeMemory == nullptr) + if (vkFreeMemory == nullptr) { return false; + } vkMapMemory = (PFN_vkMapMemory)vkGetDeviceProcAddr(device, "vkMapMemory"); - if (vkMapMemory == nullptr) + if (vkMapMemory == nullptr) { return false; + } vkDestroyDescriptorSetLayout = (PFN_vkDestroyDescriptorSetLayout)vkGetDeviceProcAddr(device, "vkDestroyDescriptorSetLayout"); - if (vkDestroyDescriptorSetLayout == nullptr) + if (vkDestroyDescriptorSetLayout == nullptr) { return false; + } vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkGetDeviceProcAddr(device, "vkInvalidateMappedMemoryRanges"); - if (vkInvalidateMappedMemoryRanges == nullptr) + if (vkInvalidateMappedMemoryRanges == nullptr) { return false; + } vkCmdSetScissor = (PFN_vkCmdSetScissor)vkGetDeviceProcAddr(device, "vkCmdSetScissor"); - if (vkCmdSetScissor == nullptr) + if (vkCmdSetScissor == nullptr) { return false; + } vkBindBufferMemory = (PFN_vkBindBufferMemory)vkGetDeviceProcAddr(device, "vkBindBufferMemory"); - if (vkBindBufferMemory == nullptr) + if (vkBindBufferMemory == nullptr) { return false; + } vkBindImageMemory = (PFN_vkBindImageMemory)vkGetDeviceProcAddr(device, "vkBindImageMemory"); - if (vkBindImageMemory == nullptr) + if (vkBindImageMemory == nullptr) { return false; + } vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetDeviceProcAddr(device, "vkGetBufferMemoryRequirements"); - if (vkGetBufferMemoryRequirements == nullptr) + if (vkGetBufferMemoryRequirements == nullptr) { return false; + } vkCmdSetViewport = (PFN_vkCmdSetViewport)vkGetDeviceProcAddr(device, "vkCmdSetViewport"); - if (vkCmdSetViewport == 
nullptr) + if (vkCmdSetViewport == nullptr) { return false; + } vkDestroySampler = (PFN_vkDestroySampler)vkGetDeviceProcAddr(device, "vkDestroySampler"); - if (vkDestroySampler == nullptr) + if (vkDestroySampler == nullptr) { return false; + } vkDestroyImageView = (PFN_vkDestroyImageView)vkGetDeviceProcAddr(device, "vkDestroyImageView"); - if (vkDestroyImageView == nullptr) + if (vkDestroyImageView == nullptr) { return false; + } vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetDeviceProcAddr(device, "vkGetImageMemoryRequirements"); - if (vkGetImageMemoryRequirements == nullptr) + if (vkGetImageMemoryRequirements == nullptr) { return false; + } vkCreateFence = (PFN_vkCreateFence)vkGetDeviceProcAddr(device, "vkCreateFence"); - if (vkCreateFence == nullptr) + if (vkCreateFence == nullptr) { return false; + } vkDestroyFence = (PFN_vkDestroyFence)vkGetDeviceProcAddr(device, "vkDestroyFence"); - if (vkDestroyFence == nullptr) + if (vkDestroyFence == nullptr) { return false; + } vkCreateCommandPool = (PFN_vkCreateCommandPool)vkGetDeviceProcAddr(device, "vkCreateCommandPool"); - if (vkCreateCommandPool == nullptr) + if (vkCreateCommandPool == nullptr) { return false; + } vkCmdBindPipeline = (PFN_vkCmdBindPipeline)vkGetDeviceProcAddr(device, "vkCmdBindPipeline"); - if (vkCmdBindPipeline == nullptr) + if (vkCmdBindPipeline == nullptr) { return false; + } vkResetFences = (PFN_vkResetFences)vkGetDeviceProcAddr(device, "vkResetFences"); - if (vkResetFences == nullptr) + if (vkResetFences == nullptr) { return false; + } vkWaitForFences = (PFN_vkWaitForFences)vkGetDeviceProcAddr(device, "vkWaitForFences"); - if (vkWaitForFences == nullptr) + if (vkWaitForFences == nullptr) { return false; + } vkCreateSemaphore = (PFN_vkCreateSemaphore)vkGetDeviceProcAddr(device, "vkCreateSemaphore"); - if (vkCreateSemaphore == nullptr) + if (vkCreateSemaphore == nullptr) { return false; + } vkDestroyBuffer = (PFN_vkDestroyBuffer)vkGetDeviceProcAddr(device, 
"vkDestroyBuffer"); - if (vkDestroyBuffer == nullptr) + if (vkDestroyBuffer == nullptr) { return false; + } vkDestroySemaphore = (PFN_vkDestroySemaphore)vkGetDeviceProcAddr(device, "vkDestroySemaphore"); - if (vkDestroySemaphore == nullptr) + if (vkDestroySemaphore == nullptr) { return false; + } vkCreateBuffer = (PFN_vkCreateBuffer)vkGetDeviceProcAddr(device, "vkCreateBuffer"); - if (vkCreateBuffer == nullptr) + if (vkCreateBuffer == nullptr) { return false; + } vkDestroyImage = (PFN_vkDestroyImage)vkGetDeviceProcAddr(device, "vkDestroyImage"); - if (vkDestroyImage == nullptr) + if (vkDestroyImage == nullptr) { return false; + } vkCreatePipelineLayout = (PFN_vkCreatePipelineLayout)vkGetDeviceProcAddr(device, "vkCreatePipelineLayout"); - if (vkCreatePipelineLayout == nullptr) + if (vkCreatePipelineLayout == nullptr) { return false; + } vkCreateImageView = (PFN_vkCreateImageView)vkGetDeviceProcAddr(device, "vkCreateImageView"); - if (vkCreateImageView == nullptr) + if (vkCreateImageView == nullptr) { return false; + } vkCreateShaderModule = (PFN_vkCreateShaderModule)vkGetDeviceProcAddr(device, "vkCreateShaderModule"); - if (vkCreateShaderModule == nullptr) + if (vkCreateShaderModule == nullptr) { return false; + } vkDestroyShaderModule = (PFN_vkDestroyShaderModule)vkGetDeviceProcAddr(device, "vkDestroyShaderModule"); - if (vkDestroyShaderModule == nullptr) + if (vkDestroyShaderModule == nullptr) { return false; + } vkCmdBindIndexBuffer = (PFN_vkCmdBindIndexBuffer)vkGetDeviceProcAddr(device, "vkCmdBindIndexBuffer"); - if (vkCmdBindIndexBuffer == nullptr) + if (vkCmdBindIndexBuffer == nullptr) { + return false; + } + vkCreateComputePipelines = (PFN_vkCreateComputePipelines)vkGetDeviceProcAddr(device, "vkCreateComputePipelines"); + if (vkCreateComputePipelines == nullptr) { return false; + } vkCreateGraphicsPipelines = (PFN_vkCreateGraphicsPipelines)vkGetDeviceProcAddr(device, "vkCreateGraphicsPipelines"); - if (vkCreateGraphicsPipelines == nullptr) + if 
(vkCreateGraphicsPipelines == nullptr) { return false; + } vkResetCommandBuffer = (PFN_vkResetCommandBuffer)vkGetDeviceProcAddr(device, "vkResetCommandBuffer"); - if (vkResetCommandBuffer == nullptr) + if (vkResetCommandBuffer == nullptr) { return false; + } vkDestroyPipelineLayout = (PFN_vkDestroyPipelineLayout)vkGetDeviceProcAddr(device, "vkDestroyPipelineLayout"); - if (vkDestroyPipelineLayout == nullptr) + if (vkDestroyPipelineLayout == nullptr) { return false; + } vkAllocateCommandBuffers = (PFN_vkAllocateCommandBuffers)vkGetDeviceProcAddr(device, "vkAllocateCommandBuffers"); - if (vkAllocateCommandBuffers == nullptr) + if (vkAllocateCommandBuffers == nullptr) { return false; + } vkCreateDescriptorPool = (PFN_vkCreateDescriptorPool)vkGetDeviceProcAddr(device, "vkCreateDescriptorPool"); - if (vkCreateDescriptorPool == nullptr) + if (vkCreateDescriptorPool == nullptr) { return false; + } vkCmdPushConstants = (PFN_vkCmdPushConstants)vkGetDeviceProcAddr(device, "vkCmdPushConstants"); - if (vkCmdPushConstants == nullptr) + if (vkCmdPushConstants == nullptr) { return false; + } vkDestroyDescriptorPool = (PFN_vkDestroyDescriptorPool)vkGetDeviceProcAddr(device, "vkDestroyDescriptorPool"); - if (vkDestroyDescriptorPool == nullptr) + if (vkDestroyDescriptorPool == nullptr) { return false; + } vkAllocateDescriptorSets = (PFN_vkAllocateDescriptorSets)vkGetDeviceProcAddr(device, "vkAllocateDescriptorSets"); - if (vkAllocateDescriptorSets == nullptr) + if (vkAllocateDescriptorSets == nullptr) { return false; + } vkUpdateDescriptorSets = (PFN_vkUpdateDescriptorSets)vkGetDeviceProcAddr(device, "vkUpdateDescriptorSets"); - if (vkUpdateDescriptorSets == nullptr) + if (vkUpdateDescriptorSets == nullptr) { return false; + } vkDestroyCommandPool = (PFN_vkDestroyCommandPool)vkGetDeviceProcAddr(device, "vkDestroyCommandPool"); - if (vkDestroyCommandPool == nullptr) + if (vkDestroyCommandPool == nullptr) { return false; + } vkEndCommandBuffer = 
(PFN_vkEndCommandBuffer)vkGetDeviceProcAddr(device, "vkEndCommandBuffer"); - if (vkEndCommandBuffer == nullptr) + if (vkEndCommandBuffer == nullptr) { return false; + } vkCmdBindDescriptorSets = (PFN_vkCmdBindDescriptorSets)vkGetDeviceProcAddr(device, "vkCmdBindDescriptorSets"); - if (vkCmdBindDescriptorSets == nullptr) + if (vkCmdBindDescriptorSets == nullptr) { return false; + } vkCmdDraw = (PFN_vkCmdDraw)vkGetDeviceProcAddr(device, "vkCmdDraw"); - if (vkCmdDraw == nullptr) + if (vkCmdDraw == nullptr) { return false; + } vkCmdDrawIndexed = (PFN_vkCmdDrawIndexed)vkGetDeviceProcAddr(device, "vkCmdDrawIndexed"); - if (vkCmdDrawIndexed == nullptr) + if (vkCmdDrawIndexed == nullptr) { return false; + } + vkCmdDispatch = (PFN_vkCmdDispatch)vkGetDeviceProcAddr(device, "vkCmdDispatch"); + if (vkCmdDispatch == nullptr) { + return false; + } vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkGetDeviceProcAddr(device, "vkCmdCopyBuffer"); - if (vkCmdCopyBuffer == nullptr) + if (vkCmdCopyBuffer == nullptr) { return false; + } static constexpr std::array vkGetDeviceBufferMemoryRequirements_strings{ #if defined(VK_VERSION_1_3) "vkGetDeviceBufferMemoryRequirements", @@ -565,11 +682,14 @@ struct VKMainDevice { "vkGetDeviceBufferMemoryRequirementsKHR", #endif }; - for (auto vkGetDeviceBufferMemoryRequirements_it : vkGetDeviceBufferMemoryRequirements_strings) - if ((vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceProcAddr(device, vkGetDeviceBufferMemoryRequirements_it))) + for (auto vkGetDeviceBufferMemoryRequirements_it : vkGetDeviceBufferMemoryRequirements_strings) { + if ((vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceProcAddr(device, vkGetDeviceBufferMemoryRequirements_it))) { break; - if (vkGetDeviceBufferMemoryRequirements == nullptr) + } + } + if (vkGetDeviceBufferMemoryRequirements == nullptr) { return false; + } static constexpr std::array vkGetDeviceImageMemoryRequirements_strings{ #if 
defined(VK_VERSION_1_3) "vkGetDeviceImageMemoryRequirements", @@ -578,14 +698,18 @@ struct VKMainDevice { "vkGetDeviceImageMemoryRequirementsKHR", #endif }; - for (auto vkGetDeviceImageMemoryRequirements_it : vkGetDeviceImageMemoryRequirements_strings) - if ((vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceProcAddr(device, vkGetDeviceImageMemoryRequirements_it))) + for (auto vkGetDeviceImageMemoryRequirements_it : vkGetDeviceImageMemoryRequirements_strings) { + if ((vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceProcAddr(device, vkGetDeviceImageMemoryRequirements_it))) { break; - if (vkGetDeviceImageMemoryRequirements == nullptr) + } + } + if (vkGetDeviceImageMemoryRequirements == nullptr) { return false; + } vkGetDeviceQueue2 = (PFN_vkGetDeviceQueue2)vkGetDeviceProcAddr(device, "vkGetDeviceQueue2"); - if (vkGetDeviceQueue2 == nullptr) + if (vkGetDeviceQueue2 == nullptr) { return false; + } vkCmdBindIndexBuffer2KHR = (PFN_vkCmdBindIndexBuffer2KHR)vkGetDeviceProcAddr(device, "vkCmdBindIndexBuffer2KHR"); static constexpr std::array vkGetImageMemoryRequirements2_strings{ #if defined(VK_VERSION_1_1) @@ -595,9 +719,11 @@ struct VKMainDevice { "vkGetImageMemoryRequirements2KHR", #endif }; - for (auto vkGetImageMemoryRequirements2_it : vkGetImageMemoryRequirements2_strings) - if ((vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetDeviceProcAddr(device, vkGetImageMemoryRequirements2_it))) + for (auto vkGetImageMemoryRequirements2_it : vkGetImageMemoryRequirements2_strings) { + if ((vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetDeviceProcAddr(device, vkGetImageMemoryRequirements2_it))) { break; + } + } static constexpr std::array vkGetBufferMemoryRequirements2_strings{ #if defined(VK_VERSION_1_1) "vkGetBufferMemoryRequirements2", @@ -606,9 +732,11 @@ struct VKMainDevice { "vkGetBufferMemoryRequirements2KHR", #endif }; - for (auto 
vkGetBufferMemoryRequirements2_it : vkGetBufferMemoryRequirements2_strings) - if ((vkGetBufferMemoryRequirements2 = (PFN_vkGetBufferMemoryRequirements2)vkGetDeviceProcAddr(device, vkGetBufferMemoryRequirements2_it))) + for (auto vkGetBufferMemoryRequirements2_it : vkGetBufferMemoryRequirements2_strings) { + if ((vkGetBufferMemoryRequirements2 = (PFN_vkGetBufferMemoryRequirements2)vkGetDeviceProcAddr(device, vkGetBufferMemoryRequirements2_it))) { break; + } + } static constexpr std::array vkBindBufferMemory2_strings{ #if defined(VK_VERSION_1_1) "vkBindBufferMemory2", @@ -617,9 +745,11 @@ struct VKMainDevice { "vkBindBufferMemory2KHR", #endif }; - for (auto vkBindBufferMemory2_it : vkBindBufferMemory2_strings) - if ((vkBindBufferMemory2 = (PFN_vkBindBufferMemory2)vkGetDeviceProcAddr(device, vkBindBufferMemory2_it))) + for (auto vkBindBufferMemory2_it : vkBindBufferMemory2_strings) { + if ((vkBindBufferMemory2 = (PFN_vkBindBufferMemory2)vkGetDeviceProcAddr(device, vkBindBufferMemory2_it))) { break; + } + } static constexpr std::array vkBindImageMemory2_strings{ #if defined(VK_VERSION_1_1) "vkBindImageMemory2", @@ -628,9 +758,11 @@ struct VKMainDevice { "vkBindImageMemory2KHR", #endif }; - for (auto vkBindImageMemory2_it : vkBindImageMemory2_strings) - if ((vkBindImageMemory2 = (PFN_vkBindImageMemory2)vkGetDeviceProcAddr(device, vkBindImageMemory2_it))) + for (auto vkBindImageMemory2_it : vkBindImageMemory2_strings) { + if ((vkBindImageMemory2 = (PFN_vkBindImageMemory2)vkGetDeviceProcAddr(device, vkBindImageMemory2_it))) { break; + } + } static constexpr std::array vkWaitSemaphores_strings{ #if defined(VK_VERSION_1_2) "vkWaitSemaphores", @@ -639,11 +771,14 @@ struct VKMainDevice { "vkWaitSemaphoresKHR", #endif }; - for (auto vkWaitSemaphores_it : vkWaitSemaphores_strings) - if ((vkWaitSemaphores = (PFN_vkWaitSemaphores)vkGetDeviceProcAddr(device, vkWaitSemaphores_it))) + for (auto vkWaitSemaphores_it : vkWaitSemaphores_strings) { + if ((vkWaitSemaphores = 
(PFN_vkWaitSemaphores)vkGetDeviceProcAddr(device, vkWaitSemaphores_it))) { break; - if (vkWaitSemaphores == nullptr) + } + } + if (vkWaitSemaphores == nullptr) { return false; + } static constexpr std::array vkSignalSemaphore_strings{ #if defined(VK_VERSION_1_2) "vkSignalSemaphore", @@ -652,11 +787,14 @@ struct VKMainDevice { "vkSignalSemaphoreKHR", #endif }; - for (auto vkSignalSemaphore_it : vkSignalSemaphore_strings) - if ((vkSignalSemaphore = (PFN_vkSignalSemaphore)vkGetDeviceProcAddr(device, vkSignalSemaphore_it))) + for (auto vkSignalSemaphore_it : vkSignalSemaphore_strings) { + if ((vkSignalSemaphore = (PFN_vkSignalSemaphore)vkGetDeviceProcAddr(device, vkSignalSemaphore_it))) { break; - if (vkSignalSemaphore == nullptr) + } + } + if (vkSignalSemaphore == nullptr) { return false; + } static constexpr std::array vkGetSemaphoreCounterValue_strings{ #if defined(VK_VERSION_1_2) "vkGetSemaphoreCounterValue", @@ -665,11 +803,33 @@ struct VKMainDevice { "vkGetSemaphoreCounterValueKHR", #endif }; - for (auto vkGetSemaphoreCounterValue_it : vkGetSemaphoreCounterValue_strings) - if ((vkGetSemaphoreCounterValue = (PFN_vkGetSemaphoreCounterValue)vkGetDeviceProcAddr(device, vkGetSemaphoreCounterValue_it))) + for (auto vkGetSemaphoreCounterValue_it : vkGetSemaphoreCounterValue_strings) { + if ((vkGetSemaphoreCounterValue = (PFN_vkGetSemaphoreCounterValue)vkGetDeviceProcAddr(device, vkGetSemaphoreCounterValue_it))) { break; - if (vkGetSemaphoreCounterValue == nullptr) + } + } + if (vkGetSemaphoreCounterValue == nullptr) { return false; + } + static constexpr std::array vkGetBufferDeviceAddress_strings{ +#if defined(VK_VERSION_1_2) + "vkGetBufferDeviceAddress", +#endif +#if defined(VK_KHR_buffer_device_address) + "vkGetBufferDeviceAddressKHR", +#endif +#if defined(VK_EXT_buffer_device_address) + "vkGetBufferDeviceAddressEXT", +#endif + }; + for (auto vkGetBufferDeviceAddress_it : vkGetBufferDeviceAddress_strings) { + if ((vkGetBufferDeviceAddress = 
(PFN_vkGetBufferDeviceAddress)vkGetDeviceProcAddr(device, vkGetBufferDeviceAddress_it))) { + break; + } + } + if (vkGetBufferDeviceAddress == nullptr) { + return false; + } static constexpr std::array vkCmdPipelineBarrier2_strings{ #if defined(VK_VERSION_1_3) "vkCmdPipelineBarrier2", @@ -678,11 +838,14 @@ struct VKMainDevice { "vkCmdPipelineBarrier2KHR", #endif }; - for (auto vkCmdPipelineBarrier2_it : vkCmdPipelineBarrier2_strings) - if ((vkCmdPipelineBarrier2 = (PFN_vkCmdPipelineBarrier2)vkGetDeviceProcAddr(device, vkCmdPipelineBarrier2_it))) + for (auto vkCmdPipelineBarrier2_it : vkCmdPipelineBarrier2_strings) { + if ((vkCmdPipelineBarrier2 = (PFN_vkCmdPipelineBarrier2)vkGetDeviceProcAddr(device, vkCmdPipelineBarrier2_it))) { break; - if (vkCmdPipelineBarrier2 == nullptr) + } + } + if (vkCmdPipelineBarrier2 == nullptr) { return false; + } static constexpr std::array vkQueueSubmit2_strings{ #if defined(VK_VERSION_1_3) "vkQueueSubmit2", @@ -691,27 +854,14 @@ struct VKMainDevice { "vkQueueSubmit2KHR", #endif }; - for (auto vkQueueSubmit2_it : vkQueueSubmit2_strings) - if ((vkQueueSubmit2 = (PFN_vkQueueSubmit2)vkGetDeviceProcAddr(device, vkQueueSubmit2_it))) - break; - if (vkQueueSubmit2 == nullptr) - return false; - static constexpr std::array vkGetBufferDeviceAddress_strings{ -#if defined(VK_VERSION_1_2) - "vkGetBufferDeviceAddress", -#endif -#if defined(VK_KHR_buffer_device_address) - "vkGetBufferDeviceAddressKHR", -#endif -#if defined(VK_EXT_buffer_device_address) - "vkGetBufferDeviceAddressEXT", -#endif - }; - for (auto vkGetBufferDeviceAddress_it : vkGetBufferDeviceAddress_strings) - if ((vkGetBufferDeviceAddress = (PFN_vkGetBufferDeviceAddress)vkGetDeviceProcAddr(device, vkGetBufferDeviceAddress_it))) + for (auto vkQueueSubmit2_it : vkQueueSubmit2_strings) { + if ((vkQueueSubmit2 = (PFN_vkQueueSubmit2)vkGetDeviceProcAddr(device, vkQueueSubmit2_it))) { break; - if (vkGetBufferDeviceAddress == nullptr) + } + } + if (vkQueueSubmit2 == nullptr) { return false; + 
} static constexpr std::array vkCmdBeginRendering_strings{ #if defined(VK_VERSION_1_3) "vkCmdBeginRendering", @@ -720,11 +870,14 @@ struct VKMainDevice { "vkCmdBeginRenderingKHR", #endif }; - for (auto vkCmdBeginRendering_it : vkCmdBeginRendering_strings) - if ((vkCmdBeginRendering = (PFN_vkCmdBeginRendering)vkGetDeviceProcAddr(device, vkCmdBeginRendering_it))) + for (auto vkCmdBeginRendering_it : vkCmdBeginRendering_strings) { + if ((vkCmdBeginRendering = (PFN_vkCmdBeginRendering)vkGetDeviceProcAddr(device, vkCmdBeginRendering_it))) { break; - if (vkCmdBeginRendering == nullptr) + } + } + if (vkCmdBeginRendering == nullptr) { return false; + } static constexpr std::array vkCmdEndRendering_strings{ #if defined(VK_VERSION_1_3) "vkCmdEndRendering", @@ -733,11 +886,14 @@ struct VKMainDevice { "vkCmdEndRenderingKHR", #endif }; - for (auto vkCmdEndRendering_it : vkCmdEndRendering_strings) - if ((vkCmdEndRendering = (PFN_vkCmdEndRendering)vkGetDeviceProcAddr(device, vkCmdEndRendering_it))) + for (auto vkCmdEndRendering_it : vkCmdEndRendering_strings) { + if ((vkCmdEndRendering = (PFN_vkCmdEndRendering)vkGetDeviceProcAddr(device, vkCmdEndRendering_it))) { break; - if (vkCmdEndRendering == nullptr) + } + } + if (vkCmdEndRendering == nullptr) { return false; + } static constexpr std::array vkCmdSetPrimitiveTopology_strings{ #if defined(VK_VERSION_1_3) "vkCmdSetPrimitiveTopology", @@ -746,11 +902,14 @@ struct VKMainDevice { "vkCmdSetPrimitiveTopologyEXT", #endif }; - for (auto vkCmdSetPrimitiveTopology_it : vkCmdSetPrimitiveTopology_strings) - if ((vkCmdSetPrimitiveTopology = (PFN_vkCmdSetPrimitiveTopology)vkGetDeviceProcAddr(device, vkCmdSetPrimitiveTopology_it))) + for (auto vkCmdSetPrimitiveTopology_it : vkCmdSetPrimitiveTopology_strings) { + if ((vkCmdSetPrimitiveTopology = (PFN_vkCmdSetPrimitiveTopology)vkGetDeviceProcAddr(device, vkCmdSetPrimitiveTopology_it))) { break; - if (vkCmdSetPrimitiveTopology == nullptr) + } + } + if (vkCmdSetPrimitiveTopology == nullptr) { 
return false; + } static constexpr std::array vkCmdBindVertexBuffers2_strings{ #if defined(VK_VERSION_1_3) "vkCmdBindVertexBuffers2", @@ -759,26 +918,34 @@ struct VKMainDevice { "vkCmdBindVertexBuffers2EXT", #endif }; - for (auto vkCmdBindVertexBuffers2_it : vkCmdBindVertexBuffers2_strings) - if ((vkCmdBindVertexBuffers2 = (PFN_vkCmdBindVertexBuffers2)vkGetDeviceProcAddr(device, vkCmdBindVertexBuffers2_it))) + for (auto vkCmdBindVertexBuffers2_it : vkCmdBindVertexBuffers2_strings) { + if ((vkCmdBindVertexBuffers2 = (PFN_vkCmdBindVertexBuffers2)vkGetDeviceProcAddr(device, vkCmdBindVertexBuffers2_it))) { break; - if (vkCmdBindVertexBuffers2 == nullptr) + } + } + if (vkCmdBindVertexBuffers2 == nullptr) { return false; + } vkCreateSwapchainKHR = (PFN_vkCreateSwapchainKHR)vkGetDeviceProcAddr(device, "vkCreateSwapchainKHR"); - if (vkCreateSwapchainKHR == nullptr) + if (vkCreateSwapchainKHR == nullptr) { return false; + } vkDestroySwapchainKHR = (PFN_vkDestroySwapchainKHR)vkGetDeviceProcAddr(device, "vkDestroySwapchainKHR"); - if (vkDestroySwapchainKHR == nullptr) + if (vkDestroySwapchainKHR == nullptr) { return false; + } vkGetSwapchainImagesKHR = (PFN_vkGetSwapchainImagesKHR)vkGetDeviceProcAddr(device, "vkGetSwapchainImagesKHR"); - if (vkGetSwapchainImagesKHR == nullptr) + if (vkGetSwapchainImagesKHR == nullptr) { return false; + } vkAcquireNextImageKHR = (PFN_vkAcquireNextImageKHR)vkGetDeviceProcAddr(device, "vkAcquireNextImageKHR"); - if (vkAcquireNextImageKHR == nullptr) + if (vkAcquireNextImageKHR == nullptr) { return false; + } vkQueuePresentKHR = (PFN_vkQueuePresentKHR)vkGetDeviceProcAddr(device, "vkQueuePresentKHR"); - if (vkQueuePresentKHR == nullptr) + if (vkQueuePresentKHR == nullptr) { return false; + } vkCmdPushDescriptorSetKHR = (PFN_vkCmdPushDescriptorSetKHR)vkGetDeviceProcAddr(device, "vkCmdPushDescriptorSetKHR"); vkWaitForPresentKHR = (PFN_vkWaitForPresentKHR)vkGetDeviceProcAddr(device, "vkWaitForPresentKHR"); return true; diff --git 
a/wisdom/include/wisdom/generated/vulkan/vk_structs.hpp b/wisdom/include/wisdom/generated/vulkan/vk_structs.hpp index 764942ef..52aabeda 100644 --- a/wisdom/include/wisdom/generated/vulkan/vk_structs.hpp +++ b/wisdom/include/wisdom/generated/vulkan/vk_structs.hpp @@ -4,6 +4,17 @@ #include namespace wis { +/** + * @brief Bottom level acceleration structure build description. + * */ +struct VKBottomLevelASBuildDesc { + wis::AccelerationStructureFlags flags; ///< Build flags. + uint32_t geometry_count; ///< Geometry count. + const wis::VKAcceleratedGeometryDesc* geometry_array; ///< Buffer of geometries. + const wis::VKAcceleratedGeometryDesc** geometry_indirect; ///< Buffer of pointers to geometry. geometry_array must be NULL for this to be used. + bool update; ///< true If the acceleration structure is being updated. +}; + /** * @brief Variant of BufferBarrier with BufferView. * */ @@ -31,6 +42,27 @@ struct VKGraphicsShaderStages { wis::VKShaderView pixel; ///< Pixel shader. }; +/** + * @brief Raytracing pipeline descriptor for pipeline creation. + * */ +struct VKRaytracingPipeineDesc { + wis::VKRootSignatureView root_signature; ///< Root signature. + const wis::VKShaderView* shaders; ///< Shader libraries. + uint32_t shader_count; ///< Shader library count. + const wis::ShaderExport* exports; ///< Shader library exports (entry points). + uint32_t export_count; ///< Shader export count. + /** + * @brief Hit group descriptions. + * Note: Raygen and miss shaders don't have their dedicated shader groups, instead groups are defined in order of appearance in . + * And groups for SBTs are exported as raygen:miss:hit. + * */ + const wis::HitGroupDesc* hit_groups; + uint32_t hit_group_count; ///< Hit group count. + uint32_t max_recursion_depth = 1; ///< Max recursion depth. Default is 1. + uint32_t max_payload_size = 0; ///< Max payload size. Default is 0. + uint32_t max_attribute_size = 0; ///< Max attribute size. Default is 0. 
+}; + /** * @brief Variant of PipelineStateDesc for graphics pipeline. * */ @@ -52,6 +84,14 @@ struct VKGraphicsPipelineDesc { wis::PipelineFlags flags; ///< Pipeline flags to add options to pipeline creation. }; +/** + * @brief Variant of PipelineStateDesc for compute pipeline. + * */ +struct VKComputePipelineDesc { + wis::VKRootSignatureView root_signature; ///< Root signature. + wis::VKShaderView shader; ///< Compute shader. +}; + /** * @brief Variant of RenderPassDesc for render target. * */ @@ -125,6 +165,36 @@ inline constexpr VkShaderStageFlagBits convert_vk(ShaderStages value) noexcept return VK_SHADER_STAGE_MESH_BIT_NV; } } +inline constexpr VkShaderStageFlagBits convert_vk(RaytracingShaderType value) noexcept +{ + switch (value) { + default: + return {}; + case RaytracingShaderType::Raygen: + return VK_SHADER_STAGE_RAYGEN_BIT_KHR; + case RaytracingShaderType::Miss: + return VK_SHADER_STAGE_MISS_BIT_KHR; + case RaytracingShaderType::ClosestHit: + return VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + case RaytracingShaderType::AnyHit: + return VK_SHADER_STAGE_ANY_HIT_BIT_KHR; + case RaytracingShaderType::Intersection: + return VK_SHADER_STAGE_INTERSECTION_BIT_KHR; + case RaytracingShaderType::Callable: + return VK_SHADER_STAGE_CALLABLE_BIT_KHR; + } +} +inline constexpr VkRayTracingShaderGroupTypeKHR convert_vk(HitGroupType value) noexcept +{ + switch (value) { + default: + return {}; + case HitGroupType::Triangles: + return VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + case HitGroupType::Procedural: + return VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR; + } +} inline constexpr VkDescriptorType convert_vk(DescriptorType value) noexcept { switch (value) { @@ -142,6 +212,8 @@ inline constexpr VkDescriptorType convert_vk(DescriptorType value) noexcept return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; case DescriptorType::Buffer: return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + case DescriptorType::AccelerationStructure: + return 
VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; } } inline constexpr VkFormat convert_vk(DataFormat value) noexcept @@ -487,7 +559,7 @@ inline constexpr VkMemoryPropertyFlags convert_vk(MemoryType value) noexcept switch (value) { default: return {}; - case MemoryType::Default: + case MemoryType::DeviceLocal: return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; case MemoryType::Upload: return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; @@ -683,6 +755,52 @@ inline constexpr VkIndexType convert_vk(IndexType value) noexcept return VK_INDEX_TYPE_UINT32; } } +inline constexpr VkGeometryTypeKHR convert_vk(ASGeometryType value) noexcept +{ + switch (value) { + default: + return {}; + case ASGeometryType::Triangles: + return VK_GEOMETRY_TYPE_TRIANGLES_KHR; + case ASGeometryType::AABBs: + return VK_GEOMETRY_TYPE_AABBS_KHR; + } +} +inline constexpr VkBufferUsageFlags convert_vk(BufferUsage value) noexcept +{ + VkBufferUsageFlags output = {}; + if (value & BufferUsage::CopySrc) { + output |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + } + if (value & BufferUsage::CopyDst) { + output |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + } + if (value & BufferUsage::ConstantBuffer) { + output |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + } + if (value & BufferUsage::IndexBuffer) { + output |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + } + if (value & BufferUsage::VertexBuffer) { + output |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + } + if (value & BufferUsage::IndirectBuffer) { + output |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + } + if (value & BufferUsage::StorageBuffer) { + output |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + } + if (value & BufferUsage::AccelerationStructureBuffer) { + output |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + } + if (value & BufferUsage::AccelerationStructureInput) { + output |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR; + } + if (value & BufferUsage::ShaderBindingTable) { + output |= 
VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR; + } + return output; +} inline constexpr VmaAllocationCreateFlags convert_vk(MemoryFlags value) noexcept { VmaAllocationCreateFlags output = {}; @@ -798,10 +916,10 @@ inline constexpr VkAccessFlags2 convert_vk(ResourceAccess value) noexcept if (value & ResourceAccess::ConditionalRendering) { output |= VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT; } - if (value & ResourceAccess::AccelerationStrucureRead) { + if (value & ResourceAccess::AccelerationStructureRead) { output |= VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR; } - if (value & ResourceAccess::AccelerationStrucureWrite) { + if (value & ResourceAccess::AccelerationStructureWrite) { output |= VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR; } if (value & ResourceAccess::ShadingRate) { @@ -869,4 +987,52 @@ inline constexpr VkPipelineCreateFlags convert_vk(PipelineFlags value) noexcept } return output; } +inline constexpr VkGeometryFlagsKHR convert_vk(ASGeometryFlags value) noexcept +{ + VkGeometryFlagsKHR output = {}; + if (value & ASGeometryFlags::Opaque) { + output |= VK_GEOMETRY_OPAQUE_BIT_KHR; + } + if (value & ASGeometryFlags::NoDuplicateAnyHitInvocation) { + output |= VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR; + } + return output; +} +inline constexpr VkBuildAccelerationStructureFlagsKHR convert_vk(AccelerationStructureFlags value) noexcept +{ + VkBuildAccelerationStructureFlagsKHR output = {}; + if (value & AccelerationStructureFlags::AllowUpdate) { + output |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR; + } + if (value & AccelerationStructureFlags::AllowCompaction) { + output |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR; + } + if (value & AccelerationStructureFlags::PreferFastTrace) { + output |= VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + } + if (value & AccelerationStructureFlags::PreferFastBuild) { + output |= VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR; + } + if (value & 
AccelerationStructureFlags::MinimizeMemory) { + output |= VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR; + } + return output; +} +inline constexpr VkGeometryInstanceFlagsKHR convert_vk(ASInstanceFlags value) noexcept +{ + VkGeometryInstanceFlagsKHR output = {}; + if (value & ASInstanceFlags::TriangleCullDisable) { + output |= VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + } + if (value & ASInstanceFlags::TriangleFrontCounterClockwise) { + output |= VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR; + } + if (value & ASInstanceFlags::ForceOpaque) { + output |= VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + } + if (value & ASInstanceFlags::ForceNoOpaque) { + output |= VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR; + } + return output; +} } // namespace wis diff --git a/wisdom/include/wisdom/global/definitions.h b/wisdom/include/wisdom/global/definitions.h index cc0aa5fb..a33f0a01 100644 --- a/wisdom/include/wisdom/global/definitions.h +++ b/wisdom/include/wisdom/global/definitions.h @@ -14,5 +14,6 @@ inline constexpr const unsigned max_vertex_bindings = 16u; inline constexpr const unsigned max_shader_stages = 5u; inline constexpr const unsigned max_push_constants = 5u; inline constexpr const unsigned max_push_descriptors = 8u; -inline constexpr const unsigned max_descriptor_space_overlap = 32u; +inline constexpr const unsigned max_descriptor_storage_resource_count = 4096u; +inline constexpr const unsigned max_descriptor_storage_sampler_count = 2048u; } // namespace wis diff --git a/wisdom/include/wisdom/vulkan/gen/vk_functions.in b/wisdom/include/wisdom/vulkan/gen/vk_functions.in index 2d8691b8..32cba19f 100644 --- a/wisdom/include/wisdom/vulkan/gen/vk_functions.in +++ b/wisdom/include/wisdom/vulkan/gen/vk_functions.in @@ -38,6 +38,7 @@ vkCreateCommandPool vkDestroyCommandPool vkAllocateCommandBuffers vkCreateGraphicsPipelines +vkCreateComputePipelines vkDestroyPipeline vkCreatePipelineLayout vkDestroyPipelineLayout @@ -48,6 +49,7 @@ 
vkBeginCommandBuffer vkCmdCopyBuffer vkCmdCopyBufferToImage2 vkCmdCopyImageToBuffer2 +vkCmdCopyImage2 vkResetCommandBuffer vkCmdBindPipeline vkEndCommandBuffer @@ -61,6 +63,7 @@ vkCmdBindVertexBuffers2 vkCmdBindIndexBuffer vkCmdDrawIndexed vkCmdDraw +vkCmdDispatch vkCmdPushConstants vkCreateFence vkDestroyFence diff --git a/wisdom/include/wisdom/vulkan/impl/vk_allocator.cpp b/wisdom/include/wisdom/vulkan/impl/vk_allocator.cpp index 56a6b3d9..ed3a4e2b 100644 --- a/wisdom/include/wisdom/vulkan/impl/vk_allocator.cpp +++ b/wisdom/include/wisdom/vulkan/impl/vk_allocator.cpp @@ -329,7 +329,7 @@ void wis::ImplVKResourceAllocator::VKFillBufferDesc(uint64_t size, wis::BufferUs info = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = size, - .usage = VkBufferUsageFlags(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VkBufferUsageFlagBits(flags)), + .usage = VkBufferUsageFlags(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | convert_vk(flags)), }; } void wis::ImplVKResourceAllocator::VKFillImageDesc(const wis::TextureDesc& desc, VkImageCreateInfo& info) noexcept diff --git a/wisdom/include/wisdom/vulkan/impl/vk_command_list.cpp b/wisdom/include/wisdom/vulkan/impl/vk_command_list.cpp index 2601afcd..7e94d006 100644 --- a/wisdom/include/wisdom/vulkan/impl/vk_command_list.cpp +++ b/wisdom/include/wisdom/vulkan/impl/vk_command_list.cpp @@ -86,6 +86,44 @@ void wis::ImplVKCommandList::CopyTextureToBuffer(VKTextureView src_texture, VKBu device.table().vkCmdCopyImageToBuffer2(command_list, &copy); } +void wis::ImplVKCommandList::CopyTexture(VKTextureView src_texture, VKTextureView dst_texture, const wis::TextureCopyRegion* regions, uint32_t region_count) const noexcept +{ + wis::detail::limited_allocator allocator(region_count, true); + auto* copies = allocator.data(); + for (size_t i = 0; i < region_count; i++) { + auto& region = regions[i]; + copies[i] = VkImageCopy2{ + .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2, + .srcSubresource = { + .aspectMask = aspect_flags(std::get<1>(src_texture)),
+ .mipLevel = region.src.mip, + .baseArrayLayer = region.src.array_layer, + .layerCount = 1u, + }, + .srcOffset = { int(region.src.offset.width), int(region.src.offset.height), int(region.src.offset.depth_or_layers) }, + .dstSubresource = { + .aspectMask = aspect_flags(std::get<1>(dst_texture)), + .mipLevel = region.dst.mip, + .baseArrayLayer = region.dst.array_layer, + .layerCount = 1u, + }, + .dstOffset = { int(region.dst.offset.width), int(region.dst.offset.height), int(region.dst.offset.depth_or_layers) }, + .extent = { region.src.size.width, region.src.size.height, region.src.size.depth_or_layers }, + }; + } + VkCopyImageInfo2 copy{ + .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_INFO_2, + .pNext = nullptr, + .srcImage = std::get<0>(src_texture), + .srcImageLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .dstImage = std::get<0>(dst_texture), + .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .regionCount = region_count, + .pRegions = copies, + }; + device.table().vkCmdCopyImage2(command_list, &copy); +} + wis::Result wis::ImplVKCommandList::Reset(wis::VKPipelineView new_pipeline) noexcept { Close(); @@ -108,8 +146,9 @@ wis::Result wis::ImplVKCommandList::Reset(wis::VKPipelineView new_pipeline) noex return make_result(result); } closed = false; - if (pipeline) + if (pipeline) { dtable.vkCmdBindPipeline(command_list, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } return wis::success; } @@ -174,8 +213,9 @@ inline VkImageMemoryBarrier2 to_vk(wis::TextureBarrier barrier, VkImage texture, void wis::ImplVKCommandList::BufferBarrier(wis::BufferBarrier barrier, VKBufferView buffer) noexcept { auto hbuffer = std::get<0>(buffer); - if (!hbuffer) + if (!hbuffer) { return; + } VkBufferMemoryBarrier2 desc = detail::to_vk(barrier, hbuffer); VkDependencyInfo depinfo{ @@ -209,8 +249,9 @@ void wis::ImplVKCommandList::BufferBarriers(const wis::VKBufferBarrier2* barrier void wis::ImplVKCommandList::TextureBarrier(wis::TextureBarrier barrier, VKTextureView texture) noexcept { auto
htexture = std::get<0>(texture); - if (!htexture) + if (!htexture) { return; + } VkImageMemoryBarrier2 image_memory_barrier = detail::to_vk(barrier, htexture, std::get<1>(texture)); VkDependencyInfo depinfo{ @@ -261,10 +302,11 @@ void wis::ImplVKCommandList::BeginRenderPass(const wis::VKRenderPassDesc* pass_d .loadOp = convert_vk(target.load_op), .storeOp = convert_vk(target.store_op), }; - if (data[i].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + if (data[i].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { data[i].clearValue = { .color = { .float32{ target.clear_value[0], target.clear_value[1], target.clear_value[2], target.clear_value[3] } } }; + } } VkRenderingAttachmentInfo d_info{}; @@ -278,10 +320,11 @@ void wis::ImplVKCommandList::BeginRenderPass(const wis::VKRenderPassDesc* pass_d .loadOp = convert_vk(pass_desc->depth_stencil->load_op_depth), .storeOp = convert_vk(pass_desc->depth_stencil->store_op_depth), }; - if (d_info.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + if (d_info.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { d_info.clearValue = { .depthStencil = { .depth = pass_desc->depth_stencil->clear_depth, .stencil = pass_desc->depth_stencil->clear_stencil } }; + } } if (ds_selector & DSSelect::Stencil) { s_info = { @@ -292,10 +335,11 @@ void wis::ImplVKCommandList::BeginRenderPass(const wis::VKRenderPassDesc* pass_d .loadOp = convert_vk(pass_desc->depth_stencil->load_op_stencil), .storeOp = convert_vk(pass_desc->depth_stencil->store_op_stencil), }; - if (s_info.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + if (s_info.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { s_info.clearValue = { .depthStencil = { .depth = pass_desc->depth_stencil->clear_depth, .stencil = pass_desc->depth_stencil->clear_stencil } }; + } } VkRenderingInfo info{ @@ -383,6 +427,11 @@ void wis::ImplVKCommandList::SetRootSignature(wis::VKRootSignatureView root_sign pipeline_layout = std::get<0>(root_signature); } +void wis::ImplVKCommandList::SetComputeRootSignature(wis::VKRootSignatureView root_signature) noexcept +{ + 
pipeline_layout = std::get<0>(root_signature); +} + void wis::ImplVKCommandList::IASetVertexBuffers(const wis::VKVertexBufferBinding* resources, uint32_t count, uint32_t start_slot) noexcept { wis::detail::limited_allocator allocator(count, true); @@ -432,22 +481,17 @@ void wis::ImplVKCommandList::DrawInstanced(uint32_t vertex_count_per_instance, device.table().vkCmdDraw(command_list, vertex_count_per_instance, instance_count, base_vertex, start_instance); } -void wis::ImplVKCommandList::SetPushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes, wis::ShaderStages stage) noexcept +void wis::ImplVKCommandList::Dispatch(uint32_t x, uint32_t y, uint32_t z) noexcept { - device.table().vkCmdPushConstants(command_list, pipeline_layout, convert_vk(stage), offset_4bytes * 4, size_4bytes * 4, data); + device.table().vkCmdDispatch(command_list, x, y, z); } -void wis::ImplVKCommandList::SetDescriptorStorage(wis::VKDescriptorStorageView desc_storage) noexcept +void wis::ImplVKCommandList::SetPushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes, wis::ShaderStages stage) noexcept { - auto& set_span = std::get<0>(desc_storage); - device.table().vkCmdBindDescriptorSets(command_list, - VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout, 1, // set 1, because set 0 is reserved for push descriptors - set_span.size(), set_span.data(), - 0, nullptr); + device.table().vkCmdPushConstants(command_list, pipeline_layout, convert_vk(stage), offset_4bytes * 4, size_4bytes * 4, data); } -void wis::ImplVKCommandList::PushDescriptor(wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset) noexcept +void wis::ImplVKCommandList::VKPushDescriptor(wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset, VkPipelineBindPoint binding_point) noexcept { VkDescriptorBufferInfo buffer_info{ .buffer = std::get<0>(view), @@ -465,11 +509,19 @@ void wis::ImplVKCommandList::PushDescriptor(wis::DescriptorType 
type, uint32_t b .pBufferInfo = &buffer_info }; device.table().vkCmdPushDescriptorSetKHR(command_list, - VK_PIPELINE_BIND_POINT_GRAPHICS, + binding_point, pipeline_layout, 0, // set 0, because set 0 is reserved for push descriptors 1, &descriptor); } - +void wis::ImplVKCommandList::VKSetDescriptorStorage(wis::VKDescriptorStorageView desc_storage, VkPipelineBindPoint binding_point) noexcept +{ + auto& set_span = std::get<0>(desc_storage); + device.table().vkCmdBindDescriptorSets(command_list, + binding_point, + pipeline_layout, 1, // set 1, because set 0 is reserved for push descriptors + set_span.size(), set_span.data(), + 0, nullptr); +} #endif // ! diff --git a/wisdom/include/wisdom/vulkan/impl/vk_device.cpp b/wisdom/include/wisdom/vulkan/impl/vk_device.cpp index 4bee0d64..7df7ed91 100644 --- a/wisdom/include/wisdom/vulkan/impl/vk_device.cpp +++ b/wisdom/include/wisdom/vulkan/impl/vk_device.cpp @@ -326,12 +326,6 @@ wis::ImplVKCreateDevice(wis::Result& result, wis::VKAdapter in_adapter, wis::VKD return out_device; } - // Init Default Layout - result = internal.InitDefaultLayout(); - if (result.status != wis::Status::Ok) { - return out_device; - } - // Create Default Allocator internal.allocator = out_device.VKCreateAllocator(result); if (result.status != wis::Status::Ok) { @@ -743,6 +737,31 @@ wis::ImplVKDevice::CreateGraphicsPipeline(wis::Result& result, const wis::VKGrap return out_pipeline; } +wis::VKPipelineState +wis::ImplVKDevice::CreateComputePipeline(wis::Result& result, const wis::VKComputePipelineDesc& desc) const noexcept +{ + wis::VKPipelineState out_pipeline; + auto& internal = out_pipeline.GetMutableInternal(); + + VkComputePipelineCreateInfo info{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = std::get<0>(desc.shader), + .pName = "main", + }, + .layout = std::get<0>(desc.root_signature), + .basePipelineHandle = 
VK_NULL_HANDLE, + .basePipelineIndex = -1, + }; + auto vr = device.table().vkCreateComputePipelines(device.get(), nullptr, 1u, &info, nullptr, internal.pipeline.put_unsafe(device, device.table().vkDestroyPipeline)); + if (!succeeded(vr)) { + result = wis::make_result(vr); + } + return out_pipeline; +} + wis::VKCommandList wis::ImplVKDevice::CreateCommandList(wis::Result& result, wis::QueueType type) const noexcept { @@ -1330,7 +1349,7 @@ wis::ImplVKDevice::CreateSampler(wis::Result& result, const wis::SamplerDesc& de } wis::VKShaderResource -wis::ImplVKDevice::CreateShaderResource(wis::Result& result, wis::VKTextureView texture, wis::ShaderResourceDesc desc) const noexcept +wis::ImplVKDevice::CreateShaderResource(wis::Result& result, wis::VKTextureView texture, const wis::ShaderResourceDesc& desc) const noexcept { VKShaderResource out_resource; auto& internal = out_resource.GetMutableInternal(); @@ -1408,39 +1427,45 @@ wis::ImplVKDevice::CreateShaderResource(wis::Result& result, wis::VKTextureView } wis::VKDescriptorStorage -wis::ImplVKDevice::CreateDescriptorStorage(wis::Result& result, const wis::DescriptorStorageDesc& desc) const noexcept +wis::ImplVKDevice::CreateDescriptorStorage(wis::Result& result, + const wis::DescriptorBindingDesc* descriptor_bindings, + uint32_t descriptor_bindings_count, + wis::DescriptorMemory) const noexcept { VKDescriptorStorage out_storage; auto& internal = out_storage.GetMutableInternal(); - constexpr static uint32_t num_sets = Internal::max_sets; - // Get max descriptor counts - std::array desc_counts{ - desc.sampler_count, - desc.cbuffer_count, - desc.texture_count, - desc.stexture_count, - desc.sbuffer_count, - desc.rbuffer_count - }; + uint32_t offset_pool_size = descriptor_bindings_count * sizeof(VkDescriptorPoolSize); + uint32_t offset_desc_layout = offset_pool_size + descriptor_bindings_count * sizeof(VkDescriptorSetLayout); - uint32_t iterator = 0; - VkDescriptorPoolSize pool_sizes[num_sets]{}; - for (size_t i = 0; i < 
num_sets; i++) { - if (desc_counts[i] != 0) { - pool_sizes[iterator].type = DefaultLayout::desc_types[i]; - pool_sizes[iterator].descriptorCount = desc_counts[i]; - iterator++; - } + std::unique_ptr memory = wis::detail::make_unique_for_overwrite( + descriptor_bindings_count * sizeof(VkDescriptorPoolSize) + + descriptor_bindings_count * sizeof(uint32_t)); + + if (!memory) { + result = wis::make_result(VK_ERROR_OUT_OF_HOST_MEMORY); + return out_storage; + } + // Allocate descriptor sets + internal.descriptor_sets = wis::detail::make_unique_for_overwrite(descriptor_bindings_count + descriptor_bindings_count); + + std::span pool_sizes{ reinterpret_cast(memory.get()), descriptor_bindings_count }; + std::span desc_layouts{ reinterpret_cast(internal.descriptor_sets.get() + descriptor_bindings_count), descriptor_bindings_count }; + std::span pool_size_data{ reinterpret_cast(pool_sizes.data() + descriptor_bindings_count), descriptor_bindings_count }; // For variable descriptor count + + for (size_t i = 0; i < descriptor_bindings_count; i++) { + pool_sizes[i].type = convert_vk(descriptor_bindings[i].binding_type); + pool_sizes[i].descriptorCount = descriptor_bindings[i].binding_count; + pool_size_data[i] = descriptor_bindings[i].binding_count; } VkDescriptorPoolCreateInfo pool_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = 0, - .maxSets = num_sets, - .poolSizeCount = iterator, - .pPoolSizes = pool_sizes, + .maxSets = descriptor_bindings_count, + .poolSizeCount = descriptor_bindings_count, + .pPoolSizes = pool_sizes.data() }; wis::scoped_handle pool; auto res = device.table().vkCreateDescriptorPool(device.get(), &pool_info, nullptr, pool.put(device.get(), device.table().vkDestroyDescriptorPool)); @@ -1449,55 +1474,94 @@ wis::ImplVKDevice::CreateDescriptorStorage(wis::Result& result, const wis::Descr return out_storage; } + // Create descriptor set layouts + constexpr static VkDescriptorBindingFlags flags = 
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT; + constexpr static VkDescriptorSetLayoutBindingFlagsCreateInfoEXT binding_flags_info{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT, + .pNext = nullptr, + .bindingCount = 1, + .pBindingFlags = &flags, + }; + VkDescriptorSetLayoutBinding binding_layout{ + .binding = 0, + .stageFlags = VkShaderStageFlagBits::VK_SHADER_STAGE_ALL, + }; + VkDescriptorSetLayoutCreateInfo desc_layout_info{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &binding_flags_info, + .flags = 0, + .bindingCount = 1, + .pBindings = &binding_layout, + }; + + for (uint32_t i = 0; i < descriptor_bindings_count; i++) { + binding_layout.descriptorType = convert_vk(descriptor_bindings[i].binding_type); + binding_layout.descriptorCount = descriptor_bindings[i].binding_type == wis::DescriptorType::Sampler + ? wis::max_descriptor_storage_sampler_count + : wis::max_descriptor_storage_resource_count; // Max descriptor count + + res = device.table().vkCreateDescriptorSetLayout(device.get(), &desc_layout_info, nullptr, &desc_layouts[i]); + if (!succeeded(res)) { + result = wis::make_result(res); + for (uint32_t j = 0; j < i; j++) { + device.table().vkDestroyDescriptorSetLayout(device.get(), desc_layouts[j], nullptr); + } + + return out_storage; + } + } + + if (!internal.descriptor_sets) { + result = wis::make_result(VK_ERROR_OUT_OF_HOST_MEMORY); + return out_storage; + } VkDescriptorSetVariableDescriptorCountAllocateInfo variable_desc_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, .pNext = nullptr, - .descriptorSetCount = num_sets, - .pDescriptorCounts = desc_counts.data(), + .descriptorSetCount = descriptor_bindings_count, + .pDescriptorCounts = pool_size_data.data(), }; - VkDescriptorSetAllocateInfo desc_alloc_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = 
&variable_desc_info, .descriptorPool = pool.get(), - .descriptorSetCount = uint32_t(std::size(internal.set)), - .pSetLayouts = default_layout.desc_sets.data(), + .descriptorSetCount = descriptor_bindings_count, + .pSetLayouts = desc_layouts.data(), }; - res = device.table().vkAllocateDescriptorSets(device.get(), &desc_alloc_info, internal.set.data()); + res = device.table().vkAllocateDescriptorSets(device.get(), &desc_alloc_info, internal.descriptor_sets.get()); + + // Destroy descriptor set layouts if (!succeeded(res)) { result = wis::make_result(res); return out_storage; } internal.pool = pool.release(); internal.device = device; + internal.descriptor_count = descriptor_bindings_count; + return out_storage; } wis::VKRootSignature -wis::ImplVKDevice::CreateRootSignature(wis::Result& result, const wis::PushConstant* constants, - uint32_t constants_size, - const PushDescriptor* push_descriptors, - uint32_t push_descriptors_size, - uint32_t space_overlap_count) const noexcept +wis::ImplVKDevice::CreateRootSignature(wis::Result& result, const wis::PushConstant* push_constants, + uint32_t constants_count, + const wis::PushDescriptor* push_descriptors, + uint32_t push_descriptors_count, + const wis::DescriptorBindingDesc* descriptor_bindings, + uint32_t descriptor_bindings_count) const noexcept { VKRootSignature out_signature; auto& internal = out_signature.GetMutableInternal(); - - constexpr static uint32_t num_sets = uint32_t(wis::BindingIndex::Count); - if (constants_size > wis::max_push_constants) { + if (constants_count > wis::max_push_constants) { result = wis::make_result(VkResult::VK_ERROR_UNKNOWN); return out_signature; } - if (push_descriptors_size > wis::max_push_descriptors) { + if (push_descriptors_count > wis::max_push_descriptors) { result = wis::make_result(VkResult::VK_ERROR_UNKNOWN); return out_signature; } - if (space_overlap_count > wis::max_descriptor_space_overlap) { - result = wis::make_result(VkResult::VK_ERROR_UNKNOWN); - return out_signature; - 
} - if (internal.vk_dsls = wis::detail::make_unique_for_overwrite(num_sets + 1); !internal.vk_dsls) { + if (internal.vk_dsls = wis::detail::make_unique_for_overwrite(descriptor_bindings_count + 1); !internal.vk_dsls) { result = wis::make_result(VkResult::VK_ERROR_OUT_OF_HOST_MEMORY); return out_signature; } @@ -1505,7 +1569,7 @@ wis::ImplVKDevice::CreateRootSignature(wis::Result& result, const wis::PushConst // Create push descriptor set layout { VkDescriptorSetLayoutBinding push_bindings[wis::max_push_descriptors]{}; - for (uint32_t i = 0; i < push_descriptors_size; i++) { + for (uint32_t i = 0; i < push_descriptors_count; i++) { auto& r = push_descriptors[i]; auto& b = push_bindings[i]; b.binding = i; @@ -1517,7 +1581,7 @@ wis::ImplVKDevice::CreateRootSignature(wis::Result& result, const wis::PushConst .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, - .bindingCount = push_descriptors_size, + .bindingCount = push_descriptors_count, .pBindings = push_bindings, }; auto res = device.table().vkCreateDescriptorSetLayout(device.get(), &push_desc_info, nullptr, &internal.vk_dsls[0]); @@ -1525,13 +1589,50 @@ wis::ImplVKDevice::CreateRootSignature(wis::Result& result, const wis::PushConst result = wis::make_result(res); return out_signature; } - std::copy_n(default_layout.desc_sets.begin(), num_sets, internal.vk_dsls.get() + 1); } - VkPushConstantRange push_constants[wis::max_push_constants]{}; - for (uint32_t i = 0; i < constants_size; i++) { - auto& c = push_constants[i]; - auto& r = constants[i]; + // Create descriptor set layouts + constexpr static VkDescriptorBindingFlags flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT; + constexpr static VkDescriptorSetLayoutBindingFlagsCreateInfoEXT binding_flags_info{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT, + .pNext = 
nullptr, + .bindingCount = 1, + .pBindingFlags = &flags, + }; + VkDescriptorSetLayoutBinding binding_layout{ + .binding = 0, + .stageFlags = VkShaderStageFlagBits::VK_SHADER_STAGE_ALL, + }; + VkDescriptorSetLayoutCreateInfo desc_layout_info{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &binding_flags_info, + .flags = 0, + .bindingCount = 1, + .pBindings = &binding_layout, + }; + + auto desc_layouts = internal.vk_dsls.get() + 1; + for (uint32_t i = 0; i < descriptor_bindings_count; i++) { + binding_layout.descriptorType = convert_vk(descriptor_bindings[i].binding_type); + binding_layout.descriptorCount = descriptor_bindings[i].binding_type == wis::DescriptorType::Sampler + ? wis::max_descriptor_storage_sampler_count + : wis::max_descriptor_storage_resource_count; + + auto res = device.table().vkCreateDescriptorSetLayout(device.get(), &desc_layout_info, nullptr, &desc_layouts[i]); + if (!succeeded(res)) { + result = wis::make_result(res); + for (uint32_t j = 0; j < i; j++) { + device.table().vkDestroyDescriptorSetLayout(device.get(), desc_layouts[j], nullptr); + } + + return out_signature; + } + } + + VkPushConstantRange xpush_constants[wis::max_push_constants]{}; + for (uint32_t i = 0; i < constants_count; i++) { + auto& c = xpush_constants[i]; + auto& r = push_constants[i]; c.stageFlags = convert_vk(r.stage); c.offset = 0; c.size = r.size_bytes; @@ -1541,27 +1642,21 @@ wis::ImplVKDevice::CreateRootSignature(wis::Result& result, const wis::PushConst .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = num_sets + 1, + .setLayoutCount = descriptor_bindings_count + 1, .pSetLayouts = internal.vk_dsls.get(), - .pushConstantRangeCount = constants_size, - .pPushConstantRanges = push_constants, + .pushConstantRangeCount = constants_count, + .pPushConstantRanges = xpush_constants, }; auto vr = device.table().vkCreatePipelineLayout(device.get(), &pipeline_layout_info, nullptr, 
internal.root.put(device, device.table().vkDestroyPipelineLayout)); if (!succeeded(vr)) { - device.table().vkDestroyDescriptorSetLayout(device.get(), internal.vk_dsls[0], nullptr); + // Destroy descriptor set layouts + push descriptor set layout + for (uint32_t i = 0; i < descriptor_bindings_count + 1; i++) { + device.table().vkDestroyDescriptorSetLayout(device.get(), desc_layouts[i], nullptr); + } result = wis::make_result(vr); } - internal.dsl_count = 1; // number of descriptor set layouts to destroy + internal.dsl_count = descriptor_bindings_count + 1; // number of descriptor set layouts to destroy return out_signature; } -wis::VKRootSignature -wis::ImplVKDevice::CreateRootSignature2(wis::Result& result, const wis::PushConstant* push_constants, - uint32_t constants_count, - const wis::PushDescriptor* push_descriptors, - uint32_t push_descriptors_count, - const wis::DescriptorSpacing* descriptor_spacing) const noexcept -{ - return CreateRootSignature(result, push_constants, constants_count, push_descriptors, push_descriptors_count, 1); -} #endif diff --git a/wisdom/include/wisdom/vulkan/vk_adapter.h b/wisdom/include/wisdom/vulkan/vk_adapter.h index e91a94a3..769d0716 100644 --- a/wisdom/include/wisdom/vulkan/vk_adapter.h +++ b/wisdom/include/wisdom/vulkan/vk_adapter.h @@ -13,7 +13,7 @@ class VKAdapter; template<> struct Internal { wis::SharedInstance instance; - VkPhysicalDevice adapter; + VkPhysicalDevice adapter = VK_NULL_HANDLE; }; class ImplVKAdapter : public QueryInternal diff --git a/wisdom/include/wisdom/vulkan/vk_command_list.h b/wisdom/include/wisdom/vulkan/vk_command_list.h index 776181ab..eb073e23 100644 --- a/wisdom/include/wisdom/vulkan/vk_command_list.h +++ b/wisdom/include/wisdom/vulkan/vk_command_list.h @@ -2,7 +2,6 @@ #define WIS_VK_COMMAND_LIST_H #include #include -#include #include #include #include @@ -77,6 +76,8 @@ class ImplVKCommandList : public QueryInternal WIS_INLINE void CopyTextureToBuffer(VKTextureView src_texture, VKBufferView 
dest_buffer, const wis::BufferTextureCopyRegion* regions, uint32_t region_count) const noexcept; + WIS_INLINE void CopyTexture(VKTextureView src_texture, VKTextureView dst_texture, const wis::TextureCopyRegion* regions, uint32_t region_count) const noexcept; + WIS_INLINE void BufferBarrier(wis::BufferBarrier barrier, VKBufferView buffer) noexcept; // 8 buffers at once max for efficiency WIS_INLINE void BufferBarriers(const wis::VKBufferBarrier2* barriers, uint32_t barrier_count) noexcept; @@ -91,6 +92,8 @@ class ImplVKCommandList : public QueryInternal WIS_INLINE void SetRootSignature(wis::VKRootSignatureView root_signature) noexcept; + WIS_INLINE void SetComputeRootSignature(wis::VKRootSignatureView root_signature) noexcept; + WIS_INLINE void SetPipelineState(wis::VKPipelineView pipeline_state) noexcept; WIS_INLINE void IASetPrimitiveTopology(wis::PrimitiveTopology topology) noexcept; @@ -119,11 +122,35 @@ class ImplVKCommandList : public QueryInternal uint32_t start_vertex = 0, uint32_t start_instance = 0) noexcept; + WIS_INLINE void Dispatch(uint32_t x, uint32_t y, uint32_t z) noexcept; + WIS_INLINE void SetPushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes, wis::ShaderStages stage) noexcept; + WIS_INLINE void SetComputePushConstants(const void* data, uint32_t size_4bytes, uint32_t offset_4bytes) noexcept + { + SetPushConstants(data, size_4bytes, offset_4bytes, wis::ShaderStages::All); + } - WIS_INLINE void PushDescriptor(wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset = 0) noexcept; + void PushDescriptor(wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset = 0) noexcept + { + VKPushDescriptor(type, binding, view, offset, VK_PIPELINE_BIND_POINT_GRAPHICS); + } + void PushDescriptorCompute(wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset = 0) noexcept + { + VKPushDescriptor(type, binding, view, offset, VK_PIPELINE_BIND_POINT_COMPUTE); + } 
+ + void SetDescriptorStorage(VKDescriptorStorageView desc_storage) noexcept + { + VKSetDescriptorStorage(desc_storage, VK_PIPELINE_BIND_POINT_GRAPHICS); + } + void SetComputeDescriptorStorage(VKDescriptorStorageView desc_storage) noexcept + { + VKSetDescriptorStorage(desc_storage, VK_PIPELINE_BIND_POINT_COMPUTE); + } - WIS_INLINE void SetDescriptorStorage(VKDescriptorStorageView desc_storage) noexcept; +public: + WIS_INLINE void VKPushDescriptor(wis::DescriptorType type, uint32_t binding, wis::VKBufferView view, uint32_t offset, VkPipelineBindPoint binding_point) noexcept; + WIS_INLINE void VKSetDescriptorStorage(VKDescriptorStorageView desc_storage, VkPipelineBindPoint binding_point) noexcept; protected: bool closed = false; @@ -203,6 +230,17 @@ class VKCommandList : public wis::ImplVKCommandList { wis::ImplVKCommandList::CopyTextureToBuffer(std::move(source), std::move(destination), regions, region_count); } + /** + * @brief Copies data from one texture to another. + * @param source The source texture to copy from. + * @param destination The destination texture to copy to. + * @param regions The regions to copy. + * @param region_count The number of regions to copy. + * */ + inline void CopyTexture(wis::VKTextureView source, wis::VKTextureView destination, const wis::TextureCopyRegion* regions, uint32_t region_count) noexcept + { + wis::ImplVKCommandList::CopyTexture(std::move(source), std::move(destination), regions, region_count); + } /** * @brief Sets the barrier on the buffer. * @param barrier The barrier to set. @@ -262,6 +300,15 @@ class VKCommandList : public wis::ImplVKCommandList { wis::ImplVKCommandList::SetRootSignature(std::move(root_signature)); } + /** + * @brief Sets the pipeline signature object to compute pipeline. Used to determine how to pick descriptors from descriptor buffer. + * May only work with compute pipelines. + * @param root_signature The root signature to set. 
+ * */ + inline void SetComputeRootSignature(wis::VKRootSignatureView root_signature) noexcept + { + wis::ImplVKCommandList::SetComputeRootSignature(std::move(root_signature)); + } /** * @brief Sets the primitive topology. Detemines how vertices shall be processed. * @param topology The primitive topology to set. @@ -362,6 +409,16 @@ class VKCommandList : public wis::ImplVKCommandList { wis::ImplVKCommandList::DrawInstanced(vertex_count_per_instance, instance_count, start_vertex, start_instance); } + /** + * @brief Dispatches compute shader. + * @param group_count_x The number of groups to dispatch in X dimension. + * @param group_count_y The number of groups to dispatch in Y dimension. Default is 1. + * @param group_count_z The number of groups to dispatch in Z dimension. Default is 1. + * */ + inline void Dispatch(uint32_t group_count_x, uint32_t group_count_y = 1, uint32_t group_count_z = 1) noexcept + { + wis::ImplVKCommandList::Dispatch(group_count_x, group_count_y, group_count_z); + } /** * @brief Sets the root constants for the shader. * @param data The data to set the root constants with. @@ -373,6 +430,16 @@ class VKCommandList : public wis::ImplVKCommandList { wis::ImplVKCommandList::SetPushConstants(data, size_4bytes, offset_4bytes, stage); } + /** + * @brief Sets the root constants for the compute or raytracing shader. + * @param data The data to set the root constants with. + * @param size_4bytes The size of the data in 4-byte units. + * @param offset_4bytes The offset in the data in 4-byte units. + * */ + inline void SetComputePushConstants(void* data, uint32_t size_4bytes, uint32_t offset_4bytes) noexcept + { + wis::ImplVKCommandList::SetComputePushConstants(data, size_4bytes, offset_4bytes); + } /** * @brief Pushes descriptor directly to the command list, without putting it to the table. * Works only with buffer bindings. 
@@ -386,6 +453,36 @@ class VKCommandList : public wis::ImplVKCommandList { wis::ImplVKCommandList::PushDescriptor(type, root_index, std::move(buffer), offset); } + /** + * @brief Pushes descriptor directly to the command list, without putting it to the table. + * Works only with buffer bindings. + * Works with compute or raytracing pipelines. + * Buffer is always bound with full size. + * @param type The type of the descriptor to set. + * @param root_index The index of the root descriptor to set. + * @param buffer The buffer to set. + * @param offset The offset in the descriptor table to set the descriptor to. + * */ + inline void PushDescriptorCompute(wis::DescriptorType type, uint32_t root_index, wis::VKBufferView buffer, uint32_t offset) noexcept + { + wis::ImplVKCommandList::PushDescriptorCompute(type, root_index, std::move(buffer), offset); + } + /** + * @brief Sets the descriptor storage object for graphics pipeline. + * @param storage The descriptor storage to set. + * */ + inline void SetDescriptorStorage(wis::VKDescriptorStorageView storage) noexcept + { + wis::ImplVKCommandList::SetDescriptorStorage(std::move(storage)); + } + /** + * @brief Sets the descriptor storage object for compute pipeline. + * @param storage The descriptor storage to set. + * */ + inline void SetComputeDescriptorStorage(wis::VKDescriptorStorageView storage) noexcept + { + wis::ImplVKCommandList::SetComputeDescriptorStorage(std::move(storage)); + } }; #pragma endregion VKCommandList diff --git a/wisdom/include/wisdom/vulkan/vk_descriptor_storage.h b/wisdom/include/wisdom/vulkan/vk_descriptor_storage.h index 5a9f429c..877dcb3e 100644 --- a/wisdom/include/wisdom/vulkan/vk_descriptor_storage.h +++ b/wisdom/include/wisdom/vulkan/vk_descriptor_storage.h @@ -9,12 +9,11 @@ class VKDescriptorStorage; template<> struct Internal { - // sampler, Uniform buffer, storage RW buffer, sampled image, storage RW image, maybe storage read buffer will be needed. 
- constexpr static uint32_t max_sets = uint32_t(wis::BindingIndex::Count); - wis::SharedDevice device; h::VkDescriptorPool pool; - std::array set{}; // Big Descriptor set with only unbounded arrays + + std::unique_ptr descriptor_sets; // Big Descriptor set with only unbounded arrays + uint32_t descriptor_count = 0; public: Internal() noexcept = default; @@ -27,7 +26,8 @@ struct Internal { Destroy(); device = std::move(o.device); pool = std::move(o.pool); - set = std::move(o.set); + descriptor_sets = std::move(o.descriptor_sets); + descriptor_count = o.descriptor_count; return *this; } ~Internal() noexcept @@ -40,6 +40,9 @@ struct Internal { { if (pool) { device.table().vkDestroyDescriptorPool(device.get(), pool, nullptr); + for (uint32_t i = 0; i < descriptor_count; ++i) { + device.table().vkDestroyDescriptorSetLayout(device.get(), reinterpret_cast(descriptor_sets[i + descriptor_count]), nullptr); + } } } }; @@ -55,14 +58,14 @@ class ImplVKDescriptorStorage : public QueryInternal operator VKDescriptorStorageView() const noexcept { - std::span span{ - set + std::span span{ + descriptor_sets.get(), descriptor_count }; return VKDescriptorStorageView{ span }; } public: - void WriteSampler(uint32_t index, wis::VKSamplerView sampler) noexcept + void WriteSampler(uint32_t binding, uint32_t index, wis::VKSamplerView sampler) noexcept { VkDescriptorImageInfo info{ .sampler = std::get<0>(sampler), @@ -71,7 +74,7 @@ class ImplVKDescriptorStorage : public QueryInternal }; VkWriteDescriptorSet write{ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set[uint32_t(wis::BindingIndex::Sampler) - 1], + .dstSet = descriptor_sets[binding], .dstBinding = 0, .dstArrayElement = index, .descriptorCount = 1, @@ -80,7 +83,7 @@ class ImplVKDescriptorStorage : public QueryInternal }; device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); } - void WriteConstantBuffer(uint32_t index, wis::VKBufferView buffer, uint32_t size, uint32_t offset = 0) noexcept + void 
WriteConstantBuffer(uint32_t binding, uint32_t index, wis::VKBufferView buffer, uint32_t size, uint32_t offset = 0) noexcept { VkDescriptorBufferInfo info{ .buffer = std::get<0>(buffer), @@ -89,7 +92,7 @@ class ImplVKDescriptorStorage : public QueryInternal }; VkWriteDescriptorSet write{ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set[uint32_t(wis::BindingIndex::ConstantBuffer) - 1], + .dstSet = descriptor_sets[binding], .dstBinding = 0, .dstArrayElement = index, .descriptorCount = 1, @@ -98,7 +101,7 @@ class ImplVKDescriptorStorage : public QueryInternal }; device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); } - void WriteTexture(uint32_t index, wis::VKShaderResourceView srv) noexcept + void WriteTexture(uint32_t binding, uint32_t index, wis::VKShaderResourceView srv) noexcept { VkDescriptorImageInfo info{ .sampler = VK_NULL_HANDLE, @@ -107,7 +110,7 @@ class ImplVKDescriptorStorage : public QueryInternal }; VkWriteDescriptorSet write{ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set[uint32_t(wis::BindingIndex::Texture) - 1], + .dstSet = descriptor_sets[binding], .dstBinding = 0, .dstArrayElement = index, .descriptorCount = 1, @@ -116,6 +119,60 @@ class ImplVKDescriptorStorage : public QueryInternal }; device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); } + void WriteRWTexture(uint32_t binding, uint32_t index, wis::VKUnorderedAccessTextureView uav) noexcept + { + VkDescriptorImageInfo info{ + .sampler = VK_NULL_HANDLE, + .imageView = std::get<0>(uav), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }; + VkWriteDescriptorSet write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_sets[binding], + .dstBinding = 0, + .dstArrayElement = index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &info + }; + device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); + } + void WriteRWStructuredBuffer(uint32_t 
binding, uint32_t index, wis::VKBufferView buffer, uint32_t stride, uint32_t element_count, uint32_t offset_elements = 0) noexcept + { + VkDescriptorBufferInfo info{ + .buffer = std::get<0>(buffer), + .offset = offset_elements * stride, + .range = element_count * stride + }; + VkWriteDescriptorSet write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_sets[binding], + .dstBinding = 0, + .dstArrayElement = index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &info + }; + device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); + } + void WriteStructuredBuffer(uint32_t binding, uint32_t index, wis::VKBufferView buffer, uint32_t stride, uint32_t element_count, uint32_t offset_elements = 0) noexcept + { + VkDescriptorBufferInfo info{ + .buffer = std::get<0>(buffer), + .offset = offset_elements * stride, + .range = element_count * stride + }; + VkWriteDescriptorSet write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_sets[binding], + .dstBinding = 0, + .dstArrayElement = index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &info + }; + device.table().vkUpdateDescriptorSets(device.get(), 1, &write, 0, nullptr); + } }; #pragma region VKDescriptorStorage @@ -131,33 +188,36 @@ class VKDescriptorStorage : public wis::ImplVKDescriptorStorage public: /** * @brief Writes the sampler to the sampler descriptor storage. - * @param index Index in array of samplers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of samplers to fill. * @param sampler The sampler to write. 
* */ - inline void WriteSampler(uint32_t index, wis::VKSamplerView sampler) noexcept + inline void WriteSampler(uint32_t set_index, uint32_t binding, wis::VKSamplerView sampler) noexcept { - wis::ImplVKDescriptorStorage::WriteSampler(index, std::move(sampler)); + wis::ImplVKDescriptorStorage::WriteSampler(set_index, binding, std::move(sampler)); } /** * @brief Writes the constant buffer to the constant buffer descriptor storage. - * @param index Index in array of constant buffers to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of constant buffers to fill. * @param buffer The buffer to write. * @param size The size of the constant buffer in bytes. * @param offset The offset in the buffer to write the constant buffer to. * size + offset must be less or equal the overall size of the bound buffer. * */ - inline void WriteConstantBuffer(uint32_t index, wis::VKBufferView buffer, uint32_t size, uint32_t offset = 0) noexcept + inline void WriteConstantBuffer(uint32_t set_index, uint32_t binding, wis::VKBufferView buffer, uint32_t size, uint32_t offset = 0) noexcept { - wis::ImplVKDescriptorStorage::WriteConstantBuffer(index, std::move(buffer), size, offset); + wis::ImplVKDescriptorStorage::WriteConstantBuffer(set_index, binding, std::move(buffer), size, offset); } /** * @brief Writes the texture to the shader resource descriptor storage. - * @param index Index in array of shader resources to fill. + * @param set_index Index in storage sets, defined by the place in the binding array at the creation. + * @param binding Index in array of shader resources to fill. * @param resource The shader resource to write. 
* */ - inline void WriteTexture(uint32_t index, wis::VKShaderResourceView resource) noexcept + inline void WriteTexture(uint32_t set_index, uint32_t binding, wis::VKShaderResourceView resource) noexcept { - wis::ImplVKDescriptorStorage::WriteTexture(index, std::move(resource)); + wis::ImplVKDescriptorStorage::WriteTexture(set_index, binding, std::move(resource)); } }; #pragma endregion VKDescriptorStorage diff --git a/wisdom/include/wisdom/vulkan/vk_device.h b/wisdom/include/wisdom/vulkan/vk_device.h index d4d0ac83..48294fbe 100644 --- a/wisdom/include/wisdom/vulkan/vk_device.h +++ b/wisdom/include/wisdom/vulkan/vk_device.h @@ -26,121 +26,6 @@ struct Internal { wis::shared_handle allocator; detail::QueueResidency queues; - struct DefaultLayout { - constexpr static std::array::max_sets> desc_types{ - VK_DESCRIPTOR_TYPE_SAMPLER, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - }; - - std::array::max_sets> desc_sets{}; - - public: - bool operator==(const DefaultLayout& o) const noexcept - { - return std::memcmp(desc_sets.data(), o.desc_sets.data(), sizeof(desc_sets)) == 0; - } - bool Valid() const noexcept - { - return *this != DefaultLayout{}; - } - void Destroy(PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout, VkDevice device) noexcept - { - if (Valid()) { - for (auto& set : desc_sets) { - vkDestroyDescriptorSetLayout(device, set, nullptr); - } - } - } - wis::Result Init(PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout, VkDevice device) - { - constexpr static size_t num_sets = Internal::max_sets; - constexpr static VkDescriptorSetLayoutBinding bindings[num_sets]{ - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .descriptorCount = 2048, - .stageFlags = VK_SHADER_STAGE_ALL }, - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 4096, - .stageFlags = 
VK_SHADER_STAGE_ALL }, - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 4096, - .stageFlags = VK_SHADER_STAGE_ALL }, - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 4096, - .stageFlags = VK_SHADER_STAGE_ALL }, - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 4096, - .stageFlags = VK_SHADER_STAGE_ALL }, - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 4096, - .stageFlags = VK_SHADER_STAGE_ALL }, - }; - - constexpr static VkDescriptorBindingFlags flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT; - constexpr static VkDescriptorSetLayoutBindingFlagsCreateInfoEXT binding_flags_info{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT, - .pNext = nullptr, - .bindingCount = 1, - .pBindingFlags = &flags, - }; - - std::array desc_info{}; - for (size_t i = 0; i < num_sets; i++) { - desc_info[i] = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = &binding_flags_info, - .flags = 0, - .bindingCount = 1, - .pBindings = &bindings[i], - }; - } - for (size_t i = 0; i < num_sets; i++) { - auto res = vkCreateDescriptorSetLayout(device, &desc_info[i], nullptr, &desc_sets[i]); - if (!succeeded(res)) { - return wis::make_result(res); - } - } - return wis::success; - } - } default_layout; - -public: - Internal() noexcept = default; - Internal(Internal&&) noexcept = default; - Internal& operator=(Internal&& o) noexcept - { - if (this == &o) { - return *this; - } - if (device.get()) { - default_layout.Destroy(device.table().vkDestroyDescriptorSetLayout, device.get()); - } - - adapter = std::move(o.adapter); - device = std::move(o.device); - ext1 = std::move(o.ext1); - allocator = std::move(o.allocator); - queues = std::move(o.queues); - std::memcpy(&default_layout, &o.default_layout, 
sizeof(default_layout)); - std::memset(&o.default_layout, 0, sizeof(default_layout)); - return *this; - } - ~Internal() noexcept - { - if (device.get() && default_layout.Valid()) { - default_layout.Destroy(device.table().vkDestroyDescriptorSetLayout, device.get()); - } - } - public: auto& GetInstanceTable() const noexcept { @@ -161,11 +46,6 @@ struct Internal { { return device.GetDeviceProcAddr(name); } - - wis::Result InitDefaultLayout() noexcept - { - return default_layout.Init(device.table().vkCreateDescriptorSetLayout, device.get()); - } }; class ImplVKDevice : public QueryInternal @@ -204,6 +84,9 @@ class ImplVKDevice : public QueryInternal [[nodiscard]] WIS_INLINE wis::VKPipelineState CreateGraphicsPipeline(wis::Result& result, const wis::VKGraphicsPipelineDesc& desc) const noexcept; + [[nodiscard]] WIS_INLINE wis::VKPipelineState + CreateComputePipeline(wis::Result& result, const wis::VKComputePipelineDesc& desc) const noexcept; + [[nodiscard]] WIS_INLINE wis::VKShader CreateShader(wis::Result& result, void* bytecode, uint32_t size) const noexcept; @@ -219,31 +102,39 @@ class ImplVKDevice : public QueryInternal return CreateRenderTarget(result, texture, desc); } - [[nodiscard]] WIS_INLINE VKSampler + [[nodiscard]] WIS_INLINE wis::VKSampler CreateSampler(wis::Result& result, const wis::SamplerDesc& desc) const noexcept; - [[nodiscard]] WIS_INLINE VKShaderResource - CreateShaderResource(wis::Result& result, VKTextureView texture, wis::ShaderResourceDesc desc) const noexcept; + [[nodiscard]] WIS_INLINE wis::VKShaderResource + CreateShaderResource(wis::Result& result, wis::VKTextureView texture, const wis::ShaderResourceDesc& desc) const noexcept; + + [[nodiscard]] WIS_INLINE wis::VKUnorderedAccessTexture + CreateUnorderedAccessTexture(wis::Result& result, wis::VKTextureView texture, const wis::UnorderedAccessDesc& desc) const noexcept + { + wis::ShaderResourceDesc re_desc{ + .format = desc.format, + .view_type = desc.view_type, + .subresource_range = 
desc.subresource_range, + }; + return CreateShaderResource(result, texture, re_desc); + } [[nodiscard]] WIS_INLINE bool QueryFeatureSupport(wis::DeviceFeature feature) const noexcept; [[nodiscard]] WIS_INLINE wis::VKDescriptorStorage - CreateDescriptorStorage(wis::Result& result, const wis::DescriptorStorageDesc& desc) const noexcept; + CreateDescriptorStorage(wis::Result& result, + const wis::DescriptorBindingDesc* descriptor_bindings = nullptr, + uint32_t descriptor_bindings_count = 0, + wis::DescriptorMemory = wis::DescriptorMemory::ShaderVisible) const noexcept; [[nodiscard]] WIS_INLINE wis::VKRootSignature - CreateRootSignature(wis::Result& result, const PushConstant* constants = nullptr, - uint32_t constants_size = 0, - const PushDescriptor* push_descriptors = nullptr, - uint32_t push_descriptors_size = 0, - [[maybe_unused]] uint32_t space_overlap_count = 1) const noexcept; - - [[nodiscard]] WIS_INLINE wis::VKRootSignature - CreateRootSignature2(wis::Result& result, const wis::PushConstant* push_constants = nullptr, - uint32_t constants_count = 0, - const wis::PushDescriptor* push_descriptors = nullptr, - uint32_t push_descriptors_count = 0, - const wis::DescriptorSpacing* descriptor_spacing = nullptr) const noexcept; + CreateRootSignature(wis::Result& result, const wis::PushConstant* push_constants = nullptr, + uint32_t constants_count = 0, + const wis::PushDescriptor* push_descriptors = nullptr, + uint32_t push_descriptors_count = 0, + const wis::DescriptorBindingDesc* descriptor_bindings = nullptr, + uint32_t descriptor_bindings_count = 0) const noexcept; public: [[nodiscard]] WIS_INLINE wis::VKSwapChain @@ -358,74 +249,70 @@ class VKDevice : public wis::ImplVKDevice return wis::ResultValue{ &wis::ImplVKDevice::CreateGraphicsPipeline, this, desc }; } /** - * @brief Creates a root signature object for use with DescriptorStorage. - * @param push_constants The root constants to create the root signature with. 
- * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. - * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If wis::VKDevice is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @return wis::VKRootSignature on success (wis::Status::Ok). + * @brief Creates a compute pipeline state object. + * @param desc The description of the compute pipeline to create. + * @return wis::VKPipelineState on success (wis::Status::Ok). 
* */ - [[nodiscard]] inline wis::VKRootSignature CreateRootSignature(wis::Result& result, const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t descriptors_count = 0, uint32_t space_overlap_count = 1) const noexcept + [[nodiscard]] inline wis::VKPipelineState CreateComputePipeline(wis::Result& result, const wis::VKComputePipelineDesc& desc) const noexcept { - return wis::ImplVKDevice::CreateRootSignature(result, push_constants, constants_count, push_descriptors, descriptors_count, space_overlap_count); + return wis::ImplVKDevice::CreateComputePipeline(result, desc); } /** - * @brief Creates a root signature object for use with DescriptorStorage. - * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. - * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param descriptors_count The number of push descriptors. Max is 8. - * @param space_overlap_count Count of descriptor spaces to overlap for each of the DescriptorStorage types. - * Default is 1. Max is 16. This is used primarily for descriptor type aliasing. - * Example: If wis::VKDevice is 2, that means that 2 descriptor spaces will be allocated for each descriptor type. - * [[vk::binding(0,0)]] SamplerState samplers: register(s0,space1); // space1 can be used for different type of samplers e.g. SamplerComparisonState - * [[vk::binding(0,0)]] SamplerComparisonState shadow_samplers: register(s0,space2); // they use the same binding (works like overloading) - * [[vk::binding(0,1)]] ConstantBuffer cbuffers: register(b0,space3); // this type also has 2 spaces, next will be on space 4 etc. - * @return wis::VKRootSignature on success (wis::Status::Ok). 
+ * @brief Creates a compute pipeline state object. + * @param desc The description of the compute pipeline to create. + * @return wis::VKPipelineState on success (wis::Status::Ok). * */ - [[nodiscard]] inline wis::ResultValue CreateRootSignature(const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t descriptors_count = 0, uint32_t space_overlap_count = 1) const noexcept + [[nodiscard]] inline wis::ResultValue CreateComputePipeline(const wis::VKComputePipelineDesc& desc) const noexcept { - return wis::ResultValue{ &wis::ImplVKDevice::CreateRootSignature, this, push_constants, constants_count, push_descriptors, descriptors_count, space_overlap_count }; + return wis::ResultValue{ &wis::ImplVKDevice::CreateComputePipeline, this, desc }; } /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. Space overlap count means how many consecutive spaces are used by the binding. * @param push_constants The root constants to create the root signature with. 
- * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @return wis::VKRootSignature on success (wis::Status::Ok). 
* */ - [[nodiscard]] inline wis::VKRootSignature CreateRootSignature2(wis::Result& result, const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptors_count = 0, const wis::DescriptorSpacing* descriptor_spacing = nullptr) const noexcept + [[nodiscard]] inline wis::VKRootSignature CreateRootSignature(wis::Result& result, const wis::PushConstant* push_constants = nullptr, uint32_t push_constant_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptor_count = 0, const wis::DescriptorBindingDesc* bindings = nullptr, uint32_t binding_count = 0) const noexcept { - return wis::ImplVKDevice::CreateRootSignature2(result, push_constants, constants_count, push_descriptors, push_descriptors_count, descriptor_spacing); + return wis::ImplVKDevice::CreateRootSignature(result, push_constants, push_constant_count, push_descriptors, push_descriptor_count, bindings, binding_count); } /** * @brief Creates a root signature object for use with DescriptorStorage. - * Supplies number of types for each descriptor type separately. + * DescriptorStorage is used for bindless and non-uniform bindings. Don't combine with Descriptor buffers, this may reduce performance. + * Push constants and push descriptors are used for fast changing data. + * Spaces may not overlap, but can be in any order. Push descriptors always have space0 and [[vk::binding(x,0)]]. + * That means that all the binding numbers are off by 1. Meaning that if you have Descriptor Storage with 1 binding, it will be [[vk::binding(0,1)]] + * even though it is supposed to be binding 0. This is done for consistency. + * Set number is the position of binding in bindings array. e.g. bindings[5] is set 5 and on HLSL side it is [[vk::binding(0,5)]]. + * For several overlapping types e.g. 2D and 3D textures, use different spaces. + * Those are specified in the bindings array. 
Space overlap count means how many consecutive spaces are used by the binding. * @param push_constants The root constants to create the root signature with. - * @param constants_count The number of push constants. Max is 5. + * @param push_constant_count The number of push constants. Max is 5. * @param push_descriptors The root descriptors to create the root signature with. - * In shader will appear in order of submission. e.g. root_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) - * @param push_descriptors_count The number of push descriptors. Max is 8. - * @param descriptor_spacing Descriptor spacing allocation. - * nullptr means allocate 1 space for each. + * In shader will appear in order of submission. e.g. push_descriptors[5] is [[vk::binding(5,0)]] ... : register(b5/t5/u5) + * @param push_descriptor_count The number of push descriptors. Max is 8. + * @param bindings The bindings to allocate. Order matters, binding count is ignored. + * One block of bindings can contain up to 4096 descriptors. For Sampler blocks, max amount of samplers across all bindings is 2048. + * @param binding_count Count of bindings to allocate. Max is 64 - push_constant_count - push_descriptor_count * 2. * @return wis::VKRootSignature on success (wis::Status::Ok). 
* */ - [[nodiscard]] inline wis::ResultValue CreateRootSignature2(const wis::PushConstant* push_constants = nullptr, uint32_t constants_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptors_count = 0, const wis::DescriptorSpacing* descriptor_spacing = nullptr) const noexcept + [[nodiscard]] inline wis::ResultValue CreateRootSignature(const wis::PushConstant* push_constants = nullptr, uint32_t push_constant_count = 0, const wis::PushDescriptor* push_descriptors = nullptr, uint32_t push_descriptor_count = 0, const wis::DescriptorBindingDesc* bindings = nullptr, uint32_t binding_count = 0) const noexcept { - return wis::ResultValue{ &wis::ImplVKDevice::CreateRootSignature2, this, push_constants, constants_count, push_descriptors, push_descriptors_count, descriptor_spacing }; + return wis::ResultValue{ &wis::ImplVKDevice::CreateRootSignature, this, push_constants, push_constant_count, push_descriptors, push_descriptor_count, bindings, binding_count }; } /** * @brief Creates a shader object. @@ -553,22 +440,26 @@ class VKDevice : public wis::ImplVKDevice /** * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @return wis::VKDescriptorStorage on success (wis::Status::Ok). 
* */ - [[nodiscard]] inline wis::VKDescriptorStorage CreateDescriptorStorage(wis::Result& result, const wis::DescriptorStorageDesc& desc) const noexcept + [[nodiscard]] inline wis::VKDescriptorStorage CreateDescriptorStorage(wis::Result& result, const wis::DescriptorBindingDesc* bindings, uint32_t bindings_count, wis::DescriptorMemory memory = wis::DescriptorMemory::ShaderVisible) const noexcept { - return wis::ImplVKDevice::CreateDescriptorStorage(result, desc); + return wis::ImplVKDevice::CreateDescriptorStorage(result, bindings, bindings_count, memory); } /** * @brief Creates a descriptor storage object with specified number of bindings to allocate. * Switching between several DescriptorStorage is slow, consider allocating one big set and copy descriptors to it. - * @param desc The description of the descriptor storage to create. + * @param bindings The bindings to allocate. Space and space overlap counts are ignored. + * @param bindings_count The number of bindings to allocate. + * @param memory The memory to allocate the descriptors in. * @return wis::VKDescriptorStorage on success (wis::Status::Ok). * */ - [[nodiscard]] inline wis::ResultValue CreateDescriptorStorage(const wis::DescriptorStorageDesc& desc) const noexcept + [[nodiscard]] inline wis::ResultValue CreateDescriptorStorage(const wis::DescriptorBindingDesc* bindings, uint32_t bindings_count, wis::DescriptorMemory memory = wis::DescriptorMemory::ShaderVisible) const noexcept { - return wis::ResultValue{ &wis::ImplVKDevice::CreateDescriptorStorage, this, desc }; + return wis::ResultValue{ &wis::ImplVKDevice::CreateDescriptorStorage, this, bindings, bindings_count, memory }; } /** * @brief Queries if the device supports the feature. 
diff --git a/wisdom/include/wisdom/vulkan/vk_device_ext.h b/wisdom/include/wisdom/vulkan/vk_device_ext.h index 3d563fe9..f23ae529 100644 --- a/wisdom/include/wisdom/vulkan/vk_device_ext.h +++ b/wisdom/include/wisdom/vulkan/vk_device_ext.h @@ -58,6 +58,9 @@ struct XInternalFeatures { bool dynamic_vsync : 1 = false; // Shall remain false until the perf is not fixed bool dynamic_render_unused_attachments : 1 = false; bool multiview : 1 = false; + + // Dynamically enabled features from the extensions + bool raytracing : 1 = false; }; struct XBaseProperties { @@ -170,6 +173,8 @@ struct VKDeviceExtensionEmbedded1 : public QueryInternalExtension(structure_map.at(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES)); features.multiview = vk_11_features.multiview; + + features.raytracing = structure_map.contains(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR); return {}; } diff --git a/wisdom/include/wisdom/vulkan/vk_resource.h b/wisdom/include/wisdom/vulkan/vk_resource.h index 62347c4e..bcc04eba 100644 --- a/wisdom/include/wisdom/vulkan/vk_resource.h +++ b/wisdom/include/wisdom/vulkan/vk_resource.h @@ -66,6 +66,17 @@ class ImplVKBuffer : public QueryInternal { memory.VKUnmap(); } + + [[nodiscard]] + uint64_t GetGPUAddress() const noexcept + { + auto& device = memory.GetInternal().allocator.header(); + VkBufferDeviceAddressInfo info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buffer + }; + return device.table().vkGetBufferDeviceAddress(device.get(), &info); + } }; template<> @@ -186,6 +197,8 @@ class VKShaderResource : public QueryInternal } }; +using VKUnorderedAccessTexture = VKShaderResource; + #pragma region VKBuffer /** * @brief Represents buffer object for storing linear data. @@ -211,6 +224,14 @@ class VKBuffer : public wis::ImplVKBuffer { wis::ImplVKBuffer::Unmap(); } + /** + * @brief Returns the address of the resource in GPU memory. + * @return The address of the resource in GPU memory. 
+ * */ + inline uint64_t GetGPUAddress() const noexcept + { + return wis::ImplVKBuffer::GetGPUAddress(); + } }; #pragma endregion VKBuffer diff --git a/wisdom/include/wisdom/vulkan/vk_views.h b/wisdom/include/wisdom/vulkan/vk_views.h index 2223021e..8923fd56 100644 --- a/wisdom/include/wisdom/vulkan/vk_views.h +++ b/wisdom/include/wisdom/vulkan/vk_views.h @@ -22,9 +22,11 @@ using VKBufferView = std::tuple; using VKRenderTargetView = std::tuple; using VKSamplerView = std::tuple; using VKShaderResourceView = std::tuple; +using VKUnorderedAccessTextureView = std::tuple; using VKMemoryView = std::tuple; using VKPipelineView = std::tuple; -using VKDescriptorStorageView = std::tuple>; +using VKDescriptorStorageView = std::tuple>; +using VKAcceleratedGeometryDesc = std::pair; } // namespace wis #endif // VK_VIEWS_H diff --git a/wisdom/include/wisdom/wisdom.hpp b/wisdom/include/wisdom/wisdom.hpp index 1bd4f2ec..8f1e67eb 100644 --- a/wisdom/include/wisdom/wisdom.hpp +++ b/wisdom/include/wisdom/wisdom.hpp @@ -49,6 +49,7 @@ using DebugMessenger = DX12DebugMessenger; using RenderTarget = DX12RenderTarget; using Sampler = DX12Sampler; using ShaderResource = DX12ShaderResource; +using UnorderedAccessTexture = DX12UnorderedAccessTexture; //------------------------------------------------------------------------- @@ -119,10 +120,15 @@ using MemoryView = DX12MemoryView; using PipelineView = DX12PipelineView; using SamplerView = DX12SamplerView; using ShaderResourceView = DX12ShaderResourceView; +using AcceleratedGeometryDesc = DX12AcceleratedGeometryDesc; +using DescriptorStorageView = DX12DescriptorStorageView; +using BottomLevelASBuildDesc = DX12BottomLevelASBuildDesc; using BufferBarrier2 = DX12BufferBarrier2; using TextureBarrier2 = DX12TextureBarrier2; using GraphicsShaderStages = DX12GraphicsShaderStages; +using RaytracingPipeineDesc = DX12RaytracingPipeineDesc; using GraphicsPipelineDesc = DX12GraphicsPipelineDesc; +using ComputePipelineDesc = DX12ComputePipelineDesc; using 
RenderPassRenderTargetDesc = DX12RenderPassRenderTargetDesc; using RenderPassDepthStencilDesc = DX12RenderPassDepthStencilDesc; using RenderPassDesc = DX12RenderPassDesc; @@ -157,6 +163,7 @@ using DebugMessenger = VKDebugMessenger; using RenderTarget = VKRenderTarget; using Sampler = VKSampler; using ShaderResource = VKShaderResource; +using UnorderedAccessTexture = VKUnorderedAccessTexture; //------------------------------------------------------------------------- @@ -227,10 +234,15 @@ using MemoryView = VKMemoryView; using PipelineView = VKPipelineView; using SamplerView = VKSamplerView; using ShaderResourceView = VKShaderResourceView; +using AcceleratedGeometryDesc = VKAcceleratedGeometryDesc; +using DescriptorStorageView = VKDescriptorStorageView; +using BottomLevelASBuildDesc = VKBottomLevelASBuildDesc; using BufferBarrier2 = VKBufferBarrier2; using TextureBarrier2 = VKTextureBarrier2; using GraphicsShaderStages = VKGraphicsShaderStages; +using RaytracingPipeineDesc = VKRaytracingPipeineDesc; using GraphicsPipelineDesc = VKGraphicsPipelineDesc; +using ComputePipelineDesc = VKComputePipelineDesc; using RenderPassRenderTargetDesc = VKRenderPassRenderTargetDesc; using RenderPassDepthStencilDesc = VKRenderPassDepthStencilDesc; using RenderPassDesc = VKRenderPassDesc; diff --git a/xml/enums.xml b/xml/enums.xml index 8232d419..e6ae11f2 100644 --- a/xml/enums.xml +++ b/xml/enums.xml @@ -91,6 +91,41 @@ Stages have no granularity, either all or one can be selected."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -124,6 +159,10 @@ May be bigger than constant buffers, but slower."> + + + + @@ -826,7 +865,8 @@ Triangle culling depends on {WindingOrder::} option."> - + @@ -1189,20 +1229,22 @@ Platform extension values start from 2049 - - - - - - - - - + + + + + + + + + + + + + + + + @@ -1232,12 +1274,41 @@ Used for color blending operations."> + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + @@ -1512,5 
+1583,68 @@ Do not mix DescriptorBuffer and non-DescriptorBuffer pipelines."> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xml/structs.xml b/xml/structs.xml index 3809feac..4ac46159 100644 --- a/xml/structs.xml +++ b/xml/structs.xml @@ -57,6 +57,9 @@ + + + @@ -336,6 +339,12 @@ Zero initialized range means all subresources are selected."/> + + + + + + - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -402,6 +487,23 @@ Used for RootSignature."> + + + + + + + + + + + + + + + @@ -419,6 +521,14 @@ Default is 0. 0 means regular rendering."/> + + + + + + + + diff --git a/xml/wis.xml b/xml/wis.xml index 1cdff94c..88aa2806 100644 --- a/xml/wis.xml +++ b/xml/wis.xml @@ -83,6 +83,7 @@ Used for bindless and non-uniform bindings. Don't combine with Descriptor buffer + @@ -148,31 +149,29 @@ Default is {MutiWaitFlags::All}"/> - + + + + + + - + - - - - - - - - - - - + + + @@ -220,7 +219,9 @@ Does not work with 3D textures."/> - + + + @@ -369,6 +370,17 @@ Note, the resulting Texture must be destroyed before Memory backing it up."> + + + + + + + + + + + @@ -405,6 +417,13 @@ Note, the resulting Texture must be destroyed before Memory backing it up."> + + + + + + @@ -469,6 +488,12 @@ Otherwise the first is chosen."> + + + + + + @@ -476,6 +501,12 @@ Otherwise the first is chosen."> + + + + + + @@ -487,7 +518,31 @@ Buffer is always bound with full size."> - + + + + + + + + + + + + + + + + + + + + + + " @@ -545,6 +600,9 @@ Can still be enqueued after the signal."> + + + @@ -607,14 +665,16 @@ If NULL, returns the amount of images swapchain has."/> - + + - + + @@ -624,7 +684,8 @@ size + offset must be less or equal the overall size of the bound buffer."/> - + +