Skip to content

Commit

Permalink
wgpu work (#54)
Browse files (browse the repository at this point in the history)
* use wgpuQueueWriteBuffer for uploading initial buffer data

* request device with available limits/features

* add support for uploading initial texture data

* add missing queue releases

* support wgpu sampler comparison mode
  • Loading branch information
skallweitNV authored Sep 26, 2024
1 parent 6088af7 commit 88455c6
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 61 deletions.
59 changes: 2 additions & 57 deletions src/wgpu/wgpu-buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,64 +115,9 @@ Result DeviceImpl::createBuffer(const BufferDesc& desc, const void* initData, IB

if (initData)
{
WGPUBufferDescriptor stagingBufferDesc = {};
stagingBufferDesc.size = desc.size;
stagingBufferDesc.usage = WGPUBufferUsage_CopySrc | WGPUBufferUsage_MapWrite;
WGPUBuffer stagingBuffer = m_ctx.api.wgpuDeviceCreateBuffer(m_ctx.device, &stagingBufferDesc);
if (!stagingBuffer)
{
return SLANG_FAIL;
}
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuBufferRelease(stagingBuffer); });

// Map the staging buffer
// TODO: we should switch to the new async API
{
WGPUBufferMapAsyncStatus status = WGPUBufferMapAsyncStatus_Unknown;
m_ctx.api.wgpuBufferMapAsync(
stagingBuffer,
WGPUMapMode_Write,
0,
desc.size,
[](WGPUBufferMapAsyncStatus status, void* userdata) { *(WGPUBufferMapAsyncStatus*)userdata = status; },
&status
);
while (status == WGPUBufferMapAsyncStatus_Unknown)
{
m_ctx.api.wgpuDeviceTick(m_ctx.device);
}
if (status != WGPUBufferMapAsyncStatus_Success)
{
return SLANG_FAIL;
}
}

void* data = m_ctx.api.wgpuBufferGetMappedRange(stagingBuffer, 0, desc.size);
if (!data)
{
m_ctx.api.wgpuBufferUnmap(stagingBuffer);
return SLANG_FAIL;
}
::memcpy(data, initData, desc.size);
m_ctx.api.wgpuBufferUnmap(stagingBuffer);

WGPUCommandEncoder encoder = m_ctx.api.wgpuDeviceCreateCommandEncoder(m_ctx.device, nullptr);
if (!encoder)
{
return SLANG_FAIL;
}
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandEncoderRelease(encoder); });

m_ctx.api.wgpuCommandEncoderCopyBufferToBuffer(encoder, stagingBuffer, 0, buffer->m_buffer, 0, desc.size);
WGPUCommandBuffer commandBuffer = m_ctx.api.wgpuCommandEncoderFinish(encoder, nullptr);
if (!commandBuffer)
{
return SLANG_FAIL;
}
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });

WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);
m_ctx.api.wgpuQueueWriteBuffer(queue, buffer->m_buffer, 0, initData, desc.size);
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });

// Wait for the command buffer to finish executing
// TODO: we should switch to the new async API
Expand Down
31 changes: 28 additions & 3 deletions src/wgpu/wgpu-device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ Result DeviceImpl::initialize(const Desc& desc)
return SLANG_FAIL;
}

// Query adapter limits.
WGPUSupportedLimits adapterLimits = {};
api.wgpuAdapterGetLimits(m_ctx.adapter, &adapterLimits);

// Query adapter features.
size_t adapterFeatureCount = api.wgpuAdapterEnumerateFeatures(m_ctx.adapter, nullptr);
std::vector<WGPUFeatureName> adapterFeatures(adapterFeatureCount);
api.wgpuAdapterEnumerateFeatures(m_ctx.adapter, adapterFeatures.data());

auto requestDeviceCallback =
[](WGPURequestDeviceStatus status, WGPUDevice device, char const* message, void* userdata)
{
Expand All @@ -101,7 +110,13 @@ Result DeviceImpl::initialize(const Desc& desc)
}
};

// We request a device with the maximum available limits and feature set.
WGPURequiredLimits requiredLimits = {};
requiredLimits.limits = adapterLimits.limits;
WGPUDeviceDescriptor deviceDesc = {};
deviceDesc.requiredFeatures = adapterFeatures.data();
deviceDesc.requiredFeatureCount = adapterFeatures.size();
deviceDesc.requiredLimits = &requiredLimits;
deviceDesc.uncapturedErrorCallbackInfo.callback = errorCallback;
deviceDesc.uncapturedErrorCallbackInfo.userdata = this;
api.wgpuAdapterRequestDevice(m_ctx.adapter, &deviceDesc, requestDeviceCallback, &m_ctx);
Expand All @@ -110,10 +125,18 @@ Result DeviceImpl::initialize(const Desc& desc)
return SLANG_FAIL;
}

WGPUSupportedLimits limits = {};
api.wgpuDeviceGetLimits(m_ctx.device, &limits);
// Query device limits.
WGPUSupportedLimits supportedLimits = {};
api.wgpuDeviceGetLimits(m_ctx.device, &supportedLimits);
m_ctx.limits = supportedLimits.limits;

m_info.limits.maxComputeDispatchThreadGroups[0] = m_ctx.limits.maxComputeWorkgroupSizeX;

m_info.limits.maxComputeDispatchThreadGroups[0] = limits.limits.maxComputeWorkgroupSizeX;
// Query device features.
size_t deviceFeatureCount = api.wgpuDeviceEnumerateFeatures(m_ctx.device, nullptr);
std::vector<WGPUFeatureName> deviceFeatures(deviceFeatureCount);
api.wgpuDeviceEnumerateFeatures(m_ctx.device, deviceFeatures.data());
m_ctx.features.insert(deviceFeatures.begin(), deviceFeatures.end());

return SLANG_OK;
}
Expand Down Expand Up @@ -196,6 +219,7 @@ Result DeviceImpl::readTexture(
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });

WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);

// Wait for the command buffer to finish executing
Expand Down Expand Up @@ -283,6 +307,7 @@ Result DeviceImpl::readBuffer(IBuffer* buffer, Offset offset, Size size, ISlangB
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });

WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);

// Wait for the command buffer to finish executing
Expand Down
4 changes: 4 additions & 0 deletions src/wgpu/wgpu-device.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include "wgpu-base.h"

#include <unordered_set>

namespace rhi::wgpu {

struct Context
Expand All @@ -10,6 +12,8 @@ struct Context
WGPUInstance instance = nullptr;
WGPUAdapter adapter = nullptr;
WGPUDevice device = nullptr;
WGPULimits limits = {};
std::unordered_set<WGPUFeatureName> features;

~Context();
};
Expand Down
5 changes: 4 additions & 1 deletion src/wgpu/wgpu-sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ Result DeviceImpl::createSampler(SamplerDesc const& desc, ISampler** outSampler)
samplerDesc.mipmapFilter = translateMipmapFilterMode(desc.mipFilter);
samplerDesc.lodMinClamp = desc.minLOD;
samplerDesc.lodMaxClamp = desc.maxLOD;
samplerDesc.compare = translateCompareFunction(desc.comparisonFunc);
if (desc.reductionOp == TextureReductionOp::Comparison)
{
samplerDesc.compare = translateCompareFunction(desc.comparisonFunc);
}
samplerDesc.maxAnisotropy = desc.maxAnisotropy;
samplerDesc.label = desc.label;
sampler->m_sampler = m_ctx.api.wgpuDeviceCreateSampler(m_ctx.device, &samplerDesc);
Expand Down
69 changes: 69 additions & 0 deletions src/wgpu/wgpu-texture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "wgpu-device.h"
#include "wgpu-util.h"

#include "core/deferred.h"

namespace rhi::wgpu {

TextureImpl::TextureImpl(DeviceImpl* device, const TextureDesc& desc)
Expand Down Expand Up @@ -41,6 +43,10 @@ Result DeviceImpl::createTexture(const TextureDesc& desc_, const SubresourceData
textureDesc.size.height = desc.size.height;
textureDesc.size.depthOrArrayLayers = desc.size.depth;
textureDesc.usage = translateTextureUsage(desc.usage);
if (initData)
{
textureDesc.usage |= WGPUTextureUsage_CopyDst;
}
textureDesc.dimension = translateTextureDimension(desc.type);
textureDesc.format = translateTextureFormat(desc.format);
textureDesc.mipLevelCount = desc.numMipLevels;
Expand All @@ -51,6 +57,69 @@ Result DeviceImpl::createTexture(const TextureDesc& desc_, const SubresourceData
{
return SLANG_FAIL;
}

if (initData)
{
FormatInfo formatInfo;
rhiGetFormatInfo(desc.format, &formatInfo);

WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
int mipLevelCount = desc.numMipLevels;
int arrayLayerCount = desc.arrayLength * (desc.type == TextureType::TextureCube ? 6 : 1);

for (int arrayLayer = 0; arrayLayer < arrayLayerCount; ++arrayLayer)
{
for (int mipLevel = 0; mipLevel < mipLevelCount; ++mipLevel)
{
Extents mipSize = calcMipSize(desc.size, mipLevel);
int subresourceIndex = arrayLayer * mipLevelCount + mipLevel;
const SubresourceData& data = initData[subresourceIndex];

WGPUImageCopyTexture imageCopyTexture = {};
imageCopyTexture.texture = texture->m_texture;
imageCopyTexture.mipLevel = mipLevel;
imageCopyTexture.origin = {0, 0, 0};
imageCopyTexture.aspect = WGPUTextureAspect_All;

WGPUExtent3D writeSize = {};
writeSize.width =
((mipSize.width + formatInfo.blockWidth - 1) / formatInfo.blockWidth) * formatInfo.blockWidth;
writeSize.height =
((mipSize.height + formatInfo.blockHeight - 1) / formatInfo.blockHeight) * formatInfo.blockHeight;
writeSize.depthOrArrayLayers = mipSize.depth;

WGPUTextureDataLayout dataLayout = {};
dataLayout.offset = 0;
dataLayout.bytesPerRow = data.strideY;
dataLayout.rowsPerImage = writeSize.height / formatInfo.blockHeight;

size_t dataSize = dataLayout.bytesPerRow * dataLayout.rowsPerImage * mipSize.depth;

m_ctx.api.wgpuQueueWriteTexture(queue, &imageCopyTexture, data.data, dataSize, &dataLayout, &writeSize);
}
}

// Wait for queue to finish.
// TODO: we should switch to the new async API
{
WGPUQueueWorkDoneStatus status = WGPUQueueWorkDoneStatus_Unknown;
m_ctx.api.wgpuQueueOnSubmittedWorkDone(
queue,
[](WGPUQueueWorkDoneStatus status, void* userdata) { *(WGPUQueueWorkDoneStatus*)userdata = status; },
&status
);
while (status == WGPUQueueWorkDoneStatus_Unknown)
{
m_ctx.api.wgpuDeviceTick(m_ctx.device);
}
if (status != WGPUQueueWorkDoneStatus_Success)
{
return SLANG_FAIL;
}
}
}

returnComPtr(outTexture, texture);
return SLANG_OK;
}
Expand Down

0 comments on commit 88455c6

Please sign in to comment.