diff --git a/src/wgpu/wgpu-buffer.cpp b/src/wgpu/wgpu-buffer.cpp
index 7d2564cb..87b326bf 100644
--- a/src/wgpu/wgpu-buffer.cpp
+++ b/src/wgpu/wgpu-buffer.cpp
@@ -115,64 +115,9 @@ Result DeviceImpl::createBuffer(const BufferDesc& desc, const void* initData, IB
 
     if (initData)
     {
-        WGPUBufferDescriptor stagingBufferDesc = {};
-        stagingBufferDesc.size = desc.size;
-        stagingBufferDesc.usage = WGPUBufferUsage_CopySrc | WGPUBufferUsage_MapWrite;
-        WGPUBuffer stagingBuffer = m_ctx.api.wgpuDeviceCreateBuffer(m_ctx.device, &stagingBufferDesc);
-        if (!stagingBuffer)
-        {
-            return SLANG_FAIL;
-        }
-        SLANG_RHI_DEFERRED({ m_ctx.api.wgpuBufferRelease(stagingBuffer); });
-
-        // Map the staging buffer
-        // TODO: we should switch to the new async API
-        {
-            WGPUBufferMapAsyncStatus status = WGPUBufferMapAsyncStatus_Unknown;
-            m_ctx.api.wgpuBufferMapAsync(
-                stagingBuffer,
-                WGPUMapMode_Write,
-                0,
-                desc.size,
-                [](WGPUBufferMapAsyncStatus status, void* userdata) { *(WGPUBufferMapAsyncStatus*)userdata = status; },
-                &status
-            );
-            while (status == WGPUBufferMapAsyncStatus_Unknown)
-            {
-                m_ctx.api.wgpuDeviceTick(m_ctx.device);
-            }
-            if (status != WGPUBufferMapAsyncStatus_Success)
-            {
-                return SLANG_FAIL;
-            }
-        }
-
-        void* data = m_ctx.api.wgpuBufferGetMappedRange(stagingBuffer, 0, desc.size);
-        if (!data)
-        {
-            m_ctx.api.wgpuBufferUnmap(stagingBuffer);
-            return SLANG_FAIL;
-        }
-        ::memcpy(data, initData, desc.size);
-        m_ctx.api.wgpuBufferUnmap(stagingBuffer);
-
-        WGPUCommandEncoder encoder = m_ctx.api.wgpuDeviceCreateCommandEncoder(m_ctx.device, nullptr);
-        if (!encoder)
-        {
-            return SLANG_FAIL;
-        }
-        SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandEncoderRelease(encoder); });
-
-        m_ctx.api.wgpuCommandEncoderCopyBufferToBuffer(encoder, stagingBuffer, 0, buffer->m_buffer, 0, desc.size);
-        WGPUCommandBuffer commandBuffer = m_ctx.api.wgpuCommandEncoderFinish(encoder, nullptr);
-        if (!commandBuffer)
-        {
-            return SLANG_FAIL;
-        }
-        SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });
-
         WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
-        m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);
+        m_ctx.api.wgpuQueueWriteBuffer(queue, buffer->m_buffer, 0, initData, desc.size);
+        SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
 
         // Wait for the command buffer to finish executing
         // TODO: we should switch to the new async API
diff --git a/src/wgpu/wgpu-device.cpp b/src/wgpu/wgpu-device.cpp
index e761f250..61305238 100644
--- a/src/wgpu/wgpu-device.cpp
+++ b/src/wgpu/wgpu-device.cpp
@@ -91,6 +91,15 @@ Result DeviceImpl::initialize(const Desc& desc)
         return SLANG_FAIL;
     }
 
+    // Query adapter limits.
+    WGPUSupportedLimits adapterLimits = {};
+    api.wgpuAdapterGetLimits(m_ctx.adapter, &adapterLimits);
+
+    // Query adapter features.
+    size_t adapterFeatureCount = api.wgpuAdapterEnumerateFeatures(m_ctx.adapter, nullptr);
+    std::vector<WGPUFeatureName> adapterFeatures(adapterFeatureCount);
+    api.wgpuAdapterEnumerateFeatures(m_ctx.adapter, adapterFeatures.data());
+
     auto requestDeviceCallback =
         [](WGPURequestDeviceStatus status, WGPUDevice device, char const* message, void* userdata)
     {
@@ -101,7 +110,13 @@ Result DeviceImpl::initialize(const Desc& desc)
         }
     };
 
+    // We request a device with the maximum available limits and feature set.
+    WGPURequiredLimits requiredLimits = {};
+    requiredLimits.limits = adapterLimits.limits;
     WGPUDeviceDescriptor deviceDesc = {};
+    deviceDesc.requiredFeatures = adapterFeatures.data();
+    deviceDesc.requiredFeatureCount = adapterFeatures.size();
+    deviceDesc.requiredLimits = &requiredLimits;
     deviceDesc.uncapturedErrorCallbackInfo.callback = errorCallback;
     deviceDesc.uncapturedErrorCallbackInfo.userdata = this;
     api.wgpuAdapterRequestDevice(m_ctx.adapter, &deviceDesc, requestDeviceCallback, &m_ctx);
@@ -110,10 +125,18 @@ Result DeviceImpl::initialize(const Desc& desc)
         return SLANG_FAIL;
     }
 
-    WGPUSupportedLimits limits = {};
-    api.wgpuDeviceGetLimits(m_ctx.device, &limits);
+    // Query device limits.
+    WGPUSupportedLimits supportedLimits = {};
+    api.wgpuDeviceGetLimits(m_ctx.device, &supportedLimits);
+    m_ctx.limits = supportedLimits.limits;
+
+    m_info.limits.maxComputeDispatchThreadGroups[0] = m_ctx.limits.maxComputeWorkgroupSizeX;
 
-    m_info.limits.maxComputeDispatchThreadGroups[0] = limits.limits.maxComputeWorkgroupSizeX;
+    // Query device features.
+    size_t deviceFeatureCount = api.wgpuDeviceEnumerateFeatures(m_ctx.device, nullptr);
+    std::vector<WGPUFeatureName> deviceFeatures(deviceFeatureCount);
+    api.wgpuDeviceEnumerateFeatures(m_ctx.device, deviceFeatures.data());
+    m_ctx.features.insert(deviceFeatures.begin(), deviceFeatures.end());
 
     return SLANG_OK;
 }
@@ -196,6 +219,7 @@ Result DeviceImpl::readTexture(
     SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });
 
     WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
+    SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
     m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);
 
     // Wait for the command buffer to finish executing
@@ -283,6 +307,7 @@ Result DeviceImpl::readBuffer(IBuffer* buffer, Offset offset, Size size, ISlangB
     SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });
 
     WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
+    SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
     m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);
 
     // Wait for the command buffer to finish executing
diff --git a/src/wgpu/wgpu-device.h b/src/wgpu/wgpu-device.h
index 7a878aa2..f4c67400 100644
--- a/src/wgpu/wgpu-device.h
+++ b/src/wgpu/wgpu-device.h
@@ -2,6 +2,8 @@
 
 #include "wgpu-base.h"
 
+#include <unordered_set>
+
 namespace rhi::wgpu {
 
 struct Context
@@ -10,6 +12,8 @@ struct Context
     WGPUInstance instance = nullptr;
     WGPUAdapter adapter = nullptr;
     WGPUDevice device = nullptr;
+    WGPULimits limits = {};
+    std::unordered_set<WGPUFeatureName> features;
 
     ~Context();
 };
diff --git a/src/wgpu/wgpu-sampler.cpp b/src/wgpu/wgpu-sampler.cpp
index 49ed1965..a443fcc5 100644
--- a/src/wgpu/wgpu-sampler.cpp
+++ b/src/wgpu/wgpu-sampler.cpp
@@ -37,7 +37,10 @@ Result DeviceImpl::createSampler(SamplerDesc const& desc, ISampler** outSampler)
     samplerDesc.mipmapFilter = translateMipmapFilterMode(desc.mipFilter);
     samplerDesc.lodMinClamp = desc.minLOD;
     samplerDesc.lodMaxClamp = desc.maxLOD;
-    samplerDesc.compare = translateCompareFunction(desc.comparisonFunc);
+    if (desc.reductionOp == TextureReductionOp::Comparison)
+    {
+        samplerDesc.compare = translateCompareFunction(desc.comparisonFunc);
+    }
     samplerDesc.maxAnisotropy = desc.maxAnisotropy;
     samplerDesc.label = desc.label;
     sampler->m_sampler = m_ctx.api.wgpuDeviceCreateSampler(m_ctx.device, &samplerDesc);
diff --git a/src/wgpu/wgpu-texture.cpp b/src/wgpu/wgpu-texture.cpp
index 621732da..66d5104e 100644
--- a/src/wgpu/wgpu-texture.cpp
+++ b/src/wgpu/wgpu-texture.cpp
@@ -2,6 +2,8 @@
 
 #include "wgpu-device.h"
 #include "wgpu-util.h"
 
+#include "core/deferred.h"
+
 namespace rhi::wgpu {
 
 TextureImpl::TextureImpl(DeviceImpl* device, const TextureDesc& desc)
@@ -41,6 +43,10 @@ Result DeviceImpl::createTexture(const TextureDesc& desc_, const SubresourceData
     textureDesc.size.height = desc.size.height;
     textureDesc.size.depthOrArrayLayers = desc.size.depth;
     textureDesc.usage = translateTextureUsage(desc.usage);
+    if (initData)
+    {
+        textureDesc.usage |= WGPUTextureUsage_CopyDst;
+    }
     textureDesc.dimension = translateTextureDimension(desc.type);
     textureDesc.format = translateTextureFormat(desc.format);
     textureDesc.mipLevelCount = desc.numMipLevels;
@@ -51,6 +57,69 @@ Result DeviceImpl::createTexture(const TextureDesc& desc_, const SubresourceData
     {
         return SLANG_FAIL;
     }
+
+    if (initData)
+    {
+        FormatInfo formatInfo;
+        rhiGetFormatInfo(desc.format, &formatInfo);
+
+        WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
+        SLANG_RHI_DEFERRED({ m_ctx.api.wgpuQueueRelease(queue); });
+        int mipLevelCount = desc.numMipLevels;
+        int arrayLayerCount = desc.arrayLength * (desc.type == TextureType::TextureCube ? 6 : 1);
+
+        for (int arrayLayer = 0; arrayLayer < arrayLayerCount; ++arrayLayer)
+        {
+            for (int mipLevel = 0; mipLevel < mipLevelCount; ++mipLevel)
+            {
+                Extents mipSize = calcMipSize(desc.size, mipLevel);
+                int subresourceIndex = arrayLayer * mipLevelCount + mipLevel;
+                const SubresourceData& data = initData[subresourceIndex];
+
+                WGPUImageCopyTexture imageCopyTexture = {};
+                imageCopyTexture.texture = texture->m_texture;
+                imageCopyTexture.mipLevel = mipLevel;
+                imageCopyTexture.origin = {0, 0, 0};
+                imageCopyTexture.aspect = WGPUTextureAspect_All;
+
+                WGPUExtent3D writeSize = {};
+                writeSize.width =
+                    ((mipSize.width + formatInfo.blockWidth - 1) / formatInfo.blockWidth) * formatInfo.blockWidth;
+                writeSize.height =
+                    ((mipSize.height + formatInfo.blockHeight - 1) / formatInfo.blockHeight) * formatInfo.blockHeight;
+                writeSize.depthOrArrayLayers = mipSize.depth;
+
+                WGPUTextureDataLayout dataLayout = {};
+                dataLayout.offset = 0;
+                dataLayout.bytesPerRow = data.strideY;
+                dataLayout.rowsPerImage = writeSize.height / formatInfo.blockHeight;
+
+                size_t dataSize = dataLayout.bytesPerRow * dataLayout.rowsPerImage * mipSize.depth;
+
+                m_ctx.api.wgpuQueueWriteTexture(queue, &imageCopyTexture, data.data, dataSize, &dataLayout, &writeSize);
+            }
+        }
+
+        // Wait for queue to finish.
+        // TODO: we should switch to the new async API
+        {
+            WGPUQueueWorkDoneStatus status = WGPUQueueWorkDoneStatus_Unknown;
+            m_ctx.api.wgpuQueueOnSubmittedWorkDone(
+                queue,
+                [](WGPUQueueWorkDoneStatus status, void* userdata) { *(WGPUQueueWorkDoneStatus*)userdata = status; },
+                &status
+            );
+            while (status == WGPUQueueWorkDoneStatus_Unknown)
+            {
+                m_ctx.api.wgpuDeviceTick(m_ctx.device);
+            }
+            if (status != WGPUQueueWorkDoneStatus_Success)
+            {
+                return SLANG_FAIL;
+            }
+        }
+    }
+
     returnComPtr(outTexture, texture);
     return SLANG_OK;
 }