WGPU graphics support (#50)

* fix short_vector
* add support for render pipelines + texture readback
shader-slang · Sep 24, 2024 · c34ac8d · c34ac8d
1 parent 081920a
commit c34ac8d
Show file tree

Hide file tree

Showing 10 changed files with 495 additions and 9 deletions.
diff --git a/src/core/short_vector.h b/src/core/short_vector.h
@@ -100,8 +100,8 @@ class short_vector
 
     /// Size constructor.
     short_vector(size_type size, const value_type& value)
-        : m_data(m_short_data)
-        , m_size(size)
+        : m_data((value_type*)m_short_data)
+        , m_size(0)
         , m_capacity(N)
     {
         if (size > m_capacity)
@@ -112,7 +112,7 @@ class short_vector
 
     /// Initializer list constructor.
     short_vector(std::initializer_list<value_type> list)
-        : m_data(m_short_data)
+        : m_data((value_type*)m_short_data)
         , m_size(0)
         , m_capacity(N)
     {

diff --git a/src/wgpu/wgpu-command-encoder.cpp b/src/wgpu/wgpu-command-encoder.cpp
@@ -6,6 +6,7 @@
 #include "wgpu-texture.h"
 #include "wgpu-device.h"
 #include "wgpu-transient-resource-heap.h"
+#include "wgpu-util.h"
 
 namespace rhi::wgpu {
 
@@ -295,12 +296,76 @@ void* RenderCommandEncoderImpl::getInterface(SlangUUID const& uuid)
 Result RenderCommandEncoderImpl::init(CommandBufferImpl* commandBuffer, const RenderPassDesc& desc)
 {
+    // Begins a WGPU render pass on the command buffer's encoder: the color attachments and the
+    // optional depth-stencil attachment of `desc` are translated into their WGPU descriptors and
+    // the resulting pass encoder is stored in m_renderPassEncoder. Returns SLANG_OK when that
+    // encoder handle is non-null and SLANG_FAIL otherwise.
     CommandEncoderImpl::init(commandBuffer);
+
+    // One value-initialized WGPU attachment per input color attachment, filled in below.
+    short_vector<WGPURenderPassColorAttachment, 8> colorAttachments(desc.colorAttachmentCount, {});
+    for (GfxIndex i = 0; i < desc.colorAttachmentCount; ++i)
+    {
+        const RenderPassColorAttachment& attachmentIn = desc.colorAttachments[i];
+        WGPURenderPassColorAttachment& attachment = colorAttachments[i];
+        // The view pointer is dereferenced unconditionally, so every color attachment must have a view.
+        attachment.view = static_cast<TextureViewImpl*>(attachmentIn.view)->m_textureView;
+        attachment.depthSlice = -1;         // TODO not provided
+        attachment.resolveTarget = nullptr; // TODO not provided
+        attachment.loadOp = translateLoadOp(attachmentIn.loadOp);
+        attachment.storeOp = translateStoreOp(attachmentIn.storeOp);
+        attachment.clearValue.r = attachmentIn.clearValue[0];
+        attachment.clearValue.g = attachmentIn.clearValue[1];
+        attachment.clearValue.b = attachmentIn.clearValue[2];
+        attachment.clearValue.a = attachmentIn.clearValue[3];
+    }
+
+    // Only populated when the pass has a depth-stencil attachment; otherwise it stays
+    // value-initialized and is not referenced by passDesc.
+    WGPURenderPassDepthStencilAttachment depthStencilAttachment = {};
+    if (desc.depthStencilAttachment)
+    {
+        const RenderPassDepthStencilAttachment& attachmentIn = *desc.depthStencilAttachment;
+        WGPURenderPassDepthStencilAttachment& attachment = depthStencilAttachment;
+        attachment.view = static_cast<TextureViewImpl*>(attachmentIn.view)->m_textureView;
+        attachment.depthLoadOp = translateLoadOp(attachmentIn.depthLoadOp);
+        attachment.depthStoreOp = translateStoreOp(attachmentIn.depthStoreOp);
+        attachment.depthClearValue = attachmentIn.depthClearValue;
+        attachment.depthReadOnly = attachmentIn.depthReadOnly;
+        attachment.stencilLoadOp = translateLoadOp(attachmentIn.stencilLoadOp);
+        attachment.stencilStoreOp = translateStoreOp(attachmentIn.stencilStoreOp);
+        attachment.stencilClearValue = attachmentIn.stencilClearValue;
+        attachment.stencilReadOnly = attachmentIn.stencilReadOnly;
+    }
+
     WGPURenderPassDescriptor passDesc = {};
+    passDesc.colorAttachmentCount = desc.colorAttachmentCount;
+    passDesc.colorAttachments = colorAttachments.data();
+    passDesc.depthStencilAttachment = desc.depthStencilAttachment ? &depthStencilAttachment : nullptr;
+    // passDesc.occlusionQuerySet not supported
+    // passDesc.timestampWrites not supported
+
+    // passDesc points at the locals above, so it is only valid for the duration of this call.
     m_renderPassEncoder =
         m_device->m_ctx.api.wgpuCommandEncoderBeginRenderPass(m_commandBuffer->m_commandEncoder, &passDesc);
     return m_renderPassEncoder ? SLANG_OK : SLANG_FAIL;
 }
 
+Result RenderCommandEncoderImpl::prepareDraw()
+{
+    // Flushes the currently bound pipeline and its bind groups into the render pass encoder.
+    // Fails with SLANG_FAIL when no pipeline is bound, and forwards any failure reported by
+    // bindPipelineImpl().
+    PipelineImpl* boundPipeline = static_cast<PipelineImpl*>(m_currentPipeline.Ptr());
+    if (boundPipeline == nullptr)
+        return SLANG_FAIL;
+
+    RootBindingContext context;
+    SLANG_RETURN_ON_FAIL(bindPipelineImpl(context));
+
+    auto& api = m_device->m_ctx.api;
+    api.wgpuRenderPassEncoderSetPipeline(m_renderPassEncoder, m_currentPipeline->m_renderPipeline);
+
+    // Bind groups are set in order, each at the slot matching its index, without dynamic offsets.
+    uint32_t slot = 0;
+    while (slot < context.bindGroups.size())
+    {
+        api.wgpuRenderPassEncoderSetBindGroup(m_renderPassEncoder, slot, context.bindGroups[slot], 0, nullptr);
+        ++slot;
+    }
+    return SLANG_OK;
+}
+
 void RenderCommandEncoderImpl::endEncoding()
 {
     endEncodingImpl();
@@ -319,9 +384,35 @@ Result RenderCommandEncoderImpl::bindPipelineWithRootObject(IPipeline* pipeline,
     return setPipelineWithRootObjectImpl(pipeline, rootObject);
 }
 
-void RenderCommandEncoderImpl::setViewports(GfxCount count, const Viewport* viewports) {}
+void RenderCommandEncoderImpl::setViewports(GfxCount count, const Viewport* viewports)
+{
+    // Applies the first entry of `viewports` to the render pass encoder. Any further entries
+    // are ignored, and nothing is recorded when `count` is below 1.
+    if (count >= 1)
+    {
+        const Viewport& first = viewports[0];
+        m_device->m_ctx.api.wgpuRenderPassEncoderSetViewport(
+            m_renderPassEncoder,
+            first.originX,
+            first.originY,
+            first.extentX,
+            first.extentY,
+            first.minZ,
+            first.maxZ
+        );
+    }
+}
 
-void RenderCommandEncoderImpl::setScissorRects(GfxCount count, const ScissorRect* rects) {}
+void RenderCommandEncoderImpl::setScissorRects(GfxCount count, const ScissorRect* rects)
+{
+    // Applies the first entry of `rects` to the render pass encoder. Any further entries are
+    // ignored, and nothing is recorded when `count` is below 1.
+    if (count >= 1)
+    {
+        const ScissorRect& first = rects[0];
+        // The rect is given as min/max corners; the encoder call takes an origin plus extent.
+        m_device->m_ctx.api.wgpuRenderPassEncoderSetScissorRect(
+            m_renderPassEncoder,
+            first.minX,
+            first.minY,
+            first.maxX - first.minX,
+            first.maxY - first.minY
+        );
+    }
+}
 
 void RenderCommandEncoderImpl::setPrimitiveTopology(PrimitiveTopology topology) {} // Empty body: the requested topology is neither stored nor forwarded here.
 
@@ -373,12 +464,14 @@ Result RenderCommandEncoderImpl::setSamplePositions(
 
 Result RenderCommandEncoderImpl::draw(GfxCount vertexCount, GfxIndex startVertex)
 {
+    // Non-instanced draw: prepareDraw() binds the current pipeline state first (its failure is
+    // returned to the caller), then the draw is recorded with an instance count of 1 and a
+    // first instance of 0.
+    SLANG_RETURN_ON_FAIL(prepareDraw());
     m_device->m_ctx.api.wgpuRenderPassEncoderDraw(m_renderPassEncoder, vertexCount, 1, startVertex, 0);
     return SLANG_OK;
 }
 
 Result RenderCommandEncoderImpl::drawIndexed(GfxCount indexCount, GfxIndex startIndex, GfxIndex baseVertex)
 {
+    // Indexed draw: prepareDraw() binds the current pipeline state first (its failure is
+    // returned to the caller), then the indexed draw is recorded.
+    // NOTE(review): the literal 1 and the trailing 0 are presumably the instance count and the
+    // first instance, mirroring draw() - confirm against the WGPU header.
+    SLANG_RETURN_ON_FAIL(prepareDraw());
     m_device->m_ctx.api.wgpuRenderPassEncoderDrawIndexed(m_renderPassEncoder, indexCount, 1, startIndex, baseVertex, 0);
     return SLANG_OK;
 }
@@ -422,6 +515,7 @@ Result RenderCommandEncoderImpl::drawInstanced(
     GfxIndex startInstanceLocation
 )
 {
+    SLANG_RETURN_ON_FAIL(prepareDraw());
     m_device->m_ctx.api
         .wgpuRenderPassEncoderDraw(m_renderPassEncoder, vertexCount, instanceCount, startVertex, startInstanceLocation);
     return SLANG_OK;
@@ -435,6 +529,7 @@ Result RenderCommandEncoderImpl::drawIndexedInstanced(
     GfxIndex startInstanceLocation
 )
 {
+    SLANG_RETURN_ON_FAIL(prepareDraw());
     m_device->m_ctx.api.wgpuRenderPassEncoderDrawIndexed(
         m_renderPassEncoder,
         indexCount,

diff --git a/src/wgpu/wgpu-command-encoder.h b/src/wgpu/wgpu-command-encoder.h
@@ -137,6 +137,8 @@ class RenderCommandEncoderImpl : public IRenderCommandEncoder, public CommandEnc
 
     Result init(CommandBufferImpl* commandBuffer, const RenderPassDesc& renderPass);
 
+    /// Sets the current pipeline and its bind groups on the render pass encoder; called by the
+    /// draw methods before recording a draw. Returns SLANG_FAIL when no pipeline is bound.
+    Result prepareDraw();
+
     // IRenderCommandEncoder implementation
 
     virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() override;

diff --git a/src/wgpu/wgpu-device.cpp b/src/wgpu/wgpu-device.cpp
@@ -136,7 +136,121 @@ Result DeviceImpl::readTexture(
     Size* outPixelSize
 )
 {
-    return SLANG_E_NOT_IMPLEMENTED;
+    // Reads mip level 0 of `texture` back into a newly allocated blob with tightly packed rows.
+    // The texels are copied on the GPU into a temporary mappable staging buffer, the call blocks
+    // until that copy has finished and the buffer is mapped, and the rows are then copied out.
+    // `outRowPitch` and `outPixelSize` are optional and receive the packed row pitch and the
+    // size of one pixel in bytes. Multisampled textures return SLANG_E_NOT_IMPLEMENTED; any
+    // WGPU failure returns SLANG_FAIL.
+    TextureImpl* textureImpl = static_cast<TextureImpl*>(texture);
+
+    if (textureImpl->m_desc.sampleCount > 1)
+    {
+        return SLANG_E_NOT_IMPLEMENTED;
+    }
+
+    const TextureDesc& desc = textureImpl->m_desc;
+    GfxCount width = std::max(desc.size.width, 1);
+    GfxCount height = std::max(desc.size.height, 1);
+    GfxCount depth = std::max(desc.size.depth, 1);
+    FormatInfo formatInfo;
+    rhiGetFormatInfo(desc.format, &formatInfo);
+    Size bytesPerPixel = formatInfo.blockSizeInBytes / formatInfo.pixelsPerBlock;
+
+    // Tightly packed layout: this is what the caller is told about and what the blob contains.
+    Size bytesPerRow = Size(width) * bytesPerPixel;
+    Size bytesPerSlice = Size(height) * bytesPerRow;
+    Size blobSize = Size(depth) * bytesPerSlice;
+    if (outRowPitch)
+        *outRowPitch = bytesPerRow;
+    if (outPixelSize)
+        *outPixelSize = bytesPerPixel;
+
+    // Staging layout: WebGPU requires the bytesPerRow of a texture-to-buffer copy to be a
+    // multiple of 256, so each row is padded up to that alignment inside the staging buffer.
+    // The padding also keeps the mapped size a multiple of 4, as buffer mapping requires.
+    constexpr Size kCopyBytesPerRowAlignment = 256;
+    Size stagingBytesPerRow =
+        ((bytesPerRow + kCopyBytesPerRowAlignment - 1) / kCopyBytesPerRowAlignment) * kCopyBytesPerRowAlignment;
+    Size stagingBytesPerSlice = Size(height) * stagingBytesPerRow;
+    Size stagingSize = Size(depth) * stagingBytesPerSlice;
+
+    // create staging buffer
+    WGPUBufferDescriptor stagingBufferDesc = {};
+    stagingBufferDesc.size = stagingSize;
+    stagingBufferDesc.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead;
+    WGPUBuffer stagingBuffer = m_ctx.api.wgpuDeviceCreateBuffer(m_ctx.device, &stagingBufferDesc);
+    if (!stagingBuffer)
+    {
+        return SLANG_FAIL;
+    }
+    SLANG_RHI_DEFERRED({ m_ctx.api.wgpuBufferRelease(stagingBuffer); });
+
+    WGPUCommandEncoder encoder = m_ctx.api.wgpuDeviceCreateCommandEncoder(m_ctx.device, nullptr);
+    if (!encoder)
+    {
+        return SLANG_FAIL;
+    }
+    SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandEncoderRelease(encoder); });
+
+    WGPUImageCopyTexture source = {};
+    source.texture = textureImpl->m_texture;
+    source.mipLevel = 0;
+    source.origin = {0, 0, 0};
+    source.aspect = WGPUTextureAspect_All;
+    WGPUImageCopyBuffer destination = {};
+    destination.layout.offset = 0;
+    destination.layout.bytesPerRow = stagingBytesPerRow;
+    destination.layout.rowsPerImage = height;
+    destination.buffer = stagingBuffer;
+    WGPUExtent3D copySize = {(uint32_t)width, (uint32_t)height, (uint32_t)depth};
+    m_ctx.api.wgpuCommandEncoderCopyTextureToBuffer(encoder, &source, &destination, &copySize);
+    WGPUCommandBuffer commandBuffer = m_ctx.api.wgpuCommandEncoderFinish(encoder, nullptr);
+    if (!commandBuffer)
+    {
+        return SLANG_FAIL;
+    }
+    SLANG_RHI_DEFERRED({ m_ctx.api.wgpuCommandBufferRelease(commandBuffer); });
+
+    WGPUQueue queue = m_ctx.api.wgpuDeviceGetQueue(m_ctx.device);
+    m_ctx.api.wgpuQueueSubmit(queue, 1, &commandBuffer);
+
+    // Wait for the command buffer to finish executing
+    // TODO: we should switch to the new async API
+    {
+        WGPUQueueWorkDoneStatus status = WGPUQueueWorkDoneStatus_Unknown;
+        m_ctx.api.wgpuQueueOnSubmittedWorkDone(
+            queue,
+            [](WGPUQueueWorkDoneStatus status, void* userdata) { *(WGPUQueueWorkDoneStatus*)userdata = status; },
+            &status
+        );
+        // The callback writes the final status; tick the device until it has fired.
+        while (status == WGPUQueueWorkDoneStatus_Unknown)
+        {
+            m_ctx.api.wgpuDeviceTick(m_ctx.device);
+        }
+        if (status != WGPUQueueWorkDoneStatus_Success)
+        {
+            return SLANG_FAIL;
+        }
+    }
+
+    // Map the staging buffer
+    // TODO: we should switch to the new async API
+    {
+        WGPUBufferMapAsyncStatus status = WGPUBufferMapAsyncStatus_Unknown;
+        m_ctx.api.wgpuBufferMapAsync(
+            stagingBuffer,
+            WGPUMapMode_Read,
+            0,
+            stagingSize,
+            [](WGPUBufferMapAsyncStatus status, void* userdata) { *(WGPUBufferMapAsyncStatus*)userdata = status; },
+            &status
+        );
+        while (status == WGPUBufferMapAsyncStatus_Unknown)
+        {
+            m_ctx.api.wgpuDeviceTick(m_ctx.device);
+        }
+        if (status != WGPUBufferMapAsyncStatus_Success)
+        {
+            return SLANG_FAIL;
+        }
+    }
+    // Registered only after a successful map, so the unmap never runs on an unmapped buffer.
+    SLANG_RHI_DEFERRED({ m_ctx.api.wgpuBufferUnmap(stagingBuffer); });
+
+    const void* data = m_ctx.api.wgpuBufferGetConstMappedRange(stagingBuffer, 0, stagingSize);
+    if (!data)
+    {
+        return SLANG_FAIL;
+    }
+
+    auto blob = OwnedBlob::create(blobSize);
+    char* dstRow = (char*)blob->getBufferPointer();
+    const char* srcBase = (const char*)data;
+    if (stagingBytesPerRow == bytesPerRow)
+    {
+        // Rows are already packed; copy everything in one go.
+        ::memcpy(dstRow, srcBase, blobSize);
+    }
+    else
+    {
+        // Strip the per-row alignment padding while copying into the packed blob.
+        for (GfxCount z = 0; z < depth; ++z)
+        {
+            const char* srcRow = srcBase + Size(z) * stagingBytesPerSlice;
+            for (GfxCount y = 0; y < height; ++y)
+            {
+                ::memcpy(dstRow, srcRow, bytesPerRow);
+                dstRow += bytesPerRow;
+                srcRow += stagingBytesPerRow;
+            }
+        }
+    }
+
+    returnComPtr(outBlob, blob);
+    return SLANG_OK;
 }
 
 Result DeviceImpl::readBuffer(IBuffer* buffer, Offset offset, Size size, ISlangBlob** outBlob)

diff --git a/src/wgpu/wgpu-input-layout.cpp b/src/wgpu/wgpu-input-layout.cpp
@@ -23,7 +23,7 @@ Result DeviceImpl::createInputLayout(InputLayoutDesc const& desc, IInputLayout**
         vertexAttribute.format = translateVertexFormat(elementDesc.format);
         vertexAttribute.offset = elementDesc.offset;
         // TODO determine shader location from name
-        vertexAttribute.shaderLocation = 0;
+        vertexAttribute.shaderLocation = i;
 
         layout->m_vertexAttributes[elementDesc.bufferSlotIndex].push_back(vertexAttribute);
     }