Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ID3D12DeviceExt2 for cubin 64bit support #2348

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions include/vkd3d_device_vkd3d_ext.idl
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,17 @@ interface ID3D12DeviceExt1 : ID3D12DeviceExt
HRESULT CreateResourceFromBorrowedHandle(const D3D12_RESOURCE_DESC1 *desc, UINT64 vk_handle, ID3D12Resource **resource);
HRESULT GetVulkanQueueInfoEx(ID3D12CommandQueue *queue, VkQueue *vk_queue, UINT32 *vk_queue_index, UINT32 *vk_queue_flags, UINT32 *vk_queue_family);
}

/*
 * ID3D12DeviceExt2 extends ID3D12DeviceExt1 with the entry points used for
 * cubin shaders that rely on 64-bit texture/surface handles.
 */
[
uuid(e859c4ac-ba8f-41c4-8eac-1137fde6158d),
object,
local,
pointer_default(unique)
]
interface ID3D12DeviceExt2 : ID3D12DeviceExt1
{
/* Reports whether the device can service the 64-bit cubin entry points below. */
BOOL SupportsCubin64bit();
/* Creates a cubin compute shader described by params; the created handle is returned in params->hShader. */
HRESULT CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS *params);
/* Queries a 64-bit handle for a texture descriptor, optionally combined with a sampler descriptor. */
HRESULT GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS *params);
/* Queries a 64-bit handle for a single descriptor, interpreted according to params->type. */
HRESULT GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS *params);
}
37 changes: 37 additions & 0 deletions include/vkd3d_vk_includes.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,42 @@ typedef struct D3D12_LATENCY_RESULTS
UINT8 rsvd[32];
} D3D12_LATENCY_RESULTS;

/* Parameter block consumed by ID3D12DeviceExt2::CreateCubinComputeShaderExV2. */
typedef struct D3D12_CREATE_CUBIN_SHADER_PARAMS
{
/* Extension chain. Not supported by the implementation: a non-NULL value is logged and reset to NULL. */
void *pNext;
/* Cubin image handed to the module creation call; must not be NULL. */
const void *pCubin;
/* Data size of pCubin; must be non-zero. NOTE(review): presumably in bytes - confirm. */
UINT32 size;
/* Block dimensions recorded in the created shader handle. */
UINT32 blockX;
UINT32 blockY;
UINT32 blockZ;
/* NOTE(review): not read by the CreateCubinComputeShaderExV2 implementation - confirm intended use. */
UINT32 dynSharedMemBytes;
/* Name of the shader; must not be NULL. */
const char *pShaderName;
/* Must be zero; any other value is rejected with E_INVALIDARG. */
UINT32 flags;
/* Output: receives the newly created shader handle on success. */
D3D12_CUBIN_DATA_HANDLE *hShader;
} D3D12_CREATE_CUBIN_SHADER_PARAMS;

/* Parameter block consumed by ID3D12DeviceExt2::GetCudaMergedTextureSamplerObject. */
typedef struct D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS
{
/* Extension chain. Not supported by the implementation: a non-NULL value is logged and reset to NULL. */
void *pNext;
/* Texture descriptor, decoded with d3d12_desc_decode_va(); must refer to an image view descriptor. */
SIZE_T texDesc;
/* Optional sampler descriptor. When 0 the handle is queried as a sampled image instead of a combined image sampler. */
SIZE_T smpDesc;
/* Output: value returned by vkGetImageViewHandle64NVX. */
UINT64 textureHandle;
} D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS;

/* Selects how GetCudaIndependentDescriptorObject interprets the descriptor it is given. */
typedef enum D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE
{
/* Handle is queried with VK_DESCRIPTOR_TYPE_STORAGE_IMAGE. */
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SURFACE = 0,
/* Currently rejected by the implementation with E_FAIL. */
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TEXTURE = 1,
/* Handle is queried with VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE. NOTE(review): confirm this mapping is intended. */
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SAMPLER = 2,
} D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE;

/* Parameter block consumed by ID3D12DeviceExt2::GetCudaIndependentDescriptorObject. */
typedef struct D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS
{
/* Extension chain. Not supported by the implementation: a non-NULL value is logged and reset to NULL. */
void *pNext;
/* Chooses the Vulkan descriptor type used for the handle query. */
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE type;
/* Descriptor, decoded with d3d12_desc_decode_va(); must refer to an image view descriptor for every type. */
SIZE_T desc;
/* Output: value returned by vkGetImageViewHandle64NVX. */
UINT64 handle;
} D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS;

#endif // __VKD3D_VK_INCLUDES_H

10 changes: 8 additions & 2 deletions libs/vkd3d/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,11 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio
}
}

if (vulkan_info->NVX_binary_import && vulkan_info->NVX_image_view_handle &&
has_extension(extensions, count, VK_NVX_BINARY_IMPORT_EXTENSION_NAME, 2) &&
has_extension(extensions, count, VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, 3))
vulkan_info->supports_cubin_64bit = true;

for (i = 0; i < user_extension_count; ++i)
{
if (!has_extension(extensions, count, user_extensions[i], 0))
Expand Down Expand Up @@ -3707,7 +3712,8 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
}

if (IsEqualGUID(riid, &IID_ID3D12DeviceExt)
|| IsEqualGUID(riid, &IID_ID3D12DeviceExt1))
|| IsEqualGUID(riid, &IID_ID3D12DeviceExt1)
|| IsEqualGUID(riid, &IID_ID3D12DeviceExt2))
{
d3d12_device_vkd3d_ext_AddRef(&device->ID3D12DeviceExt_iface);
*object = &device->ID3D12DeviceExt_iface;
Expand Down Expand Up @@ -9073,7 +9079,7 @@ static void d3d12_device_replace_vtable(struct d3d12_device *device)
}
}

extern CONST_VTBL struct ID3D12DeviceExt1Vtbl d3d12_device_vkd3d_ext_vtbl;
extern CONST_VTBL struct ID3D12DeviceExt2Vtbl d3d12_device_vkd3d_ext_vtbl;
extern CONST_VTBL struct ID3D12DXVKInteropDevice1Vtbl d3d12_dxvk_interop_device_vtbl;
extern CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl;

Expand Down
169 changes: 158 additions & 11 deletions libs/vkd3d/device_vkd3d_ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,31 +88,45 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(d3d12_d
return ret_val;
}

static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(d3d12_device_vkd3d_ext_iface *iface, const void *cubin_data,
UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle)
static HRESULT d3d12_device_vkd3d_ext_create_cubin_compute_shader(struct d3d12_device *device,
const void *cubin_data, UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z,
const char *shader_name, bool use_64bit_texturing, UINT32 flags, D3D12_CUBIN_DATA_HANDLE **out_handle)
{
VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX };
VkCuModuleCreateInfoNVX moduleCreateInfo = { VK_STRUCTURE_TYPE_CU_MODULE_CREATE_INFO_NVX };
VkCuModuleTexturingModeCreateInfoNVX moduleTexturingModeCreateInfo;
const struct vkd3d_vk_device_procs *vk_procs;
D3D12_CUBIN_DATA_HANDLE *handle;
struct d3d12_device *device;
VkDevice vk_device;
VkResult vr;

TRACE("iface %p, cubin_data %p, cubin_size %u, shader_name %s\n", iface, cubin_data, cubin_size, shader_name);
if (!cubin_data || !cubin_size || !shader_name)
return E_INVALIDARG;

device = d3d12_device_from_ID3D12DeviceExt(iface);
if (!cubin_data || !cubin_size || !shader_name || flags)
return E_INVALIDARG;

vk_procs = &device->vk_procs;

if (use_64bit_texturing)
{
if (!device->vk_info.supports_cubin_64bit || !vk_procs->vkGetImageViewHandle64NVX)
return E_NOTIMPL;

moduleTexturingModeCreateInfo.sType = VK_STRUCTURE_TYPE_CU_MODULE_TEXTURING_MODE_CREATE_INFO_NVX;
moduleTexturingModeCreateInfo.pNext = NULL;
moduleTexturingModeCreateInfo.use64bitTexturing = VK_TRUE;
moduleCreateInfo.pNext = &moduleTexturingModeCreateInfo;
}

vk_device = device->vk_device;
handle = vkd3d_calloc(1, sizeof(D3D12_CUBIN_DATA_HANDLE));
if (!handle)
return E_OUTOFMEMORY;

handle->blockX = block_x;
handle->blockY = block_y;
handle->blockZ = block_z;

moduleCreateInfo.pData = cubin_data;
moduleCreateInfo.dataSize = cubin_size;
vk_procs = &device->vk_procs;
if ((vr = VK_CALL(vkCreateCuModuleNVX(vk_device, &moduleCreateInfo, NULL, &handle->vkCuModule))) < 0)
{
ERR("Failed to create cubin shader, vr %d.\n", vr);
Expand All @@ -130,11 +144,22 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShader
vkd3d_free(handle);
return hresult_from_vk_result(vr);
}

*out_handle = handle;
return S_OK;
}

/*
 * ID3D12DeviceExt::CreateCubinComputeShaderWithName.
 *
 * Creates a cubin compute shader without 64-bit texturing and with no flags,
 * by forwarding to d3d12_device_vkd3d_ext_create_cubin_compute_shader().
 *
 * cubin_data/cubin_size describe the cubin image, block_x/y/z the block
 * dimensions, shader_name the shader name, and out_handle receives the created
 * handle. Argument validation and the returned HRESULT come from the helper.
 */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(d3d12_device_vkd3d_ext_iface *iface, const void *cubin_data,
        UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle)
{
    /* shader_name is only validated by the helper, so it can still be NULL
     * here; a NULL argument for %s is undefined behaviour, so substitute a
     * printable placeholder for tracing purposes. */
    TRACE("iface %p, cubin_data %p, cubin_size %"PRIu32", block_x %"PRIu32", block_y %"PRIu32", block_z %"PRIu32", shader_name %s\n",
            iface, cubin_data, cubin_size, block_x, block_y, block_z, shader_name ? shader_name : "(null)");

    return d3d12_device_vkd3d_ext_create_cubin_compute_shader(d3d12_device_from_ID3D12DeviceExt(iface),
            cubin_data, cubin_size, block_x, block_y, block_z,
            shader_name, false, 0, out_handle);
}

static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(d3d12_device_vkd3d_ext_iface *iface, D3D12_CUBIN_DATA_HANDLE *handle)
{
const struct vkd3d_vk_device_procs *vk_procs;
Expand Down Expand Up @@ -258,7 +283,123 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanQueueInfoEx(d3d
return S_OK;
}

CONST_VTBL struct ID3D12DeviceExt1Vtbl d3d12_device_vkd3d_ext_vtbl =
/*
 * ID3D12DeviceExt2::SupportsCubin64bit.
 *
 * Returns non-zero only when the device was flagged as supporting 64-bit cubin
 * and the vkGetImageViewHandle64NVX entry point has been resolved.
 */
static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_SupportsCubin64bit(d3d12_device_vkd3d_ext_iface *iface)
{
    struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);

    TRACE("iface %p.\n", iface);

    if (!device->vk_info.supports_cubin_64bit)
        return 0;

    /* Collapse the function pointer to 0/1, matching the logical-AND result. */
    return !!device->vk_procs.vkGetImageViewHandle64NVX;
}

/*
 * ID3D12DeviceExt2::CreateCubinComputeShaderExV2.
 *
 * Creates a cubin compute shader with 64-bit texturing requested, using the
 * fields of params. On success the created handle is stored in
 * params->hShader.
 *
 * Returns E_INVALIDARG when params is NULL; otherwise returns whatever
 * d3d12_device_vkd3d_ext_create_cubin_compute_shader() returns. A non-NULL
 * params->pNext is not supported: it is reported and reset to NULL.
 */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderExV2(d3d12_device_vkd3d_ext_iface *iface,
        D3D12_CREATE_CUBIN_SHADER_PARAMS *params)
{
    /* Every statement below dereferences params, including the trace. */
    if (!params)
    {
        TRACE("iface %p, params %p.\n", iface, (void *)params);
        return E_INVALIDARG;
    }

    /* pShaderName is only validated by the helper, so guard the %s conversion
     * against NULL, which would otherwise be undefined behaviour. */
    TRACE("iface %p, cubin_data %p, cubin_size %"PRIu32", block_x %"PRIu32", block_y %"PRIu32", block_z %"PRIu32", shader_name %s, flags %"PRIx32"\n",
            iface, params->pCubin, params->size, params->blockX, params->blockY, params->blockZ,
            params->pShaderName ? params->pShaderName : "(null)", params->flags);

    if (params->pNext)
    {
        FIXME("pNext not supported.\n");
        params->pNext = NULL;
    }

    return d3d12_device_vkd3d_ext_create_cubin_compute_shader(d3d12_device_from_ID3D12DeviceExt(iface),
            params->pCubin, params->size, params->blockX, params->blockY, params->blockZ,
            params->pShaderName, true, params->flags, &params->hShader);
}

/*
 * ID3D12DeviceExt2::GetCudaMergedTextureSamplerObject.
 *
 * Decodes params->texDesc (and params->smpDesc when it is non-zero) and stores
 * the value returned by vkGetImageViewHandle64NVX in params->textureHandle.
 *
 * Returns E_NOTIMPL when 64-bit cubin support or the entry point is missing,
 * E_INVALIDARG when the texture descriptor is not flagged as an image view,
 * and S_OK otherwise. A non-NULL params->pNext is reported and reset to NULL.
 */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaMergedTextureSamplerObject(d3d12_device_vkd3d_ext_iface *iface,
        D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS *params)
{
    struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkImageViewHandleInfoNVX handle_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
    struct d3d12_desc_split tex;

    TRACE("iface %p, tex_desc %zu, smp_desc %zu.\n",
            iface, (size_t)params->texDesc, (size_t)params->smpDesc);

    if (!(device->vk_info.supports_cubin_64bit && vk_procs->vkGetImageViewHandle64NVX))
        return E_NOTIMPL;

    if (params->pNext)
    {
        FIXME("pNext not supported.\n");
        params->pNext = NULL;
    }

    tex = d3d12_desc_decode_va(params->texDesc);
    if ((tex.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW) == 0)
        return E_INVALIDARG;

    /* Start from the sampler-less query and upgrade it when a sampler is given. */
    handle_info.imageView = tex.view->info.image.view->vk_image_view;
    handle_info.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;

    if (params->smpDesc)
    {
        struct d3d12_desc_split smp = d3d12_desc_decode_va(params->smpDesc);

        /* NOTE(review): the sampler descriptor is read without any flag check - confirm callers guarantee its kind. */
        handle_info.sampler = smp.view->info.image.view->vk_sampler;
        handle_info.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
    }

    params->textureHandle = VK_CALL(vkGetImageViewHandle64NVX(device->vk_device, &handle_info));
    return S_OK;
}

/*
 * ID3D12DeviceExt2::GetCudaIndependentDescriptorObject.
 *
 * Decodes params->desc and stores the value returned by
 * vkGetImageViewHandle64NVX in params->handle, using a Vulkan descriptor type
 * chosen from params->type.
 *
 * Returns E_NOTIMPL when 64-bit cubin support or the entry point is missing,
 * E_INVALIDARG when the descriptor is not flagged as an image view or the type
 * is unknown, E_FAIL for the TEXTURE type, and S_OK otherwise. A non-NULL
 * params->pNext is reported and reset to NULL.
 *
 * NOTE(review): the image view check runs before the type switch, so it is
 * applied to SAMPLER requests too; SAMPLER maps to a sampled image while
 * TEXTURE is rejected - confirm this mapping is intended.
 */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaIndependentDescriptorObject(d3d12_device_vkd3d_ext_iface *iface,
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS *params)
{
VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct d3d12_desc_split desc;

TRACE("iface %p, desc %zu, type %d.\n",
iface, (size_t)params->desc, params->type);

/* Both the capability flag and the resolved entry point are required. */
if (!device->vk_info.supports_cubin_64bit || !vk_procs->vkGetImageViewHandle64NVX)
return E_NOTIMPL;

/* Extension chains are not handled; the caller's pointer is cleared. */
if (params->pNext)
{
FIXME("pNext not supported.\n");
params->pNext = NULL;
}

desc = d3d12_desc_decode_va(params->desc);

/* Only descriptors flagged as image views are accepted, whatever the requested type. */
if (!(desc.view->info.flags & VKD3D_DESCRIPTOR_FLAG_IMAGE_VIEW))
return E_INVALIDARG;

imageViewHandleInfo.imageView = desc.view->info.image.view->vk_image_view;

/* Translate the requested object type into the Vulkan descriptor type for the query. */
switch (params->type)
{
case D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SURFACE:
imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
break;
case D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TEXTURE:
FIXME("TEXTURE object type not supported.\n");
return E_FAIL;
case D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SAMPLER:
imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
break;
Comment on lines +384 to +392
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@liam-middlebrook I'd appreciate a closer look at this, maybe you'll have an idea how to handle the TEXTURE case.

default:
ERR("Unsupported object type %d\n", params->type);
return E_INVALIDARG;
}

params->handle = VK_CALL(vkGetImageViewHandle64NVX(device->vk_device, &imageViewHandleInfo));
return S_OK;
}

CONST_VTBL struct ID3D12DeviceExt2Vtbl d3d12_device_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_device_vkd3d_ext_QueryInterface,
Expand All @@ -277,6 +418,12 @@ CONST_VTBL struct ID3D12DeviceExt1Vtbl d3d12_device_vkd3d_ext_vtbl =
/* ID3D12DeviceExt1 methods */
d3d12_device_vkd3d_ext_CreateResourceFromBorrowedHandle,
d3d12_device_vkd3d_ext_GetVulkanQueueInfoEx,

/* ID3D12DeviceExt2 methods */
d3d12_device_vkd3d_ext_SupportsCubin64bit,
d3d12_device_vkd3d_ext_CreateCubinComputeShaderExV2,
d3d12_device_vkd3d_ext_GetCudaMergedTextureSamplerObject,
d3d12_device_vkd3d_ext_GetCudaIndependentDescriptorObject,
};


Expand Down
4 changes: 3 additions & 1 deletion libs/vkd3d/vkd3d_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ struct vkd3d_vulkan_info
unsigned int extension_count;
const char* const* extension_names;

bool supports_cubin_64bit;

bool rasterization_stream;
unsigned int max_vertex_attrib_divisor;

Expand Down Expand Up @@ -4845,7 +4847,7 @@ struct vkd3d_descriptor_qa_global_info;
struct vkd3d_descriptor_qa_heap_buffer_data;

/* ID3D12DeviceExt */
typedef ID3D12DeviceExt1 d3d12_device_vkd3d_ext_iface;
typedef ID3D12DeviceExt2 d3d12_device_vkd3d_ext_iface;

/* ID3D12DXVKInteropDevice */
typedef ID3D12DXVKInteropDevice1 d3d12_dxvk_interop_device_iface;
Expand Down
1 change: 1 addition & 0 deletions libs/vkd3d/vulkan_procs.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ VK_DEVICE_EXT_PFN(vkDestroyCuFunctionNVX)
VK_DEVICE_EXT_PFN(vkCmdCuLaunchKernelNVX)

/* VK_NVX_image_view_handle */
VK_DEVICE_EXT_PFN(vkGetImageViewHandle64NVX)
VK_DEVICE_EXT_PFN(vkGetImageViewHandleNVX)
VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)

Expand Down
Loading