Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add debug ring support to ExecuteIndirect. #2315

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion include/vkd3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ extern "C" {
#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
#define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30)
/* Bit 31 is vacant */
#define VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT (1ull << 31)
#define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32)
#define VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS (1ull << 33)
#define VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION (1ull << 34)
Expand Down
114 changes: 107 additions & 7 deletions libs/vkd3d/command.c
Original file line number Diff line number Diff line change
Expand Up @@ -6994,6 +6994,68 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count(struct d3d12_c
return true;
}

/* Records a compute dispatch of the signature's debug ring pipeline for one
 * ExecuteIndirect call.
 *
 * list:          command list being recorded.
 * signature:     command signature; provides the debug ring pipeline, its
 *                pipeline layout, the byte stride and the argument descriptors.
 * indirect_args: VA of the indirect argument buffer, forwarded to the shader.
 * count_arg:     VA of the indirect count, forwarded to the shader unmodified.
 * max_commands:  used directly as the X workgroup count of the dispatch.
 *
 * NOTE(review): what the shader does with these values is not visible here;
 * presumably it logs the raw arguments to the debug ring - confirm against
 * cs_execute_indirect_debug_ring.comp. */
static void d3d12_command_list_emit_execute_indirect_debug_ring(struct d3d12_command_list *list,
        struct d3d12_command_signature *signature,
        VkDeviceAddress indirect_args, VkDeviceAddress count_arg, uint32_t max_commands)
{
    /* VK_CALL() relies on a local named vk_procs. */
    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
    struct vkd3d_execute_indirect_debug_ring_args push_args;
    VkMemoryBarrier2 exec_barrier;
    VkDependencyInfo dependency;
    VkCommandBuffer vk_cmd;
    uint32_t last_arg;

    /* Push constant payload. Zero everything first so that the tag and the
     * instance index stay 0 when indirect tracing is disabled. */
    memset(&push_args, 0, sizeof(push_args));
    push_args.api_buffer_va = indirect_args;
    push_args.indirect_count_va = count_arg;
    push_args.api_buffer_word_stride = signature->desc.ByteStride / sizeof(uint32_t);

    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT)
    {
        /* The tag is the type of the final argument in the signature. */
        last_arg = signature->desc.NumArgumentDescs - 1;
        push_args.debug_tag = signature->desc.pArgumentDescs[last_arg].Type;

        /* Device-wide counter; subtract 1 to get the pre-increment value. */
        push_args.implicit_instance = vkd3d_atomic_uint32_increment(
                &list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1;
    }

    /* Emit the same instance index into the breadcrumb log so both logs can be matched up. */
    VKD3D_BREADCRUMB_TAG("Implicit instance (plain)");
    VKD3D_BREADCRUMB_AUX32(push_args.implicit_instance);

    d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list);
    vk_cmd = list->cmd.vk_init_commands_post_indirect_barrier;

    /* When recording straight into the main command buffer, the render pass
     * is ended before the compute work is recorded. */
    if (vk_cmd == list->cmd.vk_command_buffer)
        d3d12_command_list_end_current_render_pass(list, true);

    VK_CALL(vkCmdBindPipeline(vk_cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
            signature->debug_ring_pipeline.vk_pipeline));
    VK_CALL(vkCmdPushConstants(vk_cmd,
            signature->debug_ring_pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
            0, sizeof(push_args), &push_args));

    VK_CALL(vkCmdDispatch(vk_cmd, max_commands, 1, 1));

    if (vk_cmd != list->cmd.vk_command_buffer)
    {
        /* Recorded into a separate command buffer: no barrier is emitted
         * here, only the request for one is flagged. */
        list->cmd.indirect_meta->need_compute_to_indirect_barrier = true;
        return;
    }

    /* Inline case: compute -> draw-indirect dependency with both access masks left at 0. */
    memset(&exec_barrier, 0, sizeof(exec_barrier));
    exec_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
    exec_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
    exec_barrier.srcAccessMask = 0;
    exec_barrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
    exec_barrier.dstAccessMask = 0;

    memset(&dependency, 0, sizeof(dependency));
    dependency.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
    dependency.memoryBarrierCount = 1;
    dependency.pMemoryBarriers = &exec_barrier;
    VK_CALL(vkCmdPipelineBarrier2(vk_cmd, &dependency));

    /* The compute pipeline and push constants were bound on the main command
     * buffer, so the tracked pipeline and root parameter state is invalidated. */
    d3d12_command_list_invalidate_current_pipeline(list, true);
    d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings);
}

static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d3d12_command_list *list,
struct d3d12_command_signature *signature,
VkDeviceAddress indirect_args,
Expand Down Expand Up @@ -7045,6 +7107,17 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d
args.stride_words = stride / sizeof(uint32_t);
args.dispatch_offset_words = signature->state_template.compute.dispatch_offset_words;

if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT)
{
args.debug_tag = UINT32_MAX;
args.implicit_instance = vkd3d_atomic_uint32_increment(
&list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1;
}

/* Allow correlation against breadcrumb log. */
VKD3D_BREADCRUMB_TAG("Implicit instance (compute template)");
VKD3D_BREADCRUMB_AUX32(args.implicit_instance);

d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list);
vk_patch_cmd_buffer = list->cmd.vk_init_commands_post_indirect_barrier;

Expand Down Expand Up @@ -13584,7 +13657,7 @@ static void d3d12_command_list_execute_indirect_state_template_dgc(
current_pipeline = list->current_pipeline;

memset(&patch_args, 0, sizeof(patch_args));
patch_args.debug_tag = 0; /* Modify to non-zero value as desired when debugging. */
patch_args.debug_tag = (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT) ? UINT32_MAX : 0;

/* If everything regarding alignment works out, we can just reuse the app indirect buffer instead. */
require_ibo_update = false;
Expand Down Expand Up @@ -13664,9 +13737,12 @@ static void d3d12_command_list_execute_indirect_state_template_dgc(
if (patch_args.debug_tag != 0)
{
/* Makes log easier to understand since a sorted log will appear in-order. */
static uint32_t vkd3d_implicit_instance_count;
patch_args.implicit_instance = vkd3d_atomic_uint32_increment(
&vkd3d_implicit_instance_count, vkd3d_memory_order_relaxed) - 1;
&list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1;

/* Allow correlation against breadcrumb log. */
VKD3D_BREADCRUMB_TAG("Implicit instance (template)");
VKD3D_BREADCRUMB_AUX32(patch_args.implicit_instance);
}

d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list);
Expand Down Expand Up @@ -14062,6 +14138,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l
return;
}

d3d12_command_list_end_transfer_batch(list);

if (sig_impl->debug_ring_pipeline.vk_pipeline)
{
d3d12_command_list_emit_execute_indirect_debug_ring(list, sig_impl,
arg_impl->res.va + arg_buffer_offset,
count_impl ? count_impl->res.va + count_buffer_offset : 0,
max_command_count);
}

/* Temporary workaround, since we cannot parse non-draw arguments yet. Point directly
* to the first argument. Should avoid hard crashes for now. */
arg_buffer_offset += sig_impl->argument_buffer_offset_for_command;
Expand Down Expand Up @@ -14135,7 +14221,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l
scratch.va = arg_impl->res.va + arg_buffer_offset;
}

d3d12_command_list_end_transfer_batch(list);
switch (arg_desc->Type)
{
case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
Expand Down Expand Up @@ -20488,6 +20573,13 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_nv(struct d3d12_c
VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI,
};

static const enum vkd3d_patch_command_token dispatch_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z,
};

static const VkIndexType vk_index_types[] = { VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_UINT16 };
static const uint32_t d3d_index_types[] = { DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R16_UINT };

Expand Down Expand Up @@ -20661,9 +20753,8 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_nv(struct d3d12_c
token.offset = stream_stride;
stream_stride += sizeof(VkDispatchIndirectCommand);
dst_word_offset = token.offset / sizeof(uint32_t);
/* TODO: Rebase on top of debug-ring-indirect. */
generic_u32_copy_count = 0;
generic_u32_copy_types = NULL;
generic_u32_copy_count = ARRAY_SIZE(dispatch_types);
generic_u32_copy_types = dispatch_types;
break;

default:
Expand Down Expand Up @@ -21244,6 +21335,15 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12
* for optimal reordering. */
vkd3d_atomic_uint32_store_explicit(&device->device_has_dgc_templates, 1, vkd3d_memory_order_relaxed);
}
else
{
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT)
{
vkd3d_meta_get_execute_indirect_debug_ring_pipeline(&device->meta_ops,
signature_size / sizeof(uint32_t),
&object->debug_ring_pipeline);
}
}

object->argument_buffer_offset_for_command = argument_buffer_offset;
d3d_destruction_notifier_init(&object->destruction_notifier, (IUnknown*)&object->ID3D12CommandSignature_iface);
Expand Down
100 changes: 98 additions & 2 deletions libs/vkd3d/debug_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token
case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X: return "Mesh Tasks (X)";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y: return "Mesh Tasks (Y)";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z: return "Mesh Tasks (Z)";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X: return "X";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y: return "Y";
case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z: return "Z";
default: return "???";
}
}
Expand All @@ -107,6 +110,29 @@ static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token toke
}
}

/* Translates a debug tag into a short label for log output.
 *
 * The D3D12_INDIRECT_ARGUMENT_TYPE_* values handled below map to a label for
 * that command type, UINT32_MAX maps to "Template", and every other value
 * yields "???". The returned pointer refers to a string literal and must not
 * be freed or modified. */
static const char *vkd3d_debug_tag_to_str(uint32_t value)
{
    /* Fallback for tags that are not recognized. */
    const char *label = "???";

    switch (value)
    {
        case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
            label = "Draw";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED:
            label = "DrawIndexed";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
            label = "Dispatch";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH:
            label = "Mesh";
            break;

        case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS:
            label = "RayGen";
            break;

        case UINT32_MAX:
            label = "Template";
            break;

        default:
            break;
    }

    return label;
}

static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
uint32_t word_offset, uint32_t message_word_count)
{
Expand Down Expand Up @@ -136,8 +162,8 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring
* Make sure the log is sortable for easier debug.
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
* but that is overengineering at this time ... */
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, %s, DrawID %04u (ThreadID %04u): ",
debug_instance, vkd3d_debug_tag_to_str(debug_thread_id[0]), debug_thread_id[1], debug_thread_id[2]);

if (message_word_count == 2)
{
Expand All @@ -147,6 +173,76 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring
READ_RING_WORD(word_offset + 0),
READ_RING_WORD(word_offset + 1));
}
else if (message_word_count == 3)
{
static const enum vkd3d_patch_command_token draw_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE,
};

static const enum vkd3d_patch_command_token draw_indexed_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE,
};

static const enum vkd3d_patch_command_token dispatch_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y,
VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z,
};

static const enum vkd3d_patch_command_token mesh_types[] =
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X,
VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y,
VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z,
};

const char *tag_str = "?";
uint32_t value, index;

len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
/* word 0 is a dummy value. */
index = READ_RING_WORD(word_offset + 1);
value = READ_RING_WORD(word_offset + 2);

switch (debug_thread_id[0])
{
case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW:
if (index < ARRAY_SIZE(draw_types))
tag_str = vkd3d_patch_command_token_str(draw_types[index]);
break;

case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED:
if (index < ARRAY_SIZE(draw_indexed_types))
tag_str = vkd3d_patch_command_token_str(draw_indexed_types[index]);
break;

case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
if (index < ARRAY_SIZE(dispatch_types))
tag_str = vkd3d_patch_command_token_str(dispatch_types[index]);
break;

case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH:
if (index < ARRAY_SIZE(mesh_types))
tag_str = vkd3d_patch_command_token_str(mesh_types[index]);
break;

default:
break;
}

snprintf(message_buffer + len, avail, "%s <- %u", tag_str, value);
}
else if (message_word_count == 4)
{
union { uint32_t u32; float f32; int32_t s32; } value;
Expand Down
13 changes: 7 additions & 6 deletions libs/vkd3d/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] =
{"preallocate_srv_mip_clamps", VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS},
{"force_initial_transition", VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION},
{"breadcrumbs_trace", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE},
{"breadcrumbs_trace_indirect", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT},
{"requires_compute_indirect_templates", VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES},
{"skip_driver_workarounds", VKD3D_CONFIG_FLAG_SKIP_DRIVER_WORKAROUNDS},
{"enable_experimental_features", VKD3D_CONFIG_FLAG_ENABLE_EXPERIMENTAL_FEATURES},
Expand Down Expand Up @@ -9138,14 +9139,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = d3d12_device_create_sparse_init_timeline(device)))
goto out_cleanup_sampler_state;

if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device)))
if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
goto out_cleanup_sparse_timeline;

if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device)))
goto out_cleanup_meta_ops;
if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device)))
goto out_cleanup_debug_ring;

if (FAILED(hr = vkd3d_queue_timeline_trace_init(&device->queue_timeline_trace, device)))
goto out_cleanup_debug_ring;
goto out_cleanup_meta_ops;

if (FAILED(hr = vkd3d_address_binding_tracker_init(&device->address_binding_tracker, device)))
goto out_cleanup_queue_timeline_trace;
Expand Down Expand Up @@ -9215,10 +9216,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
vkd3d_address_binding_tracker_cleanup(&device->address_binding_tracker, device);
out_cleanup_queue_timeline_trace:
vkd3d_queue_timeline_trace_cleanup(&device->queue_timeline_trace);
out_cleanup_debug_ring:
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
out_cleanup_meta_ops:
vkd3d_meta_ops_cleanup(&device->meta_ops, device);
out_cleanup_debug_ring:
vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device);
out_cleanup_sparse_timeline:
vk_procs = &device->vk_procs;
VK_CALL(vkDestroySemaphore(device->vk_device, device->sparse_init_timeline, NULL));
Expand Down
5 changes: 4 additions & 1 deletion libs/vkd3d/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ vkd3d_shaders =[
'shaders/vs_swapchain_fullscreen.vert',
'shaders/fs_swapchain_fullscreen.frag',
'shaders/cs_execute_indirect_patch.comp',
'shaders/cs_execute_indirect_patch_debug_ring.comp',
'shaders/cs_execute_indirect_multi_dispatch.comp',
'shaders/cs_execute_indirect_multi_dispatch_state.comp',

Expand All @@ -60,6 +59,10 @@ vkd3d_shaders =[
'shaders/cs_workgraph_distribute_payload_offsets.comp',
'shaders/cs_workgraph_complete_compaction.comp',
'shaders/cs_workgraph_setup_gpu_input.comp',

'shaders/cs_execute_indirect_debug_ring.comp',
'shaders/cs_execute_indirect_patch_debug_ring.comp',
'shaders/cs_execute_indirect_multi_dispatch_state_debug_ring.comp',
]

vkd3d_src = [
Expand Down
Loading