diff --git a/include/vkd3d.h b/include/vkd3d.h index 80267177da..d99615df17 100644 --- a/include/vkd3d.h +++ b/include/vkd3d.h @@ -86,7 +86,7 @@ extern "C" { #define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28) #define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29) #define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30) -/* Bit 31 is vacant */ +#define VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT (1ull << 31) #define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32) #define VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS (1ull << 33) #define VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION (1ull << 34) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index e396765a8e..dadfef8083 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6994,6 +6994,68 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count(struct d3d12_c return true; } +static void d3d12_command_list_emit_execute_indirect_debug_ring(struct d3d12_command_list *list, + struct d3d12_command_signature *signature, + VkDeviceAddress indirect_args, VkDeviceAddress count_arg, uint32_t max_commands) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + struct vkd3d_execute_indirect_debug_ring_args args; + VkCommandBuffer vk_patch_cmd_buffer; + VkMemoryBarrier2 vk_barrier; + VkDependencyInfo dep_info; + + memset(&args, 0, sizeof(args)); + args.api_buffer_va = indirect_args; + args.indirect_count_va = count_arg; + args.api_buffer_word_stride = signature->desc.ByteStride / sizeof(uint32_t); + + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT) + { + args.debug_tag = signature->desc.pArgumentDescs[signature->desc.NumArgumentDescs - 1].Type; + args.implicit_instance = vkd3d_atomic_uint32_increment( + &list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1; + } + + /* Allow correlation against breadcrumb log. */ + VKD3D_BREADCRUMB_TAG("Implicit instance (plain)"); + VKD3D_BREADCRUMB_AUX32(args.implicit_instance); + + d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list); + vk_patch_cmd_buffer = list->cmd.vk_init_commands_post_indirect_barrier; + + if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) + d3d12_command_list_end_current_render_pass(list, true); + + VK_CALL(vkCmdBindPipeline(vk_patch_cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + signature->debug_ring_pipeline.vk_pipeline)); + VK_CALL(vkCmdPushConstants(vk_patch_cmd_buffer, + signature->debug_ring_pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(args), &args)); + + VK_CALL(vkCmdDispatch(vk_patch_cmd_buffer, max_commands, 1, 1)); + + if (vk_patch_cmd_buffer == list->cmd.vk_command_buffer) + { + memset(&dep_info, 0, sizeof(dep_info)); + dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; + dep_info.memoryBarrierCount = 1; + dep_info.pMemoryBarriers = &vk_barrier; + + memset(&vk_barrier, 0, sizeof(vk_barrier)); + vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2; + vk_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + vk_barrier.srcAccessMask = 0; + vk_barrier.dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT; + vk_barrier.dstAccessMask = 0; + VK_CALL(vkCmdPipelineBarrier2(vk_patch_cmd_buffer, &dep_info)); + + d3d12_command_list_invalidate_current_pipeline(list, true); + d3d12_command_list_invalidate_root_parameters(list, &list->compute_bindings, true, &list->graphics_bindings); + } + else + list->cmd.indirect_meta->need_compute_to_indirect_barrier = true; +} + static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d3d12_command_list *list, struct d3d12_command_signature *signature, VkDeviceAddress indirect_args, @@ -7045,6 +7107,17 @@ static bool d3d12_command_list_emit_multi_dispatch_indirect_count_state(struct d args.stride_words = stride / sizeof(uint32_t); args.dispatch_offset_words = signature->state_template.compute.dispatch_offset_words; + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT) + { + args.debug_tag = UINT32_MAX; + args.implicit_instance = vkd3d_atomic_uint32_increment( + &list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1; + } + + /* Allow correlation against breadcrumb log. */ + VKD3D_BREADCRUMB_TAG("Implicit instance (compute template)"); + VKD3D_BREADCRUMB_AUX32(args.implicit_instance); + d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list); vk_patch_cmd_buffer = list->cmd.vk_init_commands_post_indirect_barrier; @@ -13584,7 +13657,7 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( current_pipeline = list->current_pipeline; memset(&patch_args, 0, sizeof(patch_args)); - patch_args.debug_tag = 0; /* Modify to non-zero value as desired when debugging. */ + patch_args.debug_tag = (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT) ? UINT32_MAX : 0; /* If everything regarding alignment works out, we can just reuse the app indirect buffer instead. */ require_ibo_update = false; @@ -13664,9 +13737,12 @@ static void d3d12_command_list_execute_indirect_state_template_dgc( if (patch_args.debug_tag != 0) { /* Makes log easier to understand since a sorted log will appear in-order. */ - static uint32_t vkd3d_implicit_instance_count; patch_args.implicit_instance = vkd3d_atomic_uint32_increment( - &vkd3d_implicit_instance_count, vkd3d_memory_order_relaxed) - 1; + &list->device->debug_ring.implicit_instance_count, vkd3d_memory_order_relaxed) - 1; + + /* Allow correlation against breadcrumb log. */ + VKD3D_BREADCRUMB_TAG("Implicit instance (template)"); + VKD3D_BREADCRUMB_AUX32(patch_args.implicit_instance); } d3d12_command_allocator_allocate_init_post_indirect_command_buffer(list->allocator, list); @@ -14062,6 +14138,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l return; } + d3d12_command_list_end_transfer_batch(list); + + if (sig_impl->debug_ring_pipeline.vk_pipeline) + { + d3d12_command_list_emit_execute_indirect_debug_ring(list, sig_impl, + arg_impl->res.va + arg_buffer_offset, + count_impl ? count_impl->res.va + count_buffer_offset : 0, + max_command_count); + } + /* Temporary workaround, since we cannot parse non-draw arguments yet. Point directly * to the first argument. Should avoid hard crashes for now. */ arg_buffer_offset += sig_impl->argument_buffer_offset_for_command; @@ -14135,7 +14221,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(d3d12_command_l scratch.va = arg_impl->res.va + arg_buffer_offset; } - d3d12_command_list_end_transfer_batch(list); switch (arg_desc->Type) { case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: @@ -20488,6 +20573,13 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_nv(struct d3d12_c VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI, }; + static const enum vkd3d_patch_command_token dispatch_types[] = + { + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z, + }; + static const VkIndexType vk_index_types[] = { VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_UINT16 }; static const uint32_t d3d_index_types[] = { DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R16_UINT }; @@ -20661,9 +20753,8 @@ static HRESULT d3d12_command_signature_init_state_template_dgc_nv(struct d3d12_c token.offset = stream_stride; stream_stride += sizeof(VkDispatchIndirectCommand); dst_word_offset = token.offset / sizeof(uint32_t); - /* TODO: Rebase on top of debug-ring-indirect. */ - generic_u32_copy_count = 0; - generic_u32_copy_types = NULL; + generic_u32_copy_count = ARRAY_SIZE(dispatch_types); + generic_u32_copy_types = dispatch_types; break; default: @@ -21244,6 +21335,15 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12 * for optimal reordering. */ vkd3d_atomic_uint32_store_explicit(&device->device_has_dgc_templates, 1, vkd3d_memory_order_relaxed); } + else + { + if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT) + { + vkd3d_meta_get_execute_indirect_debug_ring_pipeline(&device->meta_ops, + signature_size / sizeof(uint32_t), + &object->debug_ring_pipeline); + } + } object->argument_buffer_offset_for_command = argument_buffer_offset; d3d_destruction_notifier_init(&object->destruction_notifier, (IUnknown*)&object->ID3D12CommandSignature_iface); diff --git a/libs/vkd3d/debug_ring.c b/libs/vkd3d/debug_ring.c index e9cb7101be..b2f93e73ac 100644 --- a/libs/vkd3d/debug_ring.c +++ b/libs/vkd3d/debug_ring.c @@ -86,6 +86,9 @@ static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X: return "Mesh Tasks (X)"; case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y: return "Mesh Tasks (Y)"; case VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z: return "Mesh Tasks (Z)"; + case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X: return "X"; + case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y: return "Y"; + case VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z: return "Z"; default: return "???"; } } @@ -107,6 +110,29 @@ static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token toke } } +static const char *vkd3d_debug_tag_to_str(uint32_t value) +{ + switch (value) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + return "Draw"; + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + return "DrawIndexed"; + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + return "Dispatch"; + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH: + return "Mesh"; + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS: + return "RayGen"; + case UINT32_MAX: + return "Template"; + default: + break; + } + + return "???"; +} + static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring, uint32_t word_offset, uint32_t message_word_count) { @@ -136,8 +162,8 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring * Make sure the log is sortable for easier debug. * TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages, * but that is overengineering at this time ... */ - snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ", - debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]); + snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, %s, DrawID %04u (ThreadID %04u): ", + debug_instance, vkd3d_debug_tag_to_str(debug_thread_id[0]), debug_thread_id[1], debug_thread_id[2]); if (message_word_count == 2) { @@ -147,6 +173,76 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring READ_RING_WORD(word_offset + 0), READ_RING_WORD(word_offset + 1)); } + else if (message_word_count == 3) + { + static const enum vkd3d_patch_command_token draw_types[] = + { + VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT, + VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT, + VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX, + VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE, + }; + + static const enum vkd3d_patch_command_token draw_indexed_types[] = + { + VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT, + VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT, + VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX, + VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET, + VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE, + }; + + static const enum vkd3d_patch_command_token dispatch_types[] = + { + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z, + }; + + static const enum vkd3d_patch_command_token mesh_types[] = + { + VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X, + VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y, + VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z, + }; + + const char *tag_str = "?"; + uint32_t value, index; + + len = strlen(message_buffer); + avail = sizeof(message_buffer) - len; + /* word 0 is a dummy value. */ + index = READ_RING_WORD(word_offset + 1); + value = READ_RING_WORD(word_offset + 2); + + switch (debug_thread_id[0]) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + if (index < ARRAY_SIZE(draw_types)) + tag_str = vkd3d_patch_command_token_str(draw_types[index]); + break; + + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + if (index < ARRAY_SIZE(draw_indexed_types)) + tag_str = vkd3d_patch_command_token_str(draw_indexed_types[index]); + break; + + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + if (index < ARRAY_SIZE(dispatch_types)) + tag_str = vkd3d_patch_command_token_str(dispatch_types[index]); + break; + + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH: + if (index < ARRAY_SIZE(mesh_types)) + tag_str = vkd3d_patch_command_token_str(mesh_types[index]); + break; + + default: + break; + } + + snprintf(message_buffer + len, avail, "%s <- %u", tag_str, value); + } else if (message_word_count == 4) { union { uint32_t u32; float f32; int32_t s32; } value; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index c11b877bde..9a5b79b59c 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -992,6 +992,7 @@ static const struct vkd3d_debug_option vkd3d_config_options[] = {"preallocate_srv_mip_clamps", VKD3D_CONFIG_FLAG_PREALLOCATE_SRV_MIP_CLAMPS}, {"force_initial_transition", VKD3D_CONFIG_FLAG_FORCE_INITIAL_TRANSITION}, {"breadcrumbs_trace", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE}, + {"breadcrumbs_trace_indirect", VKD3D_CONFIG_FLAG_BREADCRUMBS | VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT}, {"requires_compute_indirect_templates", VKD3D_CONFIG_FLAG_REQUIRES_COMPUTE_INDIRECT_TEMPLATES}, {"skip_driver_workarounds", VKD3D_CONFIG_FLAG_SKIP_DRIVER_WORKAROUNDS}, {"enable_experimental_features", VKD3D_CONFIG_FLAG_ENABLE_EXPERIMENTAL_FEATURES}, @@ -9138,14 +9139,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = d3d12_device_create_sparse_init_timeline(device))) goto out_cleanup_sampler_state; - if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device))) + if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device))) goto out_cleanup_sparse_timeline; - if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device))) - goto out_cleanup_meta_ops; + if (FAILED(hr = vkd3d_meta_ops_init(&device->meta_ops, device))) + goto out_cleanup_debug_ring; if (FAILED(hr = vkd3d_queue_timeline_trace_init(&device->queue_timeline_trace, device))) - goto out_cleanup_debug_ring; + goto out_cleanup_meta_ops; if (FAILED(hr = vkd3d_address_binding_tracker_init(&device->address_binding_tracker, device))) goto out_cleanup_queue_timeline_trace; @@ -9215,10 +9216,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_address_binding_tracker_cleanup(&device->address_binding_tracker, device); out_cleanup_queue_timeline_trace: vkd3d_queue_timeline_trace_cleanup(&device->queue_timeline_trace); -out_cleanup_debug_ring: - vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); out_cleanup_meta_ops: vkd3d_meta_ops_cleanup(&device->meta_ops, device); +out_cleanup_debug_ring: + vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); out_cleanup_sparse_timeline: vk_procs = &device->vk_procs; VK_CALL(vkDestroySemaphore(device->vk_device, device->sparse_init_timeline, NULL)); diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build index 9f43cc123d..02b04d477f 100644 --- a/libs/vkd3d/meson.build +++ b/libs/vkd3d/meson.build @@ -34,7 +34,6 @@ vkd3d_shaders =[ 'shaders/vs_swapchain_fullscreen.vert', 'shaders/fs_swapchain_fullscreen.frag', 'shaders/cs_execute_indirect_patch.comp', - 'shaders/cs_execute_indirect_patch_debug_ring.comp', 'shaders/cs_execute_indirect_multi_dispatch.comp', 'shaders/cs_execute_indirect_multi_dispatch_state.comp', @@ -60,6 +59,10 @@ vkd3d_shaders =[ 'shaders/cs_workgraph_distribute_payload_offsets.comp', 'shaders/cs_workgraph_complete_compaction.comp', 'shaders/cs_workgraph_setup_gpu_input.comp', + + 'shaders/cs_execute_indirect_debug_ring.comp', + 'shaders/cs_execute_indirect_patch_debug_ring.comp', + 'shaders/cs_execute_indirect_multi_dispatch_state_debug_ring.comp', ] vkd3d_src = [ diff --git a/libs/vkd3d/meta.c b/libs/vkd3d/meta.c index aa2542c0d7..f59e83c9f4 100644 --- a/libs/vkd3d/meta.c +++ b/libs/vkd3d/meta.c @@ -1536,8 +1536,11 @@ static HRESULT vkd3d_multi_dispatch_indirect_ops_init( struct vkd3d_multi_dispatch_indirect_ops *meta_multi_dispatch_indirect_ops, struct d3d12_device *device) { + struct vkd3d_shader_debug_ring_spec_info debug_ring_info; VkPushConstantRange push_constant_range; + const VkSpecializationInfo *spec; VkResult vr; + bool debug; memset(meta_multi_dispatch_indirect_ops, 0, sizeof(*meta_multi_dispatch_indirect_ops)); push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; @@ -1554,6 +1557,20 @@ static HRESULT vkd3d_multi_dispatch_indirect_ops_init( &push_constant_range, &meta_multi_dispatch_indirect_ops->vk_multi_dispatch_indirect_state_layout)) < 0) goto fail; + debug = device->debug_ring.active && + !!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT); + + if (debug) + { + vkd3d_shader_debug_ring_init_spec_constant(device, &debug_ring_info, + 0 /* Reserve this hash for internal debug streams. */); + spec = &debug_ring_info.spec_info; + } + else + { + spec = NULL; + } + if ((vr = vkd3d_meta_create_compute_pipeline(device, sizeof(cs_execute_indirect_multi_dispatch), cs_execute_indirect_multi_dispatch, meta_multi_dispatch_indirect_ops->vk_multi_dispatch_indirect_layout, NULL, true, NULL, @@ -1561,8 +1578,9 @@ static HRESULT vkd3d_multi_dispatch_indirect_ops_init( goto fail; if ((vr = vkd3d_meta_create_compute_pipeline(device, - sizeof(cs_execute_indirect_multi_dispatch_state), cs_execute_indirect_multi_dispatch_state, - meta_multi_dispatch_indirect_ops->vk_multi_dispatch_indirect_state_layout, NULL, true, NULL, + debug ? sizeof(cs_execute_indirect_multi_dispatch_state_debug_ring) : sizeof(cs_execute_indirect_multi_dispatch_state), + debug ? cs_execute_indirect_multi_dispatch_state_debug_ring : cs_execute_indirect_multi_dispatch_state, + meta_multi_dispatch_indirect_ops->vk_multi_dispatch_indirect_state_layout, spec, true, NULL, &meta_multi_dispatch_indirect_ops->vk_multi_dispatch_indirect_state_pipeline)) < 0) goto fail; @@ -1723,7 +1741,16 @@ static HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1, - &push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0) + &push_constant_range, &meta_indirect_ops->vk_pipeline_layout_patch)) < 0) + { + pthread_mutex_destroy(&meta_indirect_ops->mutex); + return hresult_from_vk_result(vr); + } + + push_constant_range.size = sizeof(struct vkd3d_execute_indirect_debug_ring_args); + + if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1, + &push_constant_range, &meta_indirect_ops->vk_pipeline_layout_debug_ring)) < 0) { pthread_mutex_destroy(&meta_indirect_ops->mutex); return hresult_from_vk_result(vr); @@ -1764,15 +1791,17 @@ HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops for (i = 0; i < meta_indirect_ops->pipelines_count; i++) { - if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count) + if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count && + !meta_indirect_ops->pipelines[i].pure_debug) { - info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout; + info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout_patch; info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline; goto out; } } - debug = meta_ops->device->debug_ring.active; + debug = meta_ops->device->debug_ring.active && + !!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS_TRACE_INDIRECT); /* If we have debug ring, we can dump indirect command buffer data to the ring as well. * Vital for debugging broken execute indirect data with templates. */ @@ -1812,11 +1841,88 @@ HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines)); meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count; + meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].pure_debug = false; vr = vkd3d_meta_create_compute_pipeline(meta_ops->device, debug ? sizeof(cs_execute_indirect_patch_debug_ring) : sizeof(cs_execute_indirect_patch), debug ? cs_execute_indirect_patch_debug_ring : cs_execute_indirect_patch, - meta_indirect_ops->vk_pipeline_layout, &spec, + meta_indirect_ops->vk_pipeline_layout_patch, &spec, + true, NULL, &meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline); + + if (vr) + { + hr = hresult_from_vk_result(vr); + goto out; + } + + info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout_patch; + info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline; + meta_indirect_ops->pipelines_count++; + +out: + pthread_mutex_unlock(&meta_indirect_ops->mutex); + return hr; +} + +HRESULT vkd3d_meta_get_execute_indirect_debug_ring_pipeline(struct vkd3d_meta_ops *meta_ops, + uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info) +{ + struct vkd3d_meta_execute_indirect_spec_constant_data execute_indirect_spec_constants; + VkSpecializationMapEntry map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES + 1]; + struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect; + struct vkd3d_shader_debug_ring_spec_info debug_ring_info; + + VkSpecializationInfo spec; + HRESULT hr = S_OK; + VkResult vr; + size_t i; + int rc; + + if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + for (i = 0; i < meta_indirect_ops->pipelines_count; i++) + { + if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count && + meta_indirect_ops->pipelines[i].pure_debug) + { + info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout_debug_ring; + info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline; + goto out; + } + } + + vkd3d_shader_debug_ring_init_spec_constant(meta_ops->device, &debug_ring_info, + 0 /* Reserve this hash for internal debug streams. */); + + memset(&execute_indirect_spec_constants, 0, sizeof(execute_indirect_spec_constants)); + execute_indirect_spec_constants.constants = debug_ring_info.constants; + execute_indirect_spec_constants.workgroup_size_x = patch_command_count; + + memcpy(map_entry, debug_ring_info.map_entries, sizeof(debug_ring_info.map_entries)); + map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].constantID = 4; + map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].offset = + offsetof(struct vkd3d_meta_execute_indirect_spec_constant_data, workgroup_size_x); + map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].size = sizeof(patch_command_count); + + spec.pMapEntries = map_entry; + spec.pData = &execute_indirect_spec_constants; + spec.mapEntryCount = ARRAY_SIZE(map_entry); + spec.dataSize = sizeof(execute_indirect_spec_constants); + + vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size, + meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines)); + + meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count; + meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].pure_debug = true; + + vr = vkd3d_meta_create_compute_pipeline(meta_ops->device, + sizeof(cs_execute_indirect_debug_ring), + cs_execute_indirect_debug_ring, + meta_indirect_ops->vk_pipeline_layout_debug_ring, &spec, true, NULL, &meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline); if (vr) @@ -1825,7 +1931,7 @@ HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops goto out; } - info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout; + info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout_debug_ring; info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline; meta_indirect_ops->pipelines_count++; @@ -1842,7 +1948,8 @@ static void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops for (i = 0; i < meta_indirect_ops->pipelines_count; i++) VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL)); - VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL)); + VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout_patch, NULL)); + VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout_debug_ring, NULL)); pthread_mutex_destroy(&meta_indirect_ops->mutex); vkd3d_free(meta_indirect_ops->pipelines); } diff --git a/libs/vkd3d/shaders/cs_execute_indirect_debug_ring.comp b/libs/vkd3d/shaders/cs_execute_indirect_debug_ring.comp new file mode 100644 index 0000000000..d2de296692 --- /dev/null +++ b/libs/vkd3d/shaders/cs_execute_indirect_debug_ring.comp @@ -0,0 +1,52 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +#extension GL_GOOGLE_include_directive : require +#include "../../../include/shader-debug/debug_channel.h" + +layout(local_size_x_id = 4) in; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer { + uint values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer { + uint values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount { + uint count; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite { + uint count; +}; + +layout(push_constant) uniform Registers +{ + SrcBuffer src_buffer_va; + uvec2 indirect_count_va; + uint src_stride; + uint debug_tag; + uint implicit_instance; +}; + +void main() +{ + DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance); + + uint draw_id = gl_WorkGroupID.x; + uint max_draws = gl_NumWorkGroups.x; + if (any(notEqual(indirect_count_va, uvec2(0)))) + max_draws = min(max_draws, IndirectCount(indirect_count_va).count); + + if (gl_GlobalInvocationID.x == 0u) + DEBUG_CHANNEL_MSG(int(max_draws), int(gl_NumWorkGroups.x)); + + if (draw_id < max_draws) + { + uint src_offset = src_stride * draw_id + gl_LocalInvocationIndex; + uint src_value = src_buffer_va.values[src_offset]; + DEBUG_CHANNEL_MSG(uint(-1), gl_LocalInvocationIndex, src_value); + } +} diff --git a/libs/vkd3d/shaders/cs_execute_indirect_multi_dispatch_state_debug_ring.comp b/libs/vkd3d/shaders/cs_execute_indirect_multi_dispatch_state_debug_ring.comp new file mode 100644 index 0000000000..8400c40446 --- /dev/null +++ b/libs/vkd3d/shaders/cs_execute_indirect_multi_dispatch_state_debug_ring.comp @@ -0,0 +1,120 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +#extension GL_GOOGLE_include_directive : require +#include "../../../include/shader-debug/debug_channel.h" + +// Always emit a full 256 byte payload for each UBO. We have to assume worst case alignment anyways, +// little point in trying to be smart about it. +layout(local_size_x = 64) in; + +layout(buffer_reference_align = 4, std430, buffer_reference) readonly buffer Indirect +{ + uint values[]; +}; + +layout(buffer_reference_align = 4, std430, buffer_reference) writeonly buffer IndirectOutput +{ + uint values[]; +}; + +layout(buffer_reference_align = 4, std430, buffer_reference) readonly buffer IndirectCount +{ + uint value; +}; + +struct RootParameterBlock +{ + uint values[64]; +}; + +layout(buffer_reference_align = 16, std430, buffer_reference) writeonly buffer OutputRootParameters +{ + RootParameterBlock blocks[]; +}; + +layout(buffer_reference_align = 4, std430, buffer_reference) readonly buffer Template +{ + uint input_root_parameters[64]; + int source_word_offsets[64]; +}; + +layout(push_constant) uniform Registers +{ + Indirect indirect; + uvec2 count_va; + IndirectOutput out_dispatch; + OutputRootParameters out_root_parameters; + Template in_template; + uint stride_words; + uint dispatch_offset_words; + + // Debug metadata here + uint debug_tag; + uint implicit_instance; +}; + +void main() +{ + if (debug_tag != 0u) + DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance); + + bool active_dispatch; + uint dispatch_count; + if (any(notEqual(count_va, uvec2(0)))) + { + dispatch_count = min(gl_NumWorkGroups.x, IndirectCount(count_va).value); + active_dispatch = gl_WorkGroupID.x < dispatch_count; + } + else + { + dispatch_count = gl_NumWorkGroups.x; + active_dispatch = true; + } + + if (debug_tag != 0u && gl_GlobalInvocationID.x == 0) + DEBUG_CHANNEL_MSG(int(dispatch_count), int(gl_NumWorkGroups.x)); + + uint input_offset_base = gl_WorkGroupID.x * stride_words; + + uint new_root_parameter; + int source_offset = in_template.source_word_offsets[gl_LocalInvocationIndex]; + if (source_offset >= 0) + { + new_root_parameter = indirect.values[input_offset_base + source_offset]; + + if (active_dispatch) + { + const uint COPY_CONST_U32 = 0u; + DEBUG_CHANNEL_MSG(COPY_CONST_U32, gl_LocalInvocationIndex, uint(source_offset), new_root_parameter); + } + } + else + new_root_parameter = in_template.input_root_parameters[gl_LocalInvocationIndex]; + + out_root_parameters.blocks[gl_WorkGroupID.x].values[gl_LocalInvocationIndex] = new_root_parameter; + + // TODO: We can change this constant to suit our needs if we need multi-indirect any kind of dispatch really. + // Spec constant is good for that scenario. + + const uint DISPATCH_NUM_WORDS = 3; + if (gl_LocalInvocationIndex < DISPATCH_NUM_WORDS) + { + uint input_value; + if (active_dispatch) + input_value = indirect.values[input_offset_base + dispatch_offset_words + gl_LocalInvocationIndex]; + else + input_value = 0u; + + if (active_dispatch) + { + const uint DISPATCH_X_TYPE = 18u; + DEBUG_CHANNEL_MSG( + DISPATCH_X_TYPE + gl_LocalInvocationIndex, gl_LocalInvocationIndex, + dispatch_offset_words + gl_LocalInvocationIndex, input_value); + } + + out_dispatch.values[DISPATCH_NUM_WORDS * gl_WorkGroupID.x + gl_LocalInvocationIndex] = input_value; + } +} + diff --git a/libs/vkd3d/shaders/cs_execute_indirect_patch_debug_ring.comp b/libs/vkd3d/shaders/cs_execute_indirect_patch_debug_ring.comp index 33e245a6ea..2c86b13fb5 100644 --- a/libs/vkd3d/shaders/cs_execute_indirect_patch_debug_ring.comp +++ b/libs/vkd3d/shaders/cs_execute_indirect_patch_debug_ring.comp @@ -65,8 +65,8 @@ void main() dst_indirect_count_va.count = max_draws; } - if (debug_tag != 0u && gl_WorkGroupID.x == 0) - DEBUG_CHANNEL_MSG_UNIFORM(int(max_draws), int(gl_NumWorkGroups.x)); + if (debug_tag != 0u && gl_GlobalInvocationID.x == 0) + DEBUG_CHANNEL_MSG(int(max_draws), int(gl_NumWorkGroups.x)); if (draw_id < max_draws) { diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 69a51b6022..e3707fcf50 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -3417,6 +3417,10 @@ enum vkd3d_patch_command_token VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_X = 18, VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Y = 19, VKD3D_PATCH_COMMAND_TOKEN_COPY_MESH_TASKS_Z = 20, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_X = 21, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Y = 22, + VKD3D_PATCH_COMMAND_TOKEN_COPY_DISPATCH_Z = 23, + VKD3D_PATCH_COMMAND_TOKEN_GENERIC_INDIRECT_ARG = -1, VKD3D_PATCH_COMMAND_INT_MAX = 0x7fffffff }; @@ -3452,6 +3456,7 @@ struct d3d12_command_signature } state_template; bool requires_state_template_dgc; bool requires_state_template; + struct vkd3d_execute_indirect_info debug_ring_pipeline; enum vkd3d_pipeline_type pipeline_type; struct d3d12_device *device; @@ -3498,6 +3503,7 @@ struct vkd3d_shader_debug_ring VkDeviceAddress atomic_device_address; size_t ring_size; size_t control_block_size; + uint32_t implicit_instance_count; pthread_t ring_thread; pthread_mutex_t ring_lock; @@ -4390,6 +4396,8 @@ struct vkd3d_multi_dispatch_indirect_state_args VkDeviceAddress root_parameter_template_va; uint32_t stride_words; uint32_t dispatch_offset_words; + uint32_t debug_tag; + uint32_t implicit_instance; }; struct vkd3d_multi_dispatch_indirect_ops @@ -4415,15 +4423,26 @@ struct vkd3d_execute_indirect_args uint32_t implicit_instance; }; +struct vkd3d_execute_indirect_debug_ring_args +{ + VkDeviceAddress api_buffer_va; + VkDeviceAddress indirect_count_va; + uint32_t api_buffer_word_stride; + uint32_t debug_tag; + uint32_t implicit_instance; +}; + struct vkd3d_execute_indirect_pipeline { VkPipeline vk_pipeline; uint32_t workgroup_size_x; + bool pure_debug; }; struct vkd3d_execute_indirect_ops { - VkPipelineLayout vk_pipeline_layout; + VkPipelineLayout vk_pipeline_layout_patch; + VkPipelineLayout vk_pipeline_layout_debug_ring; struct vkd3d_execute_indirect_pipeline *pipelines; size_t pipelines_count; size_t pipelines_size; @@ -4611,6 +4630,8 @@ static inline uint32_t vkd3d_meta_get_multi_dispatch_indirect_workgroup_size(voi HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops, uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info); +HRESULT vkd3d_meta_get_execute_indirect_debug_ring_pipeline(struct vkd3d_meta_ops *meta_ops, + uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info); void vkd3d_meta_get_sampler_feedback_resolve_pipeline(struct vkd3d_meta_ops *meta_ops, enum vkd3d_sampler_feedback_resolve_type type, struct vkd3d_sampler_feedback_resolve_info *info); diff --git a/libs/vkd3d/vkd3d_shaders.h b/libs/vkd3d/vkd3d_shaders.h index 29c2942ade..6dd0f66705 100644 --- a/libs/vkd3d/vkd3d_shaders.h +++ b/libs/vkd3d/vkd3d_shaders.h @@ -50,8 +50,10 @@ enum vkd3d_meta_copy_mode #include #include #include +#include #include #include +#include #include #include #include