From 3fc5f6349283dcfd3a053187a520b1007d6d9315 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Mon, 7 Oct 2024 13:01:38 +0100 Subject: [PATCH] [SYCL][Graph] Dynamic command-groups Implement Dynamic Command-Group feature for https://github.com/intel/llvm/pull/14896 to enable updating `ur_kernel_handle_t` objects in graph nodes between executions. --- .../sycl/ext/oneapi/experimental/graph.hpp | 50 ++++++ sycl/include/sycl/handler.hpp | 1 + sycl/source/detail/graph_impl.cpp | 82 ++++++++- sycl/source/detail/graph_impl.hpp | 40 ++++- sycl/source/detail/scheduler/commands.cpp | 22 ++- sycl/source/detail/scheduler/commands.hpp | 1 + .../Graph/Update/dyn_cgf_accessor.cpp | 58 +++++++ .../Graph/Update/dyn_cgf_accessor_deps.cpp | 74 ++++++++ .../Graph/Update/dyn_cgf_event_deps.cpp | 73 ++++++++ .../test-e2e/Graph/Update/dyn_cgf_ndrange.cpp | 72 ++++++++ .../Graph/Update/dyn_cgf_overwrite.cpp | 59 +++++++ .../Graph/Update/dyn_cgf_parameters.cpp | 70 ++++++++ .../Graph/Update/dyn_cgf_shared_nodes.cpp | 72 ++++++++ .../Update/dyn_cgf_update_before_finalize.cpp | 48 ++++++ sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp | 55 ++++++ .../Extensions/CommandGraph/Exceptions.cpp | 158 ++++++++++++++++++ 16 files changed, 928 insertions(+), 7 deletions(-) create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp index e15d5ed5a6b7a..0e3cc834ee6f5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp @@ -93,6 +93,7 @@ class node_impl; class graph_impl; class exec_graph_impl; class dynamic_parameter_impl; +class dynamic_command_group_impl; } // namespace detail enum class node_type { @@ -213,6 +214,27 @@ class depends_on_all_leaves : public ::sycl::detail::DataLessProperty< } // namespace node } // namespace property +class __SYCL_EXPORT dynamic_command_group { +public: + dynamic_command_group( + const context &SyclContext, const device &SyclDevice, + const std::vector> &CGFList); + + dynamic_command_group( + const queue &SyclQueue, + const std::vector> &CGFList); + + size_t get_active_cgf() const; + void set_active_cgf(size_t Index); + +private: + template + friend const decltype(Obj::impl) & + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + std::shared_ptr impl; +}; + namespace detail { // Templateless modifiable command-graph base class. class __SYCL_EXPORT modifiable_command_graph { @@ -269,6 +291,28 @@ class __SYCL_EXPORT modifiable_command_graph { return Node; } + /// Add a Dynamic command-group node to the graph. + /// @param DynamicCG Dynamic command-group function to create node with. + /// @param PropList Property list used to pass [0..n] predecessor nodes. + /// @return Constructed node which has been added to the graph. + node add(dynamic_command_group &DynamicCG, + const property_list &PropList = {}) { + if (PropList.has_property()) { + auto Deps = PropList.get_property(); + node Node = addImpl(DynamicCG, Deps.get_dependencies()); + if (PropList.has_property()) { + addGraphLeafDependencies(Node); + } + return Node; + } + + node Node = addImpl(DynamicCG, {}); + if (PropList.has_property()) { + addGraphLeafDependencies(Node); + } + return Node; + } + /// Add a dependency between two nodes. /// @param Src Node which will be a dependency of \p Dest. /// @param Dest Node which will be dependent on \p Src. @@ -328,6 +372,12 @@ class __SYCL_EXPORT modifiable_command_graph { modifiable_command_graph(const std::shared_ptr &Impl) : impl(Impl) {} + /// Template-less implementation of add() for dynamic command-group nodes. + /// @param DynCGF Dynamic Command-group function object to add. + /// @param Dep List of predecessor nodes. + /// @return Node added to the graph. + node addImpl(dynamic_command_group &DynCGF, const std::vector &Dep); + /// Template-less implementation of add() for CGF nodes. /// @param CGF Command-group function to add. /// @param Dep List of predecessor nodes. diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 4f443a2103eb4..f3b015287ad1b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3376,6 +3376,7 @@ class __SYCL_EXPORT handler { size_t Size, bool Block = false); friend class ext::oneapi::experimental::detail::graph_impl; friend class ext::oneapi::experimental::detail::dynamic_parameter_impl; + friend class ext::oneapi::experimental::detail::dynamic_command_group_impl; bool DisableRangeRounding(); diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 4df70fc78c1b1..9c70cf97f5cd1 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -700,10 +700,18 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif + std::vector KernelAlternatives{}; + if (Node->MDynCG) { + for (auto &CG : Node->MDynCG->MKernels) { + KernelAlternatives.push_back( + static_cast(CG.get())); + } + } + ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel( Ctx, DeviceImpl, CommandBuffer, *static_cast((Node->MCommandGroup.get())), - Deps, &NewSyncPoint, &NewCommand, nullptr); + KernelAlternatives, Deps, &NewSyncPoint, &NewCommand, nullptr); MCommandMap[Node] = NewCommand; @@ -1376,8 +1384,11 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice()); // Gather arg information from Node - auto &ExecCG = - *(static_cast(Node->MCommandGroup.get())); + sycl::detail::CG *NodeCG = (Node->MDynCG) + ? Node->MDynCG->getActiveKernel().get() + : Node->MCommandGroup.get(); + auto ExecCG = *(static_cast(NodeCG)); + // Copy args because we may modify them std::vector NodeArgs = ExecCG.getArguments(); // Copy NDR desc since we need to modify it @@ -1560,6 +1571,27 @@ modifiable_command_graph::modifiable_command_graph( : impl(std::make_shared( SyclQueue.get_context(), SyclQueue.get_device(), PropList)) {} +node modifiable_command_graph::addImpl(dynamic_command_group &DynCGF, + const std::vector &Deps) { + impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); + + std::vector> DepImpls; + for (auto &D : Deps) { + DepImpls.push_back(sycl::detail::getSyclObjImpl(D)); + } + + auto DynCGFImpl = sycl::detail::getSyclObjImpl(DynCGF); + const std::function &CGF = DynCGFImpl->getActiveCGF(); + + graph_impl::WriteLock Lock(impl->MMutex); + std::shared_ptr NodeImpl = impl->add(CGF, {}, DepImpls); + + // Track the dynamic command-group used inside the node object + NodeImpl->MDynCG = DynCGFImpl; + + return sycl::detail::createSyclObjFromImpl(NodeImpl); +} + node modifiable_command_graph::addImpl(const std::vector &Deps) { impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); std::vector> DepImpls; @@ -1760,6 +1792,31 @@ void dynamic_parameter_base::updateAccessor( impl->updateAccessor(Acc); } +dynamic_command_group_impl::dynamic_command_group_impl( + const context &Context, const device &Device, + const std::vector> &CGFList) + : MContext(Context), MDevice(Device), MActiveCGF(0), MCGFList(CGFList) { + + // Create a placeholder graph object so we can use it to construct a handler + // object to process the CGFs. + auto TmpGraph = std::make_shared(MContext, MDevice); + for (const auto &CGF : MCGFList) { + // Handler defined inside the loop so it doesn't appear to the runtime + // as a single command-group with multiple commands inside. + sycl::handler Handler{TmpGraph}; + CGF(Handler); + + if (Handler.getType() != sycl::detail::CGType::Kernel) { + throw sycl::exception( + make_error_code(errc::invalid), + "The only type of command-groups that can be used in " + "dynamic command-groups is kernels."); + } + + Handler.finalize(); + MKernels.push_back(std::move(Handler.impl->MGraphNodeCG)); + } +} } // namespace detail node_type node::get_type() const { return impl->MNodeType; } @@ -1798,6 +1855,25 @@ template <> __SYCL_EXPORT void node::update_range<2>(range<2> Range) { template <> __SYCL_EXPORT void node::update_range<3>(range<3> Range) { impl->updateRange(Range); } + +dynamic_command_group::dynamic_command_group( + const context &SyclContext, const device &SyclDevice, + const std::vector> &CGFList) + : impl(std::make_shared( + SyclContext, SyclDevice, CGFList)) {} + +dynamic_command_group::dynamic_command_group( + const queue &SyclQueue, + const std::vector> &CGFList) + : impl(std::make_shared( + SyclQueue.get_context(), SyclQueue.get_device(), CGFList)) {} + +size_t dynamic_command_group::get_active_cgf() const { + return impl->getActiveIndex(); +} +void dynamic_command_group::set_active_cgf(size_t Index) { + return impl->setActiveIndex(Index); +} } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 4ee34830f39a2..5cc0166bfec20 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -99,6 +99,8 @@ class node_impl : public std::enable_shared_from_this { /// Stores the executable graph impl associated with this node if it is a /// subgraph node. std::shared_ptr MSubGraphImpl; + /// For Dynamic command-group nodes, stores the dynamic command-group object. + std::shared_ptr MDynCG; /// Used for tracking visited status during cycle checks. bool MVisited = false; @@ -160,7 +162,7 @@ class node_impl : public std::enable_shared_from_this { : enable_shared_from_this(Other), MSuccessors(Other.MSuccessors), MPredecessors(Other.MPredecessors), MCGType(Other.MCGType), MNodeType(Other.MNodeType), MCommandGroup(Other.getCGCopy()), - MSubGraphImpl(Other.MSubGraphImpl) {} + MSubGraphImpl(Other.MSubGraphImpl), MDynCG(Other.MDynCG) {} /// Copy-assignment operator. This will perform a deep-copy of the /// command group object associated with this node. @@ -172,6 +174,7 @@ class node_impl : public std::enable_shared_from_this { MNodeType = Other.MNodeType; MCommandGroup = Other.getCGCopy(); MSubGraphImpl = Other.MSubGraphImpl; + MDynCG = Other.MDynCG; } return *this; } @@ -1579,6 +1582,41 @@ class dynamic_parameter_impl { std::vector MValueStorage; }; +class dynamic_command_group_impl { +public: + dynamic_command_group_impl( + const sycl::context &Context, const sycl::device &Device, + const std::vector> &CGFList); + + size_t getActiveIndex() const { return MActiveCGF; } + void setActiveIndex(size_t Index) { + if (Index >= MCGFList.size()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Index is out of range."); + } + + MActiveCGF = Index; + } + + const std::function &getActiveCGF() const { + return MCGFList[MActiveCGF]; + } + + const std::unique_ptr &getActiveKernel() const { + return MKernels[MActiveCGF]; + } + + sycl::context MContext; // TODO - verify + sycl::device MDevice; // TODO - verify + size_t MActiveCGF; // TODO Thread safe? + + // List of CGFs. Initialized on creation of dynamic command-group object by + // copying by value the list of std::functions passed by the user. + const std::vector> MCGFList; + + /// List of kernel command-groups for dynamic command-group nodes + std::vector> MKernels; +}; } // namespace detail } // namespace experimental } // namespace oneapi diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 10d42792d87e8..9d5f2aef7f516 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2479,6 +2479,7 @@ ur_result_t enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, + const std::vector &AlternativeKernels, std::vector &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, @@ -2520,6 +2521,18 @@ ur_result_t enqueueImpCommandBufferKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName); } + // TODO - refactor this naive impl + std::vector AltK; + for (const auto &CGKernel : AlternativeKernels) { + ur_kernel_handle_t URK; + std::tie(URK, std::ignore, std::ignore, std::ignore) = + sycl::detail::ProgramManager::getInstance().getOrCreateKernel( + ContextImpl, DeviceImpl, CGKernel->MKernelName); + if (URK != UrKernel) { + AltK.push_back(URK); + } + } + auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl, &Ctx, &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { @@ -2561,7 +2574,8 @@ ur_result_t enqueueImpCommandBufferKernel( ur_result_t Res = Adapter->call_nocheck( CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], - &NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(), + &NDRDesc.GlobalSize[0], LocalSize, AltK.size(), + AltK.size() ? AltK.data() : nullptr, SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint, OutCommand); @@ -2777,10 +2791,12 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { return AllocaCmd->getMemAllocation(); }; + // TODO + std::vector AlternativeKernels{}; auto result = enqueueImpCommandBufferKernel( MQueue->get_context(), MQueue->getDeviceImplPtr(), MCommandBuffer, - *ExecKernel, MSyncPointDeps, &OutSyncPoint, &OutCommand, - getMemAllocationFunc); + *ExecKernel, AlternativeKernels, MSyncPointDeps, &OutSyncPoint, + &OutCommand, getMemAllocationFunc); MEvent->setSyncPoint(OutSyncPoint); MEvent->setCommandBufferCommand(OutCommand); return result; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 1aecf5ed4eabb..5f020e5650226 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -733,6 +733,7 @@ ur_result_t enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, + const std::vector &AlternativeKernels, std::vector &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp new file mode 100644 index 0000000000000..84de4f6f97029 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp @@ -0,0 +1,58 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests using dynamic command-group objects with buffer accessors + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + const size_t N = 1024; + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + auto Acc = Buf.get_access(); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp new file mode 100644 index 0000000000000..9ccb4c46cf70b --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp @@ -0,0 +1,74 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests adding a dynamic command-group node to a graph using buffer +// accessors for the node edges. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + const size_t N = 1024; + int *Ptr = (int *)sycl::malloc_device(N, Queue); + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto RootNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 1; }); + }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto LeafNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for( + N, [=](item<1> Item) { Ptr[Item.get_id()] = Acc[Item.get_id()]; }); + }); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == (PatternA + 1)); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == (PatternB + 1)); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp new file mode 100644 index 0000000000000..f0f7dda3e3416 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp @@ -0,0 +1,73 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests adding a dynamic command-group node to a graph using graph limited +// events for dependencies. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + int *PtrC = malloc_device(N, Queue); + std::vector HostData(N); + + Graph.begin_recording(Queue); + int PatternA = 42; + auto EventA = Queue.fill(PtrA, PatternA, N); + int PatternB = 0xA; + auto EventB = Queue.fill(PtrA, PatternB, N); + Graph.end_recording(Queue); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(N, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] * PtrB[I]; + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(N, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] + PtrB[I]; + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA * PatternB); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA + PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp new file mode 100644 index 0000000000000..a72a20558d24f --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests updating a dynamic command-group node where the dynamic command-groups +// have different nd-ranges + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + auto RootNode = Graph.add([&](handler &cgh) { cgh.memset(Ptr, N, 0); }); + + int PatternA = 42; + sycl::range<1> RangeA{512}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(RangeA, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + sycl::range<1> RangeB{256}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(RangeB, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + if (i < RangeA.get(0)) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + if (i < RangeB.get(0)) { + assert(HostData[i] == PatternB); + } else if (i < RangeA.get(0)) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp new file mode 100644 index 0000000000000..479e7be5ca7e0 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp @@ -0,0 +1,59 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests how the nd-range of a node is preserved when it is updated alongside +// the active command-group + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + std::vector HostData(N); + int *Ptr = malloc_device(N, Queue); + Queue.memset(Ptr, N, 0).wait(); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + sycl::range<1> UpdateRange(512); + DynamicCGNode.update_range(UpdateRange); + + DynamicCG.set_active_cgf(1); + + // Check if UpdateRange is still used + DynamicCG.set_active_cgf(0); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + if (i < UpdateRange.get(0)) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp new file mode 100644 index 0000000000000..7709b78cceab2 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp @@ -0,0 +1,70 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests updating kernel code using dynamic command-groups that have different +// parameters in each command-group. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + + std::vector HostDataA(N); + std::vector HostDataB(N); + + Queue.memset(PtrA, N, 0); + Queue.memset(PtrB, N, 0); + Queue.wait(); + + int PatternA = 0xA; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = PatternA; }); + }; + + int PatternB = 42; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), N); + Queue.copy(PtrB, HostDataB.data(), N); + Queue.wait(); + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), N); + Queue.copy(PtrB, HostDataB.data(), N); + Queue.wait(); + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp new file mode 100644 index 0000000000000..065e9266ff107 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests using the same dynamic command-group in more than one graph node. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + std::vector HostData(N); + int *Ptr = malloc_device(N, Queue); + Queue.memset(Ptr, N, 0).wait(); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto Node1 = Graph.add(DynamicCG); + + auto Node2 = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] *= 2; }); + }, + exp_ext::property::node::depends_on(Node1)); + + auto Node3 = Graph.add(DynamicCG, exp_ext::property::node::depends_on(Node2)); + sycl::range<1> Node3Range(512); + Node3.update_range(Node3Range); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + int Ref = PatternA * 2; + if (i < Node3Range.get(0)) { + Ref += PatternA; + } + assert(HostData[i] == Ref); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(Node1); + ExecGraph.update(Node2); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + int Ref = (PatternB * 2) + PatternB; + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp new file mode 100644 index 0000000000000..1e8edf5ef3022 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp @@ -0,0 +1,48 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests using updating a dynamic command-group node after it has been added to +// a graph but before the graph has been finalized + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(); + + DynamicCG.set_active_cgf(1); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp new file mode 100644 index 0000000000000..45986891fabf4 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp @@ -0,0 +1,55 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests updating usm kernel code using dynamic command-groups + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 1542b5f34d7dc..05053de9ef3c8 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -700,3 +700,161 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { }), sycl::exception); } + +// Error when a dynamic command-group is used with a graph belonging to a +// different context. +TEST_F(CommandGraphTest, DynamicCommandGroupWrongContext) { + device Dev; + context Ctx{Dev}; + context Ctx2{Dev}; + queue Q1{Ctx, Dev}; + queue Q2{Ctx2, Dev}; + + experimental::command_graph Graph{Q1.get_context(), Q1.get_device()}; + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + { + experimental::dynamic_command_group DynCG(Q2, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } + + { + experimental::dynamic_command_group DynCG(Ctx2, Dev, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } +} + +// Error when a dynamic command-group is used with a graph belonging to a +// different device. +TEST_F(CommandGraphTest, DynamicCommandGroupWrongDevice) { + auto Devices = device::get_devices(); + + // Test needs at least 2 devices available. + if (Devices.size() < 2) { + GTEST_SKIP(); + } + + device &Dev1 = Devices[0]; + device &Dev2 = Devices[1]; + context Ctx{{Dev1, Dev2}}; + queue Q1{Ctx, Dev1}; + queue Q2{Ctx, Dev2}; + + experimental::command_graph Graph{Q1.get_context(), Q1.get_device()}; + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + { + ext::oneapi::experimental::dynamic_command_group DynCG(Q2, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } + + { + experimental::dynamic_command_group DynCG(Ctx, Dev2, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } +} + +// Error when a dynamic command-group contains no command-groups. +TEST_F(CommandGraphTest, DynamicCommandGroupEmpty) { + ASSERT_THROW(experimental::dynamic_command_group DynCG(Queue, {}), + sycl::exception); +} + +// Error when an out of bounds index is selected by the user +TEST_F(CommandGraphTest, DynamicCommandGroupInvalidIndex) { + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + experimental::dynamic_command_group DynCG(Queue, {CGF}); + ASSERT_THROW(DynCG.set_active_cgf(1), sycl::exception); +} + +// Error when a non-kernel command-group is included in a dynamic command-group +TEST_F(CommandGraphTest, DynamicCommandGroupNotKernel) { + int *Ptr = malloc_device(1, Queue); + auto CGF = [&](sycl::handler &CGH) { CGH.memset(Ptr, 1, 0); }; + + ASSERT_THROW(experimental::dynamic_command_group DynCG(Queue, {CGF}), + sycl::exception); + sycl::free(Ptr, Queue); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using graph limited events to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchEventEdges) { + size_t N = 32; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); + }); + + auto EventB = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on(EventA); + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on(EventB); + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); + }; + + ASSERT_THROW(experimental::dynamic_command_group DynCG(Queue, {CGFA, CGFB}), + sycl::exception); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using accessors to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) { + size_t N = 32; + std::vector HostData(N, 0); + buffer BufA{HostData}; + buffer BufB{HostData}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + + Queue.submit([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); + }); + + Queue.submit([&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); + }; + + ASSERT_THROW(experimental::dynamic_command_group DynCG(Queue, {CGFA, CGFB}), + sycl::exception); +}