diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp index e15d5ed5a6b7a..0e3cc834ee6f5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp @@ -93,6 +93,7 @@ class node_impl; class graph_impl; class exec_graph_impl; class dynamic_parameter_impl; +class dynamic_command_group_impl; } // namespace detail enum class node_type { @@ -213,6 +214,27 @@ class depends_on_all_leaves : public ::sycl::detail::DataLessProperty< } // namespace node } // namespace property +class __SYCL_EXPORT dynamic_command_group { +public: + dynamic_command_group( + const context &SyclContext, const device &SyclDevice, + const std::vector> &CGFList); + + dynamic_command_group( + const queue &SyclQueue, + const std::vector> &CGFList); + + size_t get_active_cgf() const; + void set_active_cgf(size_t Index); + +private: + template + friend const decltype(Obj::impl) & + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + std::shared_ptr impl; +}; + namespace detail { // Templateless modifiable command-graph base class. class __SYCL_EXPORT modifiable_command_graph { @@ -269,6 +291,28 @@ class __SYCL_EXPORT modifiable_command_graph { return Node; } + /// Add a Dynamic command-group node to the graph. + /// @param DynamicCG Dynamic command-group function to create node with. + /// @param PropList Property list used to pass [0..n] predecessor nodes. + /// @return Constructed node which has been added to the graph. + node add(dynamic_command_group &DynamicCG, + const property_list &PropList = {}) { + if (PropList.has_property()) { + auto Deps = PropList.get_property(); + node Node = addImpl(DynamicCG, Deps.get_dependencies()); + if (PropList.has_property()) { + addGraphLeafDependencies(Node); + } + return Node; + } + + node Node = addImpl(DynamicCG, {}); + if (PropList.has_property()) { + addGraphLeafDependencies(Node); + } + return Node; + } + /// Add a dependency between two nodes. /// @param Src Node which will be a dependency of \p Dest. /// @param Dest Node which will be dependent on \p Src. @@ -328,6 +372,12 @@ class __SYCL_EXPORT modifiable_command_graph { modifiable_command_graph(const std::shared_ptr &Impl) : impl(Impl) {} + /// Template-less implementation of add() for dynamic command-group nodes. + /// @param DynCGF Dynamic Command-group function object to add. + /// @param Dep List of predecessor nodes. + /// @return Node added to the graph. + node addImpl(dynamic_command_group &DynCGF, const std::vector &Dep); + /// Template-less implementation of add() for CGF nodes. /// @param CGF Command-group function to add. /// @param Dep List of predecessor nodes. diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 4f443a2103eb4..f3b015287ad1b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3376,6 +3376,7 @@ class __SYCL_EXPORT handler { size_t Size, bool Block = false); friend class ext::oneapi::experimental::detail::graph_impl; friend class ext::oneapi::experimental::detail::dynamic_parameter_impl; + friend class ext::oneapi::experimental::detail::dynamic_command_group_impl; bool DisableRangeRounding(); diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 4df70fc78c1b1..a884f4142b8ba 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -700,10 +700,18 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif + std::vector KernelAlternatives{}; + if (Node->MDynCG) { + for (auto &CG : Node->MDynCG->MKernels) { + KernelAlternatives.push_back( + static_cast(CG.get())); + } + } + ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel( Ctx, DeviceImpl, CommandBuffer, *static_cast((Node->MCommandGroup.get())), - Deps, &NewSyncPoint, &NewCommand, nullptr); + KernelAlternatives, Deps, &NewSyncPoint, &NewCommand, nullptr); MCommandMap[Node] = NewCommand; @@ -736,10 +744,18 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNode( findRealDeps(Deps, N.lock(), MPartitionNodes[Node]); } + std::vector KernelAlternatives{}; + if (Node->MDynCG) { + for (auto &CG : Node->MDynCG->MKernels) { + KernelAlternatives.push_back( + static_cast(CG.get())); + } + } + sycl::detail::EventImplPtr Event = sycl::detail::Scheduler::getInstance().addCG( Node->getCGCopy(), AllocaQueue, /*EventNeeded=*/true, CommandBuffer, - Deps); + Deps, KernelAlternatives); MCommandMap[Node] = Event->getCommandBufferCommand(); return Event->getSyncPoint(); @@ -1376,8 +1392,11 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice()); // Gather arg information from Node - auto &ExecCG = - *(static_cast(Node->MCommandGroup.get())); + sycl::detail::CG *NodeCG = (Node->MDynCG) + ? Node->MDynCG->getActiveKernel().get() + : Node->MCommandGroup.get(); + auto ExecCG = *(static_cast(NodeCG)); + // Copy args because we may modify them std::vector NodeArgs = ExecCG.getArguments(); // Copy NDR desc since we need to modify it @@ -1560,6 +1579,39 @@ modifiable_command_graph::modifiable_command_graph( : impl(std::make_shared( SyclQueue.get_context(), SyclQueue.get_device(), PropList)) {} +node modifiable_command_graph::addImpl(dynamic_command_group &DynCGF, + const std::vector &Deps) { + impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); + auto DynCGFImpl = sycl::detail::getSyclObjImpl(DynCGF); + + if (DynCGFImpl->MContext != impl->getContext()) { + throw sycl::exception( + make_error_code(sycl::errc::invalid), + "Context of dynamic command-group does not match graph."); + } + + if (DynCGFImpl->MDevice != impl->getDevice()) { + throw sycl::exception( + make_error_code(sycl::errc::invalid), + "Device of dynamic command-group does not match graph."); + } + + std::vector> DepImpls; + for (auto &D : Deps) { + DepImpls.push_back(sycl::detail::getSyclObjImpl(D)); + } + + const std::function &CGF = DynCGFImpl->getActiveCGF(); + + graph_impl::WriteLock Lock(impl->MMutex); + std::shared_ptr NodeImpl = impl->add(CGF, {}, DepImpls); + + // Track the dynamic command-group used inside the node object + NodeImpl->MDynCG = DynCGFImpl; + + return sycl::detail::createSyclObjFromImpl(NodeImpl); +} + node modifiable_command_graph::addImpl(const std::vector &Deps) { impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); std::vector> DepImpls; @@ -1760,6 +1812,31 @@ void dynamic_parameter_base::updateAccessor( impl->updateAccessor(Acc); } +dynamic_command_group_impl::dynamic_command_group_impl( + const context &Context, const device &Device, + const std::vector> &CGFList) + : MContext(Context), MDevice(Device), MActiveCGF(0), MCGFList(CGFList) { + + // Create a placeholder graph object so we can use it to construct a handler + // object to process the CGFs. + auto TmpGraph = std::make_shared(MContext, MDevice); + for (const auto &CGF : MCGFList) { + // Handler defined inside the loop so it doesn't appear to the runtime + // as a single command-group with multiple commands inside. + sycl::handler Handler{TmpGraph}; + CGF(Handler); + + if (Handler.getType() != sycl::detail::CGType::Kernel) { + throw sycl::exception( + make_error_code(errc::invalid), + "The only type of command-groups that can be used in " + "dynamic command-groups is kernels."); + } + + Handler.finalize(); + MKernels.push_back(std::move(Handler.impl->MGraphNodeCG)); + } +} } // namespace detail node_type node::get_type() const { return impl->MNodeType; } @@ -1798,6 +1875,37 @@ template <> __SYCL_EXPORT void node::update_range<2>(range<2> Range) { template <> __SYCL_EXPORT void node::update_range<3>(range<3> Range) { impl->updateRange(Range); } + +dynamic_command_group::dynamic_command_group( + const context &SyclContext, const device &SyclDevice, + const std::vector> &CGFList) + : impl(std::make_shared( + SyclContext, SyclDevice, CGFList)) { + if (CGFList.empty()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Dynamic command-group cannot be created with an " + "empty CGF list."); + } +} + +dynamic_command_group::dynamic_command_group( + const queue &SyclQueue, + const std::vector> &CGFList) + : impl(std::make_shared( + SyclQueue.get_context(), SyclQueue.get_device(), CGFList)) { + if (CGFList.empty()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Dynamic command-group cannot be created with an " + "empty CGF list."); + } +} + +size_t dynamic_command_group::get_active_cgf() const { + return impl->getActiveIndex(); +} +void dynamic_command_group::set_active_cgf(size_t Index) { + return impl->setActiveIndex(Index); +} } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 4ee34830f39a2..3d962619127c9 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -99,6 +99,8 @@ class node_impl : public std::enable_shared_from_this { /// Stores the executable graph impl associated with this node if it is a /// subgraph node. std::shared_ptr MSubGraphImpl; + /// For Dynamic command-group nodes, stores the dynamic command-group object. + std::shared_ptr MDynCG; /// Used for tracking visited status during cycle checks. bool MVisited = false; @@ -160,7 +162,7 @@ class node_impl : public std::enable_shared_from_this { : enable_shared_from_this(Other), MSuccessors(Other.MSuccessors), MPredecessors(Other.MPredecessors), MCGType(Other.MCGType), MNodeType(Other.MNodeType), MCommandGroup(Other.getCGCopy()), - MSubGraphImpl(Other.MSubGraphImpl) {} + MSubGraphImpl(Other.MSubGraphImpl), MDynCG(Other.MDynCG) {} /// Copy-assignment operator. This will perform a deep-copy of the /// command group object associated with this node. @@ -172,6 +174,7 @@ class node_impl : public std::enable_shared_from_this { MNodeType = Other.MNodeType; MCommandGroup = Other.getCGCopy(); MSubGraphImpl = Other.MSubGraphImpl; + MDynCG = Other.MDynCG; } return *this; } @@ -1579,6 +1582,41 @@ class dynamic_parameter_impl { std::vector MValueStorage; }; +class dynamic_command_group_impl { +public: + dynamic_command_group_impl( + const sycl::context &Context, const sycl::device &Device, + const std::vector> &CGFList); + + size_t getActiveIndex() const { return MActiveCGF; } + void setActiveIndex(size_t Index) { + if (Index >= MCGFList.size()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Index is out of range."); + } + + MActiveCGF = Index; + } + + const std::function &getActiveCGF() const { + return MCGFList[MActiveCGF]; + } + + const std::unique_ptr &getActiveKernel() const { + return MKernels[MActiveCGF]; + } + + sycl::context MContext; + sycl::device MDevice; + std::atomic MActiveCGF; + + // List of CGFs. Initialized on creation of dynamic command-group object by + // copying by value the list of std::functions passed by the user. + const std::vector> MCGFList; + + /// List of kernel command-groups for dynamic command-group nodes + std::vector> MKernels; +}; } // namespace detail } // namespace experimental } // namespace oneapi diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 10d42792d87e8..cc7f203089634 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1923,10 +1923,12 @@ static std::string_view cgTypeToString(detail::CGType Type) { ExecCGCommand::ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer, - const std::vector &Dependencies) + const std::vector &Dependencies, + const std::vector &AlternativeKernels) : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), - MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { + MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)), + MAlternativeKernels(AlternativeKernels) { if (MCommandGroup->getType() == detail::CGType::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); @@ -2479,17 +2481,26 @@ ur_result_t enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, + const std::vector &AlternativeKernels, std::vector &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, const std::function &getMemAllocationFunc) { auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx); const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); + + // UR kernel and program for 'CommandGroup' ur_kernel_handle_t UrKernel = nullptr; ur_program_handle_t UrProgram = nullptr; + + // Impl objects created when 'CommandGroup' is from a kernel bundle std::shared_ptr SyclKernelImpl = nullptr; std::shared_ptr DeviceImageImpl = nullptr; + // List of ur objects to be released after UR call + std::vector UrKernelsToRelease; + std::vector UrProgramsToRelease; + auto Kernel = CommandGroup.MSyclKernel; auto KernelBundleImplPtr = CommandGroup.MKernelBundle; const KernelArgMask *EliminatedArgMask = nullptr; @@ -2518,6 +2529,39 @@ ur_result_t enqueueImpCommandBufferKernel( std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName); + UrKernelsToRelease.push_back(UrKernel); + UrProgramsToRelease.push_back(UrProgram); + } + + // Build up the list of UR kernel handles that the UR command could be + // updated to use. + std::vector AltUrKernels; + for (const auto &AltCGKernel : AlternativeKernels) { + ur_kernel_handle_t AltUrKernel = nullptr; + if (auto KernelBundleImplPtr = AltCGKernel->MKernelBundle; + KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { + auto KernelName = AltCGKernel->MKernelName; + kernel_id KernelID = + detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); + kernel SyclKernel = + KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); + AltUrKernel = detail::getSyclObjImpl(SyclKernel)->getHandleRef(); + } else if (AltCGKernel->MSyclKernel != nullptr) { + AltUrKernel = Kernel->getHandleRef(); + } else { + ur_program_handle_t UrProgram = nullptr; + std::tie(AltUrKernel, std::ignore, std::ignore, UrProgram) = + sycl::detail::ProgramManager::getInstance().getOrCreateKernel( + ContextImpl, DeviceImpl, AltCGKernel->MKernelName); + UrKernelsToRelease.push_back(AltUrKernel); + UrProgramsToRelease.push_back(UrProgram); + } + + if (AltUrKernel != UrKernel) { + // Don't include command-group 'CommandGroup' in the list to pass to UR, + // as this will be used for the primary ur kernel parameter. + AltUrKernels.push_back(AltUrKernel); + } } auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl, &Ctx, @@ -2561,13 +2605,16 @@ ur_result_t enqueueImpCommandBufferKernel( ur_result_t Res = Adapter->call_nocheck( CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], - &NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(), - SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint, - OutCommand); + &NDRDesc.GlobalSize[0], LocalSize, AltUrKernels.size(), + AltUrKernels.size() ? AltUrKernels.data() : nullptr, + SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, + OutSyncPoint, OutCommand); - if (!SyclKernelImpl && !Kernel) { - Adapter->call(UrKernel); - Adapter->call(UrProgram); + for (auto &Kernel : UrKernelsToRelease) { + Adapter->call(Kernel); + } + for (auto &Program : UrProgramsToRelease) { + Adapter->call(Program); } if (Res != UR_RESULT_SUCCESS) { @@ -2779,8 +2826,8 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { auto result = enqueueImpCommandBufferKernel( MQueue->get_context(), MQueue->getDeviceImplPtr(), MCommandBuffer, - *ExecKernel, MSyncPointDeps, &OutSyncPoint, &OutCommand, - getMemAllocationFunc); + *ExecKernel, MAlternativeKernels, MSyncPointDeps, &OutSyncPoint, + &OutCommand, getMemAllocationFunc); MEvent->setSyncPoint(OutSyncPoint); MEvent->setCommandBufferCommand(OutCommand); return result; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 1aecf5ed4eabb..dc61bf422b186 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -637,7 +637,8 @@ class ExecCGCommand : public Command { ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr, - const std::vector &Dependencies = {}); + const std::vector &Dependencies = {}, + const std::vector &AlternativeKernels = {}); std::vector> getAuxiliaryResources() const; @@ -674,6 +675,7 @@ class ExecCGCommand : public Command { AllocaCommandBase *getAllocaForReq(Requirement *Req); std::unique_ptr MCommandGroup; + std::vector MAlternativeKernels; friend class Command; }; @@ -733,6 +735,7 @@ ur_result_t enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, + const std::vector &AlternativeKernels, std::vector &SyncPoints, ur_exp_command_buffer_sync_point_t *OutSyncPoint, ur_exp_command_buffer_command_handle_t *OutCommand, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 5f95995e279d7..ecb4c558ac69e 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -921,13 +921,15 @@ Command *Scheduler::GraphBuilder::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, std::vector &ToEnqueue, bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer, - const std::vector &Dependencies) { + const std::vector &Dependencies, + const std::vector &AlternativeKernels) { + std::vector &Reqs = CommandGroup->getRequirements(); std::vector &Events = CommandGroup->getEvents(); - auto NewCmd = std::make_unique(std::move(CommandGroup), Queue, - EventNeeded, CommandBuffer, - std::move(Dependencies)); + auto NewCmd = std::make_unique( + std::move(CommandGroup), Queue, EventNeeded, CommandBuffer, + std::move(Dependencies), std::move(AlternativeKernels)); if (!NewCmd) throw exception(make_error_code(errc::memory_allocation), diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index ac1d8ca44c5dc..92358a2d5f1df 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -97,7 +97,9 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, EventImplPtr Scheduler::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer, - const std::vector &Dependencies) { + const std::vector &Dependencies, + const std::vector &AlternativeKernels) { + EventImplPtr NewEvent = nullptr; const CGType Type = CommandGroup->getType(); std::vector AuxiliaryCmds; @@ -122,7 +124,8 @@ EventImplPtr Scheduler::addCG( default: NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded, CommandBuffer, - std::move(Dependencies)); + std::move(Dependencies), + std::move(AlternativeKernels)); } NewEvent = NewCmd->getEvent(); NewEvent->setSubmissionTime(); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index c6d2d07600d12..0b77605ec5cee 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -372,13 +372,18 @@ class Scheduler { /// \param EventNeeded Specifies whether an event is explicitly required. /// \param CommandBuffer Optional command buffer to enqueue to instead of /// directly to the queue. - /// \param Dependencies Optional list of dependency - /// sync points when enqueuing to a command buffer. + /// \param Dependencies Optional list of dependency to other command-buffer + /// sync points when enqueuing to a command buffer. Only valid to pass when + /// \p CommandBuffer is not null. + /// \param AlternativeKernels Optional list of kernels that the command can + /// be dynamically updated to. Only valid to pass when \p CommandBuffer is + /// not null. /// \return an event object to wait on for command group completion. EventImplPtr addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr, - const std::vector &Dependencies = {}); + const std::vector &Dependencies = {}, + const std::vector &AlternativeKernels = {}); /// Registers a command group, that copies most recent memory to the memory /// pointed by the requirement. @@ -548,18 +553,22 @@ class Scheduler { /// \sa queue::submit, Scheduler::addCG /// \param CommandBuffer Optional command buffer to enqueue to instead of /// directly to the queue. - /// \param Dependencies Optional list of dependency - /// sync points when enqueuing to a command buffer. + /// \param Dependencies Optional list of dependency sync points when + /// enqueuing to a command buffer. + /// \param AlternativeKernels Optional list of kernels that the command can + /// dynamically be updated to. Only valid to pass when CommandBuffer is not + /// null. /// /// \return a command that represents command group execution and a bool /// indicating whether this command should be enqueued to the graph /// processor right away or not. - Command *addCG(std::unique_ptr CommandGroup, - const QueueImplPtr &Queue, std::vector &ToEnqueue, - bool EventNeeded, - ur_exp_command_buffer_handle_t CommandBuffer = nullptr, - const std::vector - &Dependencies = {}); + Command * + addCG(std::unique_ptr CommandGroup, const QueueImplPtr &Queue, + std::vector &ToEnqueue, bool EventNeeded, + ur_exp_command_buffer_handle_t CommandBuffer = nullptr, + const std::vector &Dependencies = + {}, + const std::vector &AlternativeKernels = {}); /// Registers a \ref CG "command group" that updates host memory to the /// latest state. diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp new file mode 100644 index 0000000000000..74497f554b6d4 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp @@ -0,0 +1,60 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests using dynamic command-group objects with buffer accessors + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + const size_t N = 1024; + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + auto Acc = Buf.get_access(); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.require(Acc); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.require(Acc); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp new file mode 100644 index 0000000000000..a7696709c57d8 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp @@ -0,0 +1,74 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests adding a dynamic command-group node to a graph using buffer +// accessors for the node edges. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + const size_t N = 1024; + int *Ptr = (int *)sycl::malloc_device(N, Queue); + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto RootNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 1; }); + }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto LeafNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for( + N, [=](item<1> Item) { Ptr[Item.get_id()] = Acc[Item.get_id()]; }); + }); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == (PatternA + 1)); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == (PatternB + 1)); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp new file mode 100644 index 0000000000000..35346fd8cf37f --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp @@ -0,0 +1,79 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: level_zero + +// Tests updating an accessor argument to a graph node created from SPIR-V +// using dynamic command-groups. + +#include "../graph_common.hpp" + +int main(int, char **argv) { + queue Queue{}; + sycl::kernel_bundle KernelBundle = loadKernelsFromFile(Queue, argv[1]); + const auto getKernel = + [](sycl::kernel_bundle &bundle, + const std::string &name) { + return bundle.ext_oneapi_get_kernel(name); + }; + + kernel kernel = getKernel( + KernelBundle, "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_"); + + const size_t N = 1024; + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + std::vector HostDataA(N, 0); + std::vector HostDataB(N, 0); + + buffer BufA{HostDataA}; + buffer BufB{HostDataB}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + auto AccA = BufA.get_access(); + auto AccB = BufB.get_access(); + + auto CGFA = [&](handler &CGH) { + CGH.require(AccA); + CGH.set_arg(0, AccA); + CGH.single_task(kernel); + }; + + auto CGFB = [&](handler &CGH) { + CGH.require(AccB); + CGH.set_arg(0, AccB); + CGH.single_task(kernel); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); + Queue.copy(BufB.get_access(), HostDataB.data()).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == i); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); + Queue.copy(BufB.get_access(), HostDataB.data()).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == i); + assert(HostDataB[i] == i); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp new file mode 100644 index 0000000000000..f0f7dda3e3416 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp @@ -0,0 +1,73 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests adding a dynamic command-group node to a graph using graph limited +// events for dependencies. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + int *PtrC = malloc_device(N, Queue); + std::vector HostData(N); + + Graph.begin_recording(Queue); + int PatternA = 42; + auto EventA = Queue.fill(PtrA, PatternA, N); + int PatternB = 0xA; + auto EventB = Queue.fill(PtrA, PatternB, N); + Graph.end_recording(Queue); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(N, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] * PtrB[I]; + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(N, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] + PtrB[I]; + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA * PatternB); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA + PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp new file mode 100644 index 0000000000000..a72a20558d24f --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests updating a dynamic command-group node where the dynamic command-groups +// have different nd-ranges + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + auto RootNode = Graph.add([&](handler &cgh) { cgh.memset(Ptr, N, 0); }); + + int PatternA = 42; + sycl::range<1> RangeA{512}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(RangeA, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + sycl::range<1> RangeB{256}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(RangeB, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + if (i < RangeA.get(0)) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + if (i < RangeB.get(0)) { + assert(HostData[i] == PatternB); + } else if (i < RangeA.get(0)) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp new file mode 100644 index 0000000000000..479e7be5ca7e0 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite.cpp @@ -0,0 +1,59 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests how the nd-range of a node is preserved when it is updated alongside +// the active command-group + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + std::vector HostData(N); + int *Ptr = malloc_device(N, Queue); + Queue.memset(Ptr, N, 0).wait(); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + sycl::range<1> UpdateRange(512); + DynamicCGNode.update_range(UpdateRange); + + DynamicCG.set_active_cgf(1); + + // Check if UpdateRange is still used + DynamicCG.set_active_cgf(0); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + if (i < UpdateRange.get(0)) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp new file mode 100644 index 0000000000000..7709b78cceab2 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp @@ -0,0 +1,70 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests updating kernel code using dynamic command-groups that have different +// parameters in each command-group. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + + std::vector HostDataA(N); + std::vector HostDataB(N); + + Queue.memset(PtrA, N, 0); + Queue.memset(PtrB, N, 0); + Queue.wait(); + + int PatternA = 0xA; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = PatternA; }); + }; + + int PatternB = 42; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), N); + Queue.copy(PtrB, HostDataB.data(), N); + Queue.wait(); + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), N); + Queue.copy(PtrB, HostDataB.data(), N); + Queue.wait(); + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp new file mode 100644 index 0000000000000..065e9266ff107 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests using the same dynamic command-group in more than one graph node. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + std::vector HostData(N); + int *Ptr = malloc_device(N, Queue); + Queue.memset(Ptr, N, 0).wait(); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto Node1 = Graph.add(DynamicCG); + + auto Node2 = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] *= 2; }); + }, + exp_ext::property::node::depends_on(Node1)); + + auto Node3 = Graph.add(DynamicCG, exp_ext::property::node::depends_on(Node2)); + sycl::range<1> Node3Range(512); + Node3.update_range(Node3Range); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + int Ref = PatternA * 2; + if (i < Node3Range.get(0)) { + Ref += PatternA; + } + assert(HostData[i] == Ref); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(Node1); + ExecGraph.update(Node2); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + int Ref = (PatternB * 2) + PatternB; + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp new file mode 100644 index 0000000000000..1e8edf5ef3022 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp @@ -0,0 +1,48 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests using updating a dynamic command-group node after it has been added to +// a graph but before the graph has been finalized + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(); + + DynamicCG.set_active_cgf(1); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp new file mode 100644 index 0000000000000..45986891fabf4 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp @@ -0,0 +1,55 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// XFAIL: level_zero + +// Tests updating usm kernel code using dynamic command-groups + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_param.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_param.cpp new file mode 100644 index 0000000000000..16af60e5e79d7 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_param.cpp @@ -0,0 +1,104 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// Tests using a dynamic command-group object with dynamic parameters inside it + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 1024; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + int *PtrC = malloc_device(N, Queue); + + std::vector HostDataA(N); + std::vector HostDataB(N); + std::vector HostDataC(N); + + exp_ext::dynamic_parameter DynParam1(Graph, PtrA); + exp_ext::dynamic_parameter DynParam2(Graph, PtrC); + + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParam1); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < N; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParam1); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < N; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFC = [&](handler &CGH) { + CGH.set_arg(0, DynParam2); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < N; i++) { + PtrC[i] = i; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { + Queue.memset(PtrA, 0, N * sizeof(int)); + Queue.memset(PtrB, 0, N * sizeof(int)); + Queue.memset(PtrC, 0, N * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), N); + Queue.copy(PtrB, HostDataB.data(), N); + Queue.copy(PtrC, HostDataC.data(), N); + Queue.wait(); + + for (size_t i = 0; i < N; i++) { + assert(HostDataA[i] == A ? i : 0); + assert(HostDataB[i] == B ? i : 0); + assert(HostDataC[i] == C ? i : 0); + } + }; + ExecuteGraphAndVerifyResults(true, false, false); + + DynParam1.update(PtrB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, false, true); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 1542b5f34d7dc..1422419ab96de 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -700,3 +700,161 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { }), sycl::exception); } + +// Error when a dynamic command-group is used with a graph belonging to a +// different context. +TEST_F(CommandGraphTest, DynamicCommandGroupWrongContext) { + device Dev; + context Ctx{Dev}; + context Ctx2{Dev}; + queue Q1{Ctx, Dev}; + queue Q2{Ctx2, Dev}; + + experimental::command_graph Graph{Q1.get_context(), Q1.get_device()}; + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + { + experimental::dynamic_command_group DynCG(Q2, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } + + { + experimental::dynamic_command_group DynCG(Ctx2, Dev, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } +} + +// Error when a dynamic command-group is used with a graph belonging to a +// different device. +TEST_F(CommandGraphTest, DynamicCommandGroupWrongDevice) { + auto Devices = device::get_devices(); + + // Test needs at least 2 devices available. + if (Devices.size() < 2) { + GTEST_SKIP(); + } + + device &Dev1 = Devices[0]; + device &Dev2 = Devices[1]; + context Ctx{{Dev1, Dev2}}; + queue Q1{Ctx, Dev1}; + queue Q2{Ctx, Dev2}; + + experimental::command_graph Graph{Q1.get_context(), Q1.get_device()}; + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + { + ext::oneapi::experimental::dynamic_command_group DynCG(Q2, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } + + { + experimental::dynamic_command_group DynCG(Ctx, Dev2, {CGF}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + } +} + +// Error when a dynamic command-group contains no command-groups. +TEST_F(CommandGraphTest, DynamicCommandGroupEmpty) { + ASSERT_THROW(experimental::dynamic_command_group DynCG(Queue, {}), + sycl::exception); +} + +// Error when an out of bounds index is selected by the user +TEST_F(CommandGraphTest, DynamicCommandGroupInvalidIndex) { + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + experimental::dynamic_command_group DynCG(Queue, {CGF}); + ASSERT_THROW(DynCG.set_active_cgf(1), sycl::exception); +} + +// Error when a non-kernel command-group is included in a dynamic command-group +TEST_F(CommandGraphTest, DynamicCommandGroupNotKernel) { + int *Ptr = malloc_device(1, Queue); + auto CGF = [&](sycl::handler &CGH) { CGH.memset(Ptr, 1, 0); }; + + ASSERT_THROW(experimental::dynamic_command_group DynCG(Queue, {CGF}), + sycl::exception); + sycl::free(Ptr, Queue); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using graph limited events to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchEventEdges) { + size_t N = 32; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); + }); + + auto EventB = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on(EventA); + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on(EventB); + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Queue, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using accessors to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) { + size_t N = 32; + std::vector HostData(N, 0); + buffer BufA{HostData}; + buffer BufB{HostData}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + + Queue.submit([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); + }); + + Queue.submit([&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Queue, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); +}