Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][Graph] Implement dynamic command-groups #15700

Merged
merged 4 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions sycl/doc/design/CommandGraph.md
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,29 @@ requirements for these new accessors to correctly trigger allocations before
updating. This is similar to how individual graph commands are enqueued when
accessors are used in a graph node.

### Dynamic Command-Group

To implement the `dynamic_command_group` class for updating the command-groups (CG)
associated with nodes, the CG member of the node implementation class changes
from a `std::unique_ptr` to a `std::shared_ptr` so that multiple nodes and the
`dynamic_command_group_impl` object can share the same CG object. This avoids
the overhead of having to allocate and free copies of the CG when a new active
CG is selected.

The `dynamic_command_group_impl` class contains a list of weak pointers to the
nodes which have been created with it, so that when a new active CG is selected
it can propagate the change to those nodes. The `dynamic_parameter_impl` class
also contains a list of weak pointers, but to the `dynamic_command_group_impl`
instances of any dynamic command-groups where they are used. This allows
updating the dynamic parameter to propagate to dynamic command-group nodes.

The `sycl::detail::CGExecKernel` class has been added to, so that if the
object was created from an element in the dynamic command-group list, the class
stores a vector of weak pointers to the other alternative command-groups created
from the same dynamic command-group object. This allows the SYCL runtime to
access the list of alternative kernels when calling the UR API to append a
kernel command to a command-buffer.

## Optimizations
### Interactions with Profiling

Expand Down
24 changes: 24 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class node_impl;
class graph_impl;
class exec_graph_impl;
class dynamic_parameter_impl;
class dynamic_command_group_impl;
} // namespace detail

enum class node_type {
Expand Down Expand Up @@ -216,6 +217,23 @@ class depends_on_all_leaves : public ::sycl::detail::DataLessProperty<
} // namespace node
} // namespace property

class __SYCL_EXPORT dynamic_command_group {
public:
dynamic_command_group(
const command_graph<graph_state::modifiable> &Graph,
const std::vector<std::function<void(handler &)>> &CGFList);

size_t get_active_cgf() const;
void set_active_cgf(size_t Index);

private:
template <class Obj>
friend const decltype(Obj::impl) &
sycl::detail::getSyclObjImpl(const Obj &SyclObject);

std::shared_ptr<detail::dynamic_command_group_impl> impl;
};

namespace detail {
// Templateless modifiable command-graph base class.
class __SYCL_EXPORT modifiable_command_graph {
Expand Down Expand Up @@ -337,6 +355,12 @@ class __SYCL_EXPORT modifiable_command_graph {
modifiable_command_graph(const std::shared_ptr<detail::graph_impl> &Impl)
: impl(Impl) {}

/// Template-less implementation of add() for dynamic command-group nodes.
/// @param DynCGF Dynamic Command-group function object to add.
/// @param Dep List of predecessor nodes.
/// @return Node added to the graph.
node addImpl(dynamic_command_group &DynCGF, const std::vector<node> &Dep);

/// Template-less implementation of add() for CGF nodes.
/// @param CGF Command-group function to add.
/// @param Dep List of predecessor nodes.
Expand Down
14 changes: 3 additions & 11 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1175,7 +1175,6 @@ class __SYCL_EXPORT handler {
StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
std::move(Wrapper));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
#endif
} else
#endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ &&
Expand All @@ -1198,7 +1197,6 @@ class __SYCL_EXPORT handler {
StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
std::move(KernelFunc));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
#endif
#else
(void)KernelFunc;
Expand Down Expand Up @@ -1249,7 +1247,6 @@ class __SYCL_EXPORT handler {
StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
std::move(KernelFunc));
setType(detail::CGType::Kernel);
setNDRangeUsed(true);
#endif
}

Expand All @@ -1272,7 +1269,6 @@ class __SYCL_EXPORT handler {
setNDRangeDescriptor(std::move(NumWorkItems));
processLaunchProperties<PropertiesT>(Props);
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
extractArgsAndReqs();
MKernelName = getKernelName();
#endif
Expand All @@ -1298,7 +1294,6 @@ class __SYCL_EXPORT handler {
setNDRangeDescriptor(std::move(NDRange));
processLaunchProperties(Props);
setType(detail::CGType::Kernel);
setNDRangeUsed(true);
extractArgsAndReqs();
MKernelName = getKernelName();
#endif
Expand Down Expand Up @@ -1339,7 +1334,6 @@ class __SYCL_EXPORT handler {
setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true);
StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
#endif // __SYCL_DEVICE_ONLY__
}

Expand Down Expand Up @@ -1971,7 +1965,6 @@ class __SYCL_EXPORT handler {
StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
std::move(KernelFunc));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
#endif
}

Expand Down Expand Up @@ -2069,7 +2062,6 @@ class __SYCL_EXPORT handler {
detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
extractArgsAndReqs();
MKernelName = getKernelName();
#endif
Expand Down Expand Up @@ -2148,7 +2140,6 @@ class __SYCL_EXPORT handler {
setNDRangeDescriptor(std::move(NumWorkItems));
MKernel = detail::getSyclObjImpl(std::move(Kernel));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
if (!lambdaAndKernelHaveEqualName<NameT>()) {
extractArgsAndReqs();
MKernelName = getKernelName();
Expand Down Expand Up @@ -2189,7 +2180,6 @@ class __SYCL_EXPORT handler {
setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
MKernel = detail::getSyclObjImpl(std::move(Kernel));
setType(detail::CGType::Kernel);
setNDRangeUsed(false);
if (!lambdaAndKernelHaveEqualName<NameT>()) {
extractArgsAndReqs();
MKernelName = getKernelName();
Expand Down Expand Up @@ -2229,7 +2219,6 @@ class __SYCL_EXPORT handler {
setNDRangeDescriptor(std::move(NDRange));
MKernel = detail::getSyclObjImpl(std::move(Kernel));
setType(detail::CGType::Kernel);
setNDRangeUsed(true);
if (!lambdaAndKernelHaveEqualName<NameT>()) {
extractArgsAndReqs();
MKernelName = getKernelName();
Expand Down Expand Up @@ -3357,6 +3346,7 @@ class __SYCL_EXPORT handler {
size_t Size, bool Block = false);
friend class ext::oneapi::experimental::detail::graph_impl;
friend class ext::oneapi::experimental::detail::dynamic_parameter_impl;
friend class ext::oneapi::experimental::detail::dynamic_command_group_impl;

bool DisableRangeRounding();

Expand Down Expand Up @@ -3626,8 +3616,10 @@ class __SYCL_EXPORT handler {
}
#endif

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Set that an ND Range was used during a call to parallel_for
void setNDRangeUsed(bool Value);
#endif

inline void internalProfilingTagImpl() {
throwIfActionIsCreated();
Expand Down
5 changes: 4 additions & 1 deletion sycl/source/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ class CGExecKernel : public CG {
std::string MKernelName;
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
/// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list
/// of command-groups that a kernel command can be updated to.
std::vector<std::weak_ptr<CGExecKernel>> MAlternativeKernels;
ur_kernel_cache_config_t MKernelCacheConfig;
bool MKernelIsCooperative = false;
bool MKernelUsesClusterLaunch = false;
Expand All @@ -277,7 +280,7 @@ class CGExecKernel : public CG {
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
MKernelName(std::move(KernelName)), MStreams(std::move(Streams)),
MAuxiliaryResources(std::move(AuxiliaryResources)),
MKernelCacheConfig(std::move(KernelCacheConfig)),
MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)),
MKernelIsCooperative(KernelIsCooperative),
MKernelUsesClusterLaunch(MKernelUsesClusterLaunch) {
assert(getType() == CGType::Kernel && "Wrong type of exec kernel CG.");
Expand Down
Loading
Loading