diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index d7587113a4615..23d32fd9ff7e2 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -291,18 +291,17 @@ from a `std::unique_ptr` to a `std::shared_ptr` so that multiple nodes and the the overhead of having to allocate and free copies of the CG when a new active CG is selected. -The `dynamic_command_group_impl` class contains weak pointers to the nodes which -have been created with it, so that when a new active CG is selected it can -propagate the change to those nodes. The `node_impl` class also contains a -reference to the dynamic command-group that created it, so that when the graph -is finalized each node can use the list of kernels in its dynamic command-group -as part of the `urCommandBufferAppendKernelLaunchExp` call to pass the possible -alternative kernels. +The `dynamic_command_group_impl` class contains a list of weak pointers to the +nodes which have been created with it, so that when a new active CG is selected +it can propagate the change to those nodes. The `dynamic_parameter_impl` class +also contains a list of weak pointers, but to the `dynamic_command_group_impl` +instances of any dynamic command-groups where they are used. This allows +updating the dynamic parameter to propagate to dynamic command-group nodes. The `sycl::detail::CGExecKernel` class has been added to, so that if the object was created from an element in the dynamic command-group list, the class stores a vector of weak pointers to the other alternative command-groups created -from the same dynamic command-group object. This allows the DPC++ scheduler to +from the same dynamic command-group object. This allows the SYCL runtime to access the list of alternative kernels when calling the UR API to append a kernel command to a command-buffer. diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index ad10a3bdeefaa..0c9394045acfa 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -562,7 +562,6 @@ graph_impl::add(std::shared_ptr &DynCGImpl, // Track the dynamic command-group used inside the node object DynCGImpl->MNodes.push_back(NodeImpl); - NodeImpl->MDynCG = DynCGImpl; return NodeImpl; } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 3b1fc3fa01641..11b432f208ea1 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -99,8 +99,6 @@ class node_impl : public std::enable_shared_from_this { /// Stores the executable graph impl associated with this node if it is a /// subgraph node. std::shared_ptr MSubGraphImpl; - /// Dynamic command-group object used in node, if any. - std::shared_ptr MDynCG; /// Used for tracking visited status during cycle checks. bool MVisited = false; @@ -158,7 +156,7 @@ class node_impl : public std::enable_shared_from_this { : enable_shared_from_this(Other), MSuccessors(Other.MSuccessors), MPredecessors(Other.MPredecessors), MCGType(Other.MCGType), MNodeType(Other.MNodeType), MCommandGroup(Other.getCGCopy()), - MSubGraphImpl(Other.MSubGraphImpl), MDynCG(Other.MDynCG) {} + MSubGraphImpl(Other.MSubGraphImpl) {} /// Copy-assignment operator. This will perform a deep-copy of the /// command group object associated with this node. @@ -170,7 +168,6 @@ class node_impl : public std::enable_shared_from_this { MNodeType = Other.MNodeType; MCommandGroup = Other.getCGCopy(); MSubGraphImpl = Other.MSubGraphImpl; - MDynCG = Other.MDynCG; } return *this; } @@ -420,7 +417,7 @@ class node_impl : public std::enable_shared_from_this { throw sycl::exception(sycl::errc::invalid, "Cannot update execution range of a node with an " "execution range of different dimensions than what " - "the node was originall created with."); + "the node was original created with."); } NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; @@ -441,7 +438,7 @@ class node_impl : public std::enable_shared_from_this { throw sycl::exception(sycl::errc::invalid, "Cannot update execution range of a node with an " "execution range of different dimensions than what " - "the node was originall created with."); + "the node was original created with."); } NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp index 2b5f378d8bed7..a5e5a1ea78b87 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp @@ -14,9 +14,7 @@ int main() { queue Queue{}; - const size_t N = 1024; - std::vector HostData(N, 0); - buffer Buf{HostData}; + buffer Buf{sycl::range<1>(Size)}; Buf.set_write_back(false); auto Acc = Buf.get_access(); @@ -28,13 +26,15 @@ int main() { int PatternA = 42; auto CGFA = [&](handler &CGH) { CGH.require(Acc); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] = PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { CGH.require(Acc); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] = PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -42,8 +42,9 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); + std::vector HostData(Size, 0); Queue.copy(Acc, HostData.data()).wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternA); } @@ -52,7 +53,7 @@ int main() { Queue.ext_oneapi_graph(ExecGraph).wait(); Queue.copy(Acc, HostData.data()).wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternB); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp index 5ce7a4bf40df1..7b477edacff98 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp @@ -15,10 +15,8 @@ int main() { queue Queue{}; - const size_t N = 1024; - int *Ptr = (int *)sycl::malloc_device(N, Queue); - std::vector HostData(N, 0); - buffer Buf{HostData}; + int *Ptr = (int *)sycl::malloc_device(Size, Queue); + buffer Buf{sycl::range<1>(Size)}; Buf.set_write_back(false); exp_ext::command_graph Graph{ @@ -28,19 +26,21 @@ int main() { auto RootNode = Graph.add([&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 1; }); + CGH.parallel_for(Size, [=](item<1> Item) { Acc[Item.get_id()] = 1; }); }); int PatternA = 42; auto CGFA = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] += PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] += PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] += PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] += PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -49,23 +49,28 @@ int main() { auto LeafNode = Graph.add([&](handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for( - N, [=](item<1> Item) { Ptr[Item.get_id()] = Acc[Item.get_id()]; }); + Size, [=](item<1> Item) { Ptr[Item.get_id()] = Acc[Item.get_id()]; }); }); auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - assert(HostData[i] == (PatternA + 1)); + + std::vector HostData(Size, 0); + Queue.copy(Ptr, HostData.data(), Size).wait(); + + int Ref = PatternA + 1; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); } DynamicCG.set_active_cgf(1); ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - assert(HostData[i] == (PatternB + 1)); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternB + 1; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); } sycl::free(Ptr, Queue); diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp index 8d50b8b26e0c2..a420d7deb58de 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp @@ -16,11 +16,9 @@ int main() { queue Queue{}; - const size_t N = 1024; - int *Ptr = (int *)sycl::malloc_device(N, Queue); - std::vector HostData(N, 0); - buffer BufA{sycl::range<1>(N)}; - buffer BufB{sycl::range<1>(N)}; + int *Ptr = (int *)sycl::malloc_device(Size, Queue); + buffer BufA{sycl::range<1>(Size)}; + buffer BufB{sycl::range<1>(Size)}; BufA.set_write_back(false); BufB.set_write_back(false); @@ -34,7 +32,7 @@ int main() { auto RootNode = Graph.add([&](handler &CGH) { auto AccA = BufA.get_access(CGH); auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { + CGH.parallel_for(Size, [=](item<1> Item) { AccA[Item.get_id()] = InitA; AccB[Item.get_id()] = InitB; }); @@ -43,13 +41,15 @@ int main() { int PatternA = 42; auto CGFA = [&](handler &CGH) { auto AccA = BufA.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { AccA[Item.get_id()] += PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { AccB[Item.get_id()] += PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -58,15 +58,17 @@ int main() { auto LeafNode = Graph.add([&](handler &CGH) { auto AccA = BufA.get_access(CGH); auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { + CGH.parallel_for(Size, [=](item<1> Item) { Ptr[Item.get_id()] = AccA[Item.get_id()] + AccB[Item.get_id()]; }); }); auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + + std::vector HostData(Size, 0); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == (InitA + InitB + PatternA)); } @@ -74,9 +76,10 @@ int main() { ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - assert(HostData[i] == (InitA + InitB + PatternB)); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = InitA + InitB + PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); } sycl::free(Ptr, Queue); diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp index 80556f60fc75f..0eaa714463670 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp @@ -26,17 +26,13 @@ int main(int, char **argv) { kernel kernel = getKernel( KernelBundle, "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_"); - const size_t N = 1024; - exp_ext::command_graph Graph{ Queue.get_context(), Queue.get_device(), {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; - std::vector HostDataA(N, 0); - std::vector HostDataB(N, 0); - buffer BufA{HostDataA}; - buffer BufB{HostDataB}; + buffer BufA{sycl::range<1>(Size)}; + buffer BufB{sycl::range<1>(Size)}; BufA.set_write_back(false); BufB.set_write_back(false); @@ -60,9 +56,12 @@ int main(int, char **argv) { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostDataA(Size, 0); + std::vector HostDataB(Size, 0); Queue.copy(BufA.get_access(), HostDataA.data()).wait(); Queue.copy(BufB.get_access(), HostDataB.data()).wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == i); assert(HostDataB[i] == 0); } @@ -71,9 +70,10 @@ int main(int, char **argv) { ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); Queue.copy(BufB.get_access(), HostDataB.data()).wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == i); assert(HostDataB[i] == i); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp index e1602864b44a0..795dc074d882e 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp @@ -17,16 +17,15 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *Ptr = malloc_device(N, Queue); - std::vector HostData(N); + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); // 3 kernel arguments: Ptr, PatternA, PatternB int PatternA = 42; int PatternB = 0xA; auto CGFA = [&](handler &CGH) { CGH.parallel_for( - N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA + PatternB; }); + Size, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA + PatternB; }); }; // 2 kernel arguments: Ptr, MyPatternStruct @@ -36,20 +35,21 @@ int main() { }; PatternStruct MyPatternStruct{PatternA + 1, PatternB + 1}; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { + CGH.parallel_for(Size, [=](item<1> Item) { Ptr[Item.get_id()] = MyPatternStruct.PatternA + MyPatternStruct.PatternB; }); }; // 1 kernel argument: Ptr auto CGFC = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = 42 - 0xA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = 42 - 0xA; }); }; // 4 kernel argument: Ptr int PatternC = -12; auto CGFD = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { + CGH.parallel_for(Size, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA + PatternB + PatternC; }); }; @@ -77,9 +77,9 @@ int main() { // Verify CGFA works with 3 arguments Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); int Ref = PatternA + PatternB; - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == Ref); } @@ -96,9 +96,9 @@ int main() { DynamicCG.set_active_cgf(1); ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); Ref = (PatternA + 1) + (PatternB + 1); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == Ref); } @@ -113,9 +113,9 @@ int main() { DynamicCG.set_active_cgf(2); ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); Ref = PatternA - PatternB; - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == Ref); } @@ -136,9 +136,9 @@ int main() { DynamicCG.set_active_cgf(3); ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); Ref = PatternA + PatternB + PatternC; - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == Ref); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp index 11e28a033a4c2..0964f6e0c354e 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp @@ -17,22 +17,21 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *PtrA = malloc_device(N, Queue); - int *PtrB = malloc_device(N, Queue); - int *PtrC = malloc_device(N, Queue); - std::vector HostData(N); + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + std::vector HostData(Size); Graph.begin_recording(Queue); int PatternA = 42; - auto EventA = Queue.fill(PtrA, PatternA, N); + auto EventA = Queue.fill(PtrA, PatternA, Size); int PatternB = 0xA; - auto EventB = Queue.fill(PtrB, PatternB, N); + auto EventB = Queue.fill(PtrB, PatternB, Size); Graph.end_recording(Queue); auto CGFA = [&](handler &CGH) { CGH.depends_on({EventA, EventB}); - CGH.parallel_for(N, [=](item<1> Item) { + CGH.parallel_for(Size, [=](item<1> Item) { auto I = Item.get_id(); PtrC[I] = PtrA[I] * PtrB[I]; }); @@ -40,7 +39,7 @@ int main() { auto CGFB = [&](handler &CGH) { CGH.depends_on({EventA, EventB}); - CGH.parallel_for(N, [=](item<1> Item) { + CGH.parallel_for(Size, [=](item<1> Item) { auto I = Item.get_id(); PtrC[I] = PtrA[I] + PtrB[I]; }); @@ -51,8 +50,8 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrC, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(PtrC, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternA * PatternB); } @@ -60,8 +59,8 @@ int main() { ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrC, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(PtrC, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternA + PatternB); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp index f4717210bb35e..237e9173f253e 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp @@ -17,23 +17,23 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *Ptr = malloc_device(N, Queue); - std::vector HostData(N); + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); auto RootNode = - Graph.add([&](handler &cgh) { cgh.memset(Ptr, 0, N * sizeof(int)); }); + Graph.add([&](handler &cgh) { cgh.memset(Ptr, 0, Size * sizeof(int)); }); int PatternA = 42; - sycl::range<1> RangeA{512}; + size_t ItemsA = Size / 2; + sycl::range<1> RangeA{ItemsA}; auto CGFA = [&](handler &CGH) { CGH.parallel_for(RangeA, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); }; int PatternB = 0xA; - size_t UpdatedN = 256; - sycl::nd_range<1> RangeB{sycl::range{UpdatedN}, sycl::range{16}}; + size_t ItemsB = Size / 4; + sycl::nd_range<1> RangeB{sycl::range{ItemsB}, sycl::range{16}}; auto CGFB = [&](handler &CGH) { CGH.parallel_for( RangeB, [=](nd_item<1> Item) { Ptr[Item.get_global_id()] = PatternB; }); @@ -45,9 +45,9 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - if (i < RangeA.get(0)) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < ItemsA) { assert(HostData[i] == PatternA); } else { assert(HostData[i] == 0); @@ -58,9 +58,9 @@ int main() { ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - if (i < UpdatedN) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < ItemsB) { assert(HostData[i] == PatternB); } else { assert(HostData[i] == 0); diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp index f6390df64303a..261ac6ecf5c3b 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp @@ -9,7 +9,7 @@ // XFAIL-TRACKER: OFNAAO-307 // Tests updating a dynamic command-group node where the dynamic command-groups -// have different ranges/nd-ranges +// have different range/nd-range dimensions #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp index 3ba2500cd6189..c34bd45f0f46e 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp @@ -16,25 +16,26 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - std::vector HostData(N); - int *Ptr = malloc_device(N, Queue); - Queue.memset(Ptr, 0, N * sizeof(int)).wait(); + std::vector HostData(Size); + int *Ptr = malloc_device(Size, Queue); + Queue.memset(Ptr, 0, Size * sizeof(int)).wait(); int PatternA = 42; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); auto DynamicCGNode = Graph.add(DynamicCG); - size_t NewRange = 512; + size_t NewRange = Size / 2; sycl::range<1> UpdateRange(NewRange); DynamicCGNode.update_range(UpdateRange); @@ -45,8 +46,8 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { if (i < NewRange) { assert(HostData[i] == PatternA); } else { diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp index 0c46672869c7d..7049b5bdde305 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp @@ -17,25 +17,26 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *PtrA = malloc_device(N, Queue); - int *PtrB = malloc_device(N, Queue); + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); - std::vector HostDataA(N); - std::vector HostDataB(N); + std::vector HostDataA(Size); + std::vector HostDataB(Size); - Queue.memset(PtrA, 0, N * sizeof(int)); - Queue.memset(PtrB, 0, N * sizeof(int)); + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); Queue.wait(); int PatternA = 0xA; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { PtrA[Item.get_id()] = PatternA; }); }; int PatternB = 42; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { PtrB[Item.get_id()] = PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -43,10 +44,10 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N); - Queue.copy(PtrB, HostDataB.data(), N); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); Queue.wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == PatternA); assert(HostDataB[i] == 0); } @@ -55,10 +56,10 @@ int main() { ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N); - Queue.copy(PtrB, HostDataB.data(), N); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); Queue.wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == PatternA); assert(HostDataB[i] == PatternB); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp index f9b0728d8ea67..1f98200791b6c 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp @@ -16,21 +16,22 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - std::vector HostData(N); - int *Ptr = malloc_device(N, Queue); + std::vector HostData(Size); + int *Ptr = malloc_device(Size, Queue); auto RootNode = - Graph.add([&](handler &CGH) { CGH.memset(Ptr, 0, N * sizeof(int)); }); + Graph.add([&](handler &CGH) { CGH.memset(Ptr, 0, Size * sizeof(int)); }); int PatternA = 42; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -39,20 +40,20 @@ int main() { auto Node2 = Graph.add( [&](handler &cgh) { - cgh.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] *= 2; }); + cgh.parallel_for(Size, [=](item<1> Item) { Ptr[Item.get_id()] *= 2; }); }, exp_ext::property::node::depends_on(Node1)); auto Node3 = Graph.add(DynamicCG, exp_ext::property::node::depends_on(Node2)); // This ND-Range affects Node 1 as well, as the range is tied to the node. - sycl::range<1> Node3Range(512); + sycl::range<1> Node3Range(Size / 2); Node3.update_range(Node3Range); auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { int Ref = (i < Node3Range.get(0)) ? (PatternA * 3) : 0; assert(HostData[i] == Ref); } @@ -62,9 +63,9 @@ int main() { ExecGraph.update(Node3); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - int Ref = (PatternB * 3); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = (PatternB * 3); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == Ref); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp index a9109d000eb17..2038c94610dfc 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp @@ -17,18 +17,19 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *Ptr = malloc_device(N, Queue); - std::vector HostData(N); + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); int PatternA = 42; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -37,8 +38,8 @@ int main() { auto ExecGraph = Graph.finalize(); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternB); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp index 79db8ebe67c57..100701f7b62aa 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp @@ -16,18 +16,19 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *Ptr = malloc_device(N, Queue); - std::vector HostData(N); + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); int PatternA = 42; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); }; auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); @@ -35,8 +36,8 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternA); } @@ -44,8 +45,8 @@ int main() { ExecGraph.update(DynamicCGNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternB); } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp index cb9bdf15f76b8..53b34d1add289 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp @@ -16,14 +16,13 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *PtrA = malloc_device(N, Queue); - int *PtrB = malloc_device(N, Queue); - int *PtrC = malloc_device(N, Queue); + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); - std::vector HostDataA(N); - std::vector HostDataB(N); - std::vector HostDataC(N); + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); exp_ext::dynamic_parameter DynParam1(Graph, PtrA); exp_ext::dynamic_parameter DynParam2(Graph, PtrC); @@ -33,7 +32,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = i; } }); @@ -44,7 +43,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = i; } }); @@ -55,7 +54,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrC[i] = i; } }); @@ -69,7 +68,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular // kernels when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrC[i] += i; } }); @@ -79,19 +78,19 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { - Queue.memset(PtrA, 0, N * sizeof(int)); - Queue.memset(PtrB, 0, N * sizeof(int)); - Queue.memset(PtrC, 0, N * sizeof(int)); + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); Queue.wait(); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N); - Queue.copy(PtrB, HostDataB.data(), N); - Queue.copy(PtrC, HostDataC.data(), N); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); Queue.wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == (A ? i : 0)); assert(HostDataB[i] == (B ? i : 0)); assert(HostDataC[i] == (C ? (2 * i) : i)); diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp index 15f815664a740..00482185ebc27 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp @@ -17,14 +17,13 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *PtrA = malloc_device(N, Queue); - int *PtrB = malloc_device(N, Queue); - int *PtrC = malloc_device(N, Queue); + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); - std::vector HostDataA(N); - std::vector HostDataB(N); - std::vector HostDataC(N); + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); int ScalarValue = 17; exp_ext::dynamic_parameter DynParamScalar(Graph, ScalarValue); @@ -37,7 +36,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = ScalarValue; } }); @@ -49,7 +48,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = ScalarValue; } }); @@ -62,7 +61,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrC[i] = ScalarValue; } }); @@ -73,7 +72,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = ScalarValue; } }); @@ -86,19 +85,19 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); auto ExecuteGraphAndVerifyResults = [&](int A, int B, int C) { - Queue.memset(PtrA, 0, N * sizeof(int)); - Queue.memset(PtrB, 0, N * sizeof(int)); - Queue.memset(PtrC, 0, N * sizeof(int)); + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); Queue.wait(); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N); - Queue.copy(PtrB, HostDataB.data(), N); - Queue.copy(PtrC, HostDataC.data(), N); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); Queue.wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == A); assert(HostDataB[i] == B); assert(HostDataC[i] == C); diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp index 264c1b6849689..3213fc4eec2fe 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp @@ -17,14 +17,13 @@ int main() { queue Queue{}; exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *PtrA = malloc_device(N, Queue); - int *PtrB = malloc_device(N, Queue); - int *PtrC = malloc_device(N, Queue); + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); - std::vector HostDataA(N); - std::vector HostDataB(N); - std::vector HostDataC(N); + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); exp_ext::dynamic_parameter DynParam(Graph, PtrA); @@ -33,7 +32,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = i; } }); @@ -44,7 +43,7 @@ int main() { // TODO: Use the free function kernel extension instead of regular kernels // when available. CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrA[i] = i; } }); @@ -52,7 +51,7 @@ int main() { auto CGFC = [&](handler &CGH) { CGH.single_task([=]() { - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { PtrC[i] = i; } }); @@ -63,19 +62,19 @@ int main() { auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { - Queue.memset(PtrA, 0, N * sizeof(int)); - Queue.memset(PtrB, 0, N * sizeof(int)); - Queue.memset(PtrC, 0, N * sizeof(int)); + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); Queue.wait(); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N); - Queue.copy(PtrB, HostDataB.data(), N); - Queue.copy(PtrC, HostDataC.data(), N); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); Queue.wait(); - for (size_t i = 0; i < N; i++) { + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == (A ? i : 0)); assert(HostDataB[i] == (B ? i : 0)); assert(HostDataC[i] == (C ? i : 0)); diff --git a/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp b/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp index 0f1c10e5142bf..43db9d172e618 100644 --- a/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp +++ b/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp @@ -13,17 +13,15 @@ int main() { queue Queue{}; - const size_t N = 1024; - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - int *PtrA = malloc_device(N, Queue); + int *PtrA = malloc_device(Size, Queue); - std::vector HostDataA(N); + std::vector HostDataA(Size); - Queue.memset(PtrA, 0, N * sizeof(int)).wait(); + Queue.memset(PtrA, 0, Size * sizeof(int)).wait(); - nd_range<1> NDRange{range{N}, range{32}}; + nd_range<1> NDRange{range{Size}, range{32}}; auto KernelNode = Graph.add([&](handler &cgh) { cgh.parallel_for(NDRange, [=](nd_item<1> Item) { @@ -37,19 +35,20 @@ int main() { // first half of PtrA should be filled with values Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == i); } // Update NDRange to target first half only - KernelNode.update_range(range<1>{512}); + size_t UpdateSize = Size / 2; + KernelNode.update_range(range<1>{UpdateSize}); ExecGraph.update(KernelNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - assert(HostDataA[i] == (i >= 512 ? i : i * 2)); + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (i >= UpdateSize ? i : i * 2)); } return 0; } diff --git a/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp b/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp index 9489d20c6a916..94052c8379b58 100644 --- a/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp +++ b/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp @@ -13,17 +13,15 @@ int main() { queue Queue{}; - const size_t N = 1024; - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - int *PtrA = malloc_device(N, Queue); + int *PtrA = malloc_device(Size, Queue); - std::vector HostDataA(N); + std::vector HostDataA(Size); - Queue.memset(PtrA, 0, N * sizeof(int)).wait(); + Queue.memset(PtrA, 0, Size * sizeof(int)).wait(); - range<1> Range{1024}; + range<1> Range{Size}; auto KernelNode = Graph.add([&](handler &cgh) { cgh.parallel_for(Range, [=](item<1> Item) { @@ -37,20 +35,21 @@ int main() { // first half of PtrA should be filled with values Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostDataA[i] == i); } // Update NDRange to target first half only - nd_range<1> NDRange{range{512}, range{32}}; + size_t UpdateSize = Size / 2; + nd_range<1> NDRange{range{UpdateSize}, range{32}}; KernelNode.update_nd_range(NDRange); ExecGraph.update(KernelNode); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(PtrA, HostDataA.data(), N).wait(); - for (size_t i = 0; i < N; i++) { - assert(HostDataA[i] == (i >= 512 ? i : i * 2)); + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (i >= UpdateSize ? i : i * 2)); } return 0; } diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp index 03a0e19f8c51e..0e8b87c0725f2 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp @@ -17,18 +17,19 @@ int main() { exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()}; exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()}; - const size_t N = 1024; - int *Ptr = malloc_device(N, Queue); - std::vector HostData(N); + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); int PatternA = 42; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); }; int PatternB = 0xA; auto CGFB = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); }; auto DynamicCGA = exp_ext::dynamic_command_group(GraphA, {CGFA, CGFB}); @@ -41,8 +42,8 @@ int main() { auto ExecGraph = GraphA.finalize(exp_ext::property::graph::updatable{}); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternA); } @@ -64,8 +65,8 @@ int main() { ExecGraph.update(GraphB); Queue.ext_oneapi_graph(ExecGraph).wait(); - Queue.copy(Ptr, HostData.data(), N).wait(); - for (size_t i = 0; i < N; i++) { + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { assert(HostData[i] == PatternB); } diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index 676e3bead1416..b149cc08ccc88 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -134,6 +134,37 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { })); } +TEST_F(CommandGraphTest, UpdateRangeErrors) { + // Test that the correct errors are throw when trying to update node ranges + nd_range<1> NDRange{range{128}, range{32}}; + range<1> Range{128}; + auto NodeNDRange = Graph.add([&](sycl::handler &cgh) { + cgh.parallel_for>(NDRange, [](nd_item<1>) {}); + }); + + // OK + EXPECT_NO_THROW(NodeNDRange.update_nd_range(NDRange)); + // OK to update an nd_range node with a range of the same dimension + EXPECT_NO_THROW(NodeNDRange.update_range(Range)); + // Can't update with a different number of dimensions + EXPECT_ANY_THROW(NodeNDRange.update_nd_range( + nd_range<2>{range<2>{128, 128}, range<2>{32, 32}})); + EXPECT_ANY_THROW(NodeNDRange.update_range(range<3>{32, 32, 1})); + + auto NodeRange = Graph.add([&](sycl::handler &cgh) { + cgh.parallel_for>(range<1>{128}, [](item<1>) {}); + }); + + // OK + EXPECT_NO_THROW(NodeRange.update_range(Range)); + // OK to update a range node with an nd_range of the same dimension + EXPECT_NO_THROW(NodeRange.update_nd_range(NDRange)); + // Can't update with a different number of dimensions + EXPECT_ANY_THROW(NodeRange.update_range(range<2>{128, 128})); + EXPECT_ANY_THROW(NodeRange.update_nd_range( + nd_range<3>{range<3>{8, 8, 8}, range<3>{8, 8, 8}})); +} + class WholeGraphUpdateTest : public CommandGraphTest { protected: