From e7e3b9687a260df0f432ac91417c4a4e430a17b8 Mon Sep 17 00:00:00 2001 From: Udit Agarwal Date: Wed, 6 Nov 2024 10:20:44 -0800 Subject: [PATCH 01/24] [E2E] Fix no_zstd_warning.cpp when built with `clang-cl` (#16002) Currently, when this test is built with a compiler that accepts MSVC-style flags, compilation fails because `-O0` is unrecognized. This PR updates the test to replace `-O0` with `%O0`. Internal bug report: CMPLRTST-26037 --- sycl/test-e2e/Compression/no_zstd_warning.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp index 8a4460f9b8643..c87f2fe480096 100644 --- a/sycl/test-e2e/Compression/no_zstd_warning.cpp +++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp @@ -1,4 +1,4 @@ // using --offload-compress without zstd should throw an error. // REQUIRES: !zstd -// RUN: not %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s +// RUN: not %{build} %O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s // CHECK: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. From 66867d4faf87e03b855e3dc3d004f6b39c7553cd Mon Sep 17 00:00:00 2001 From: Pablo Reble Date: Wed, 6 Nov 2024 12:54:20 -0600 Subject: [PATCH 02/24] [SYCL][Graph] Adding new graph enqueue function to spec (#15677) replacing https://github.com/intel/llvm/pull/15385 after offline discussion. --- ...sycl_ext_oneapi_enqueue_functions.asciidoc | 28 +++++++++++++++++++ .../sycl_ext_oneapi_graph.asciidoc | 4 ++- .../oneapi/experimental/enqueue_functions.hpp | 12 ++++++++ .../ext_oneapi_enqueue_functions.cpp | 2 +- .../ext_oneapi_enqueue_functions_submit.cpp | 2 +- ...pi_enqueue_functions_submit_with_event.cpp | 5 ++-- 6 files changed, 48 insertions(+), 5 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc index ad8d6a7f50194..933a6aabd2bd4 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc @@ -716,6 +716,34 @@ optimize such partial barriers. _{endnote}_] |==== +==== Command Graph + +The functions in this section are only available if the +link:./sycl_ext_oneapi_graph.asciidoc[ + sycl_ext_oneapi_graph] extension is supported. + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +void execute_graph(sycl::queue q, command_graph &g); + +void execute_graph(sycl::handler &h, command_graph &g); + +} +---- +!==== +_Constraints_: Device and context associated with queue need to be identical +to device and context provided at command graph creation. + +_Effects_: Submits an executable command graph to the `sycl::queue` or `sycl::handler`. + +|==== == Issues diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 56f09c04d3055..5dff0396f07fb 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1975,7 +1975,9 @@ Removing this restriction is something we may look at for future revisions of The command submission functions defined in link:../experimental/sycl_ext_oneapi_enqueue_functions.asciidoc[sycl_ext_oneapi_enqueue_functions] -can be used to add nodes to a graph when creating a graph from queue recording. +can be used adding nodes to a graph when creating a graph from queue recording. +New methods are also defined that enable submitting an executable graph, +e.g. directly to a queue without returning an event. == Examples and Usage Guide diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index b3c758aaa891d..7ecf5ce4c8b14 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -383,6 +384,17 @@ inline void partial_barrier(queue Q, const std::vector &Events, submit(Q, [&](handler &CGH) { partial_barrier(CGH, Events); }, CodeLoc); } +inline void execute_graph(queue Q, command_graph &G, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()) { + Q.ext_oneapi_graph(G, CodeLoc); +} + +inline void execute_graph(handler &CGH, + command_graph &G) { + CGH.ext_oneapi_graph(G); +} + } // namespace ext::oneapi::experimental } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp index ef3b440790c6b..6c6fe20337dab 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp @@ -43,7 +43,7 @@ int main() { auto GraphExec = Graph.finalize(); - InOrderQueue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + exp_ext::execute_graph(InOrderQueue, GraphExec); InOrderQueue.wait_and_throw(); free(PtrA, InOrderQueue); diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp index 8eccce2ea8ef9..623d6fc817879 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp @@ -60,7 +60,7 @@ int main() { auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + exp_ext::execute_graph(Queue, GraphExec); Queue.wait_and_throw(); } diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp index f66731d745bd2..4b8294be7e989 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp @@ -52,8 +52,9 @@ int main() { auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); - Queue.wait_and_throw(); + exp_ext::submit_with_event(Queue, [&](handler &CGH) { + exp_ext::execute_graph(CGH, GraphExec); + }).wait(); free(PtrA, Queue); free(PtrB, Queue); From b3fb5addeb3d1b8368226814811bf83cdd4cd8fa Mon Sep 17 00:00:00 2001 From: Udit Agarwal Date: Wed, 6 Nov 2024 16:15:36 -0800 Subject: [PATCH 03/24] [Clang] Fix passing `-offload-compress` when compiling and linking device images separately (#15997) When compiling and linking device images separately, compression related flags are not propagated correctly to `offload-wrapper`. This PR fixes that and updates the test accordingly. --- clang/lib/Driver/ToolChains/Clang.cpp | 57 ++++++++++++------- .../compression_separate_compile.cpp | 7 +++ 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3fa80d479936f..f90ba124e5a09 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10353,33 +10353,48 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, assert(JA.getInputs().size() == Inputs.size() && "Not have inputs for all dependence actions??"); - // For FPGA, we wrap the host objects before archiving them when using - // -fsycl-link. This allows for better extraction control from the - // archive when we need the host objects for subsequent compilations. if (OffloadingKind == Action::OFK_None && - C.getArgs().hasArg(options::OPT_fintelfpga) && C.getArgs().hasArg(options::OPT_fsycl_link_EQ)) { - // Add offload targets and inputs. - CmdArgs.push_back(C.getArgs().MakeArgString( - Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind))); - CmdArgs.push_back( - TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple())); + // For FPGA, we wrap the host objects before archiving them when using + // -fsycl-link. This allows for better extraction control from the + // archive when we need the host objects for subsequent compilations. + if (C.getArgs().hasArg(options::OPT_fintelfpga)) { - if (Inputs[0].getType() == types::TY_Tempfiletable || - Inputs[0].getType() == types::TY_Tempfilelist) - // Input files are passed via the batch job file table. - CmdArgs.push_back(C.getArgs().MakeArgString("-batch")); + // Add offload targets and inputs. + CmdArgs.push_back(C.getArgs().MakeArgString( + Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind))); + CmdArgs.push_back( + TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple())); - // Add input. - assert(Inputs[0].isFilename() && "Invalid input."); - CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename())); + if (Inputs[0].getType() == types::TY_Tempfiletable || + Inputs[0].getType() == types::TY_Tempfilelist) + // Input files are passed via the batch job file table. + CmdArgs.push_back(C.getArgs().MakeArgString("-batch")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::None(), - TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, Inputs)); - return; + // Add input. + assert(Inputs[0].isFilename() && "Invalid input."); + CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename())); + + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::None(), + TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs)); + return; + } else { + // When compiling and linking separately, we need to propagate the + // compression related CLI options to offload-wrapper. Don't propagate + // these options when wrapping objects for FPGA. + if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { + CmdArgs.push_back( + C.getArgs().MakeArgString(Twine("-offload-compress"))); + // -offload-compression-level=<> + if (Arg *A = C.getInputArgs().getLastArg( + options::OPT_offload_compression_level_EQ)) + CmdArgs.push_back(C.getArgs().MakeArgString( + Twine("-offload-compression-level=") + A->getValue())); + } + } } // Add offload targets and inputs. diff --git a/sycl/test-e2e/Compression/compression_separate_compile.cpp b/sycl/test-e2e/Compression/compression_separate_compile.cpp index 9e47bbebdc875..dab17e3506b4e 100644 --- a/sycl/test-e2e/Compression/compression_separate_compile.cpp +++ b/sycl/test-e2e/Compression/compression_separate_compile.cpp @@ -10,6 +10,13 @@ ////////////////////// Link device images // RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v +// Make sure the clang-offload-wrapper is called with the --offload-compress +// option. +// RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -### &> %t_driver_opts.txt +// RUN: FileCheck -input-file=%t_driver_opts.txt %s --check-prefix=CHECK-DRIVER-OPTS + +// CHECK-DRIVER-OPTS: clang-offload-wrapper{{.*}} "-offload-compress" + ////////////////////// Compile the host program // RUN: %clangxx -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o From 3d26d63cec7eae3fd771e8bdb1f66a029c10f313 Mon Sep 17 00:00:00 2001 From: Joe Todd Date: Thu, 7 Nov 2024 06:38:44 +0000 Subject: [PATCH 04/24] [SYCL][COMPAT] Improve non-e2e tests (#15992) Use `-fsyntax-only` where possible, and avoid passing a specific target triple. --- .../syclcompat/launch/kernel_properties.cpp | 2 +- .../launch/launch_policy_lmem_neg.cpp | 2 +- .../syclcompat/launch/launch_policy_neg.cpp | 24 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sycl/test/syclcompat/launch/kernel_properties.cpp b/sycl/test/syclcompat/launch/kernel_properties.cpp index 78920c62c5347..6beefce73d14b 100644 --- a/sycl/test/syclcompat/launch/kernel_properties.cpp +++ b/sycl/test/syclcompat/launch/kernel_properties.cpp @@ -23,7 +23,7 @@ // We need hardware which can support at least 2 sub-group sizes, since that // hardware (presumably) supports the `intel_reqd_sub_group_size` attribute. // REQUIRES: sg-32 && sg-16 -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %if cl_options %{/clang:-S /clang:-emit-llvm%} %else %{-S -emit-llvm%} %s -o - | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -Xclang -fsycl-is-device %if cl_options %{/clang:-S /clang:-emit-llvm%} %else %{-S -emit-llvm%} %s -o - | FileCheck %s #include #include #include diff --git a/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp b/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp index 98222d6cc374f..5c2750e86b705 100644 --- a/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp +++ b/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp @@ -22,7 +22,7 @@ * templates as tests in launch_policy_neg.cpp **************************************************************************/ -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out 2>&1 | FileCheck -vv %s +// RUN: not %clangxx -fsycl -fsyntax-only %s 2>&1 | FileCheck -vv %s #include #include diff --git a/sycl/test/syclcompat/launch/launch_policy_neg.cpp b/sycl/test/syclcompat/launch/launch_policy_neg.cpp index 558864084ff62..cee796471f23f 100644 --- a/sycl/test/syclcompat/launch/launch_policy_neg.cpp +++ b/sycl/test/syclcompat/launch/launch_policy_neg.cpp @@ -20,18 +20,18 @@ * Negative tests for new launch_policy. **************************************************************************/ -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK1 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK1 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK2 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK2 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK3 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK3 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK4 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK4 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK5 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK5 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK6 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK6 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK7 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK7 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK8 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK8 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK9 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK9 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK10 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK10 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK11 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK11 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK12 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK12 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK1 2>&1 | FileCheck -vv %s --check-prefixes=CHECK1 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK2 2>&1 | FileCheck -vv %s --check-prefixes=CHECK2 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK3 2>&1 | FileCheck -vv %s --check-prefixes=CHECK3 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK4 2>&1 | FileCheck -vv %s --check-prefixes=CHECK4 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK5 2>&1 | FileCheck -vv %s --check-prefixes=CHECK5 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK6 2>&1 | FileCheck -vv %s --check-prefixes=CHECK6 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK7 2>&1 | FileCheck -vv %s --check-prefixes=CHECK7 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK8 2>&1 | FileCheck -vv %s --check-prefixes=CHECK8 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK9 2>&1 | FileCheck -vv %s --check-prefixes=CHECK9 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK10 2>&1 | FileCheck -vv %s --check-prefixes=CHECK10 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK11 2>&1 | FileCheck -vv %s --check-prefixes=CHECK11 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK12 2>&1 | FileCheck -vv %s --check-prefixes=CHECK12 #include #include From dd2390c50259f24278a0ab866cf7898952a3c04a Mon Sep 17 00:00:00 2001 From: Rafal Bielski Date: Thu, 7 Nov 2024 09:34:45 +0000 Subject: [PATCH 05/24] [UR][HIP] Set the right HIP device before creating base event counter (#15964) Update UR tag to include https://github.com/oneapi-src/unified-runtime/pull/2276 --------- Co-authored-by: Callum Fare --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 15985fb0cc0b0..a7b68befa96bf 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 3edf99755ce2af3b53102a7d8438e0fe969efac3 -# Merge: 5955bad3 0b968661 -# Author: Ross Brunton -# Date: Wed Nov 6 11:07:29 2024 +0000 -# Merge pull request #2082 from RossBrunton/ross/multiadapt -# [CI] Add "loader" support to conformance testing -set(UNIFIED_RUNTIME_TAG 3edf99755ce2af3b53102a7d8438e0fe969efac3) +# commit 2858a8a28d0b6524a3b2b0e25a597d1c8295ce9d +# Merge: a5a649f3 34b66fda +# Author: Callum Fare +# Date: Wed Nov 6 16:45:49 2024 +0000 +# Merge pull request #2276 from rafbiels/rafbiels/fix-hip-evbase +# Set the right HIP device before creating base event counter +set(UNIFIED_RUNTIME_TAG 2858a8a28d0b6524a3b2b0e25a597d1c8295ce9d) From bcdfc02260f8c0f3df9e2034a7fce9f43ec2e9c2 Mon Sep 17 00:00:00 2001 From: Martin Grant Date: Thu, 7 Nov 2024 15:41:18 +0000 Subject: [PATCH 06/24] [UR] Bump tag to 09ae26a for UR#2233 (#15619) https://github.com/oneapi-src/unified-runtime/pull/2233/ --------- Co-authored-by: Callum Fare --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index a7b68befa96bf..b739ed6beadc8 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 2858a8a28d0b6524a3b2b0e25a597d1c8295ce9d -# Merge: a5a649f3 34b66fda +# commit 09ae26af4e4e4301177db704b3b109ecd388c846 +# Merge: d2f69680 dd920712 # Author: Callum Fare -# Date: Wed Nov 6 16:45:49 2024 +0000 -# Merge pull request #2276 from rafbiels/rafbiels/fix-hip-evbase -# Set the right HIP device before creating base event counter -set(UNIFIED_RUNTIME_TAG 2858a8a28d0b6524a3b2b0e25a597d1c8295ce9d) +# Date: Thu Nov 7 11:43:19 2024 +0000 +# Merge pull request #2233 from martygrant/martin/memory-cts-spec-gap-2 +# Improvements to align CTS and Spec for Memory +set(UNIFIED_RUNTIME_TAG 09ae26af4e4e4301177db704b3b109ecd388c846) From d71b1583f87ed18a497caa0650b19b5371fa06d4 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 7 Nov 2024 08:30:56 -0800 Subject: [PATCH 07/24] [NFCI][SYCL] Introduce `oneapi::experimental::detail::property_base` (#15993) This lays some ground base for subsequent refactoring PRs by isolating simple but noisy diff into a separate change. We're anticipating at least two changes that would be built on top of this: * Change implementation of `has_property`/`get_property` to be inheritance-based vs type-lists/boost. * Bigger design-wise refactoring of the compile time properties, e.g. removal of `property_value` class template. --- .../ext/intel/esimd/memory_properties.hpp | 24 +++-- .../kernel_execution_properties.hpp | 5 +- .../experimental/cluster_group_prop.hpp | 1 + .../oneapi/kernel_properties/properties.hpp | 97 +++++++++++++------ .../ext/oneapi/latency_control/properties.hpp | 11 ++- .../sycl/ext/oneapi/properties/properties.hpp | 4 + .../sycl/ext/oneapi/properties/property.hpp | 35 +++++-- .../ext/oneapi/properties/property_value.hpp | 8 +- sycl/include/sycl/kernel_bundle.hpp | 12 ++- .../abi/sycl_classes_abi_neutral_test.cpp | 15 ++- .../annotated_usm/fake_properties.hpp | 72 +++++++------- .../mock_compile_time_properties.hpp | 6 +- 12 files changed, 189 insertions(+), 101 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp b/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp index c5d57e6b496fa..b25f24bdc829c 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp @@ -289,22 +289,34 @@ namespace ext::oneapi::experimental { template <__ESIMD_NS::cache_hint Hint> struct property_value<__ESIMD_NS::cache_hint_L1_key, - std::integral_constant<__ESIMD_NS::cache_hint, Hint>> { - using key_t = __ESIMD_NS::cache_hint_L1_key; + std::integral_constant<__ESIMD_NS::cache_hint, Hint>> + : detail::property_base< + property_value<__ESIMD_NS::cache_hint_L1_key, + std::integral_constant<__ESIMD_NS::cache_hint, Hint>>, + oneapi::experimental::detail::PropKind::ESIMDL1CacheHint, + __ESIMD_NS::cache_hint_L1_key> { static constexpr __ESIMD_NS::cache_level level = __ESIMD_NS::cache_level::L1; static constexpr __ESIMD_NS::cache_hint hint = Hint; }; template <__ESIMD_NS::cache_hint Hint> struct property_value<__ESIMD_NS::cache_hint_L2_key, - std::integral_constant<__ESIMD_NS::cache_hint, Hint>> { - using key_t = __ESIMD_NS::cache_hint_L2_key; + std::integral_constant<__ESIMD_NS::cache_hint, Hint>> + : detail::property_base< + property_value<__ESIMD_NS::cache_hint_L2_key, + std::integral_constant<__ESIMD_NS::cache_hint, Hint>>, + oneapi::experimental::detail::PropKind::ESIMDL2CacheHint, + __ESIMD_NS::cache_hint_L2_key> { static constexpr __ESIMD_NS::cache_level level = __ESIMD_NS::cache_level::L2; static constexpr __ESIMD_NS::cache_hint hint = Hint; }; template <__ESIMD_NS::cache_hint Hint> struct property_value<__ESIMD_NS::cache_hint_L3_key, - std::integral_constant<__ESIMD_NS::cache_hint, Hint>> { - using key_t = __ESIMD_NS::cache_hint_L3_key; + std::integral_constant<__ESIMD_NS::cache_hint, Hint>> + : detail::property_base< + property_value<__ESIMD_NS::cache_hint_L3_key, + std::integral_constant<__ESIMD_NS::cache_hint, Hint>>, + oneapi::experimental::detail::PropKind::ESIMDL3CacheHint, + __ESIMD_NS::cache_hint_L3_key> { static constexpr __ESIMD_NS::cache_level level = __ESIMD_NS::cache_level::L3; static constexpr __ESIMD_NS::cache_hint hint = Hint; }; diff --git a/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp b/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp index e65a39c072110..a88839b313504 100644 --- a/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp +++ b/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp @@ -26,8 +26,9 @@ inline constexpr cache_config_enum large_slm = inline constexpr cache_config_enum large_data = cache_config_enum::large_data; -struct cache_config : oneapi::experimental::detail::run_time_property_key< - oneapi::experimental::detail::PropKind::CacheConfig> { +struct cache_config + : oneapi::experimental::detail::run_time_property_key< + cache_config, oneapi::experimental::detail::PropKind::CacheConfig> { cache_config(cache_config_enum v) : value(v) {} cache_config_enum value; }; diff --git a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp index 31487beffd810..f035a431ca2a1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp @@ -19,6 +19,7 @@ namespace cuda { template struct cluster_size : ::sycl::ext::oneapi::experimental::detail::run_time_property_key< + cluster_size, ::sycl::ext::oneapi::experimental::detail::ClusterLaunch> { cluster_size(const range &size) : size(size) {} sycl::range get_cluster_size() { return size; } diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 70055c6680b74..9f91607456dd6 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -60,13 +60,15 @@ struct device_has_key std::integral_constant...>; }; -struct nd_range_kernel_key { +struct nd_range_kernel_key + : detail::compile_time_property_key { template using value_t = property_value>; }; -struct single_task_kernel_key { +struct single_task_kernel_key + : detail::compile_time_property_key { using value_t = property_value; }; @@ -87,15 +89,18 @@ struct max_linear_work_group_size_key template struct property_value, - std::integral_constant...> { + std::integral_constant...> + : detail::property_base< + property_value, + std::integral_constant...>, + detail::PropKind::WorkGroupSize, work_group_size_key> { static_assert( sizeof...(Dims) + 1 <= 3, "work_group_size property currently only supports up to three values."); static_assert(detail::AllNonZero::value, "work_group_size property must only contain non-zero values."); - using key_t = work_group_size_key; - constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } @@ -104,7 +109,12 @@ struct property_value, template struct property_value, - std::integral_constant...> { + std::integral_constant...> + : detail::property_base< + property_value, + std::integral_constant...>, + detail::PropKind::WorkGroupSizeHint, work_group_size_hint_key> { static_assert(sizeof...(Dims) + 1 <= 3, "work_group_size_hint property currently " "only supports up to three values."); @@ -112,8 +122,6 @@ struct property_value::value, "work_group_size_hint property must only contain non-zero values."); - using key_t = work_group_size_hint_key; - constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } @@ -121,41 +129,57 @@ struct property_value struct property_value> { + std::integral_constant> + : detail::property_base< + property_value>, + detail::PropKind::SubGroupSize, sub_group_size_key> { static_assert(Size != 0, "sub_group_size_key property must contain a non-zero value."); - using key_t = sub_group_size_key; using value_t = std::integral_constant; static constexpr uint32_t value = Size; }; template struct property_value...> { - using key_t = device_has_key; + std::integral_constant...> + : detail::property_base< + property_value...>, + detail::PropKind::DeviceHas, device_has_key> { static constexpr std::array value{Aspects...}; }; template -struct property_value> { +struct property_value> + : detail::property_base>, + detail::PropKind::NDRangeKernel, + nd_range_kernel_key> { static_assert( Dims >= 1 && Dims <= 3, "nd_range_kernel_key property must use dimension of 1, 2 or 3."); - using key_t = nd_range_kernel_key; using value_t = int; static constexpr int dimensions = Dims; }; -template <> struct property_value { - using key_t = single_task_kernel_key; -}; +template <> +struct property_value + : detail::property_base, + detail::PropKind::SingleTaskKernel, + single_task_kernel_key> {}; template struct property_value, - std::integral_constant...> { + std::integral_constant...> + : detail::property_base< + property_value, + std::integral_constant...>, + detail::PropKind::MaxWorkGroupSize, max_work_group_size_key> { static_assert(sizeof...(Dims) + 1 <= 3, "max_work_group_size property currently " "only supports up to three values."); @@ -163,16 +187,16 @@ struct property_value::value, "max_work_group_size property must only contain non-zero values."); - using key_t = max_work_group_size_key; - constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } }; -template <> struct property_value { - using key_t = max_linear_work_group_size_key; -}; +template <> +struct property_value + : detail::property_base, + detail::PropKind::MaxLinearWorkGroupSize, + max_linear_work_group_size_key> {}; template inline constexpr work_group_size_key::value_t work_group_size; @@ -235,8 +259,13 @@ template , - std::integral_constant> { - using key_t = work_group_progress_key; + std::integral_constant> + : detail::property_base< + property_value< + work_group_progress_key, + std::integral_constant, + std::integral_constant>, + detail::PropKind::WorkGroupProgress, work_group_progress_key> { static constexpr forward_progress_guarantee guarantee = Guarantee; static constexpr execution_scope coordinationScope = CoordinationScope; }; @@ -246,8 +275,13 @@ template , - std::integral_constant> { - using key_t = work_group_progress_key; + std::integral_constant> + : detail::property_base< + property_value< + sub_group_progress_key, + std::integral_constant, + std::integral_constant>, + detail::PropKind::SubGroupProgress, sub_group_progress_key> { static constexpr forward_progress_guarantee guarantee = Guarantee; static constexpr execution_scope coordinationScope = CoordinationScope; }; @@ -257,8 +291,13 @@ template , - std::integral_constant> { - using key_t = work_group_progress_key; + std::integral_constant> + : detail::property_base< + property_value< + work_item_progress_key, + std::integral_constant, + std::integral_constant>, + detail::PropKind::WorkItemProgress, work_item_progress_key> { static constexpr forward_progress_guarantee guarantee = Guarantee; static constexpr execution_scope coordinationScope = CoordinationScope; }; diff --git a/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp b/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp index 0716720a3ab36..ef7a5e7f6b704 100644 --- a/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp @@ -58,8 +58,15 @@ struct property_value< intel::experimental::latency_constraint_key, std::integral_constant, std::integral_constant, - std::integral_constant> { - using key_t = intel::experimental::latency_constraint_key; + std::integral_constant> + : detail::property_base< + property_value, + std::integral_constant< + intel::experimental::latency_control_type, Type>, + std::integral_constant>, + oneapi::experimental::detail::PropKind::LatencyConstraint, + intel::experimental::latency_constraint_key> { static constexpr int target = Target; static constexpr intel::experimental::latency_control_type type = Type; static constexpr int cycle = Cycle; diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 1c93e00dbe880..b7db9715cad4d 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -200,6 +200,10 @@ template class properties { static_assert(NumContainedProps == sizeof...(PropertyValueTs), "One or more property argument is not a property in the " "property list."); + // We're in process of refactoring properties infrastructure, make sure that + // any newly added properties use `detail::property_base`! + static_assert( + (std::is_base_of_v && ...)); } template diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 0b3e0c610c30f..41a59337440b8 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -94,7 +94,7 @@ enum PropKind : uint32_t { namespace sycl::ext::oneapi::experimental { // (2.) -struct foo : detail::run_time_property_key { +struct foo : detail::run_time_property_key { foo(int v) : value(v) {} int value; }; @@ -215,10 +215,35 @@ enum PropKind : uint32_t { PropKindSize = 79, }; +template struct PropertyToKind { + static constexpr PropKind Kind = PropertyT::Kind; +}; + +struct property_tag {}; +template +struct property_base : property_tag { + using key_t = property_key_t; +#if !defined(_MSC_VER) + // Temporary, to ensure new code matches previous behavior and to catch any + // silly copy-paste mistakes. MSVC can't compile it, but linux-only is enough + // for this temporary check. + static_assert([]() constexpr { + if constexpr (std::is_same_v) + // key_t is incomplete at this point for runtime properties. + return true; + else + return Kind == PropertyToKind::Kind; + }()); +#endif +}; + struct property_key_base_tag {}; struct compile_time_property_key_base_tag : property_key_base_tag {}; -template struct run_time_property_key : property_key_base_tag { +template +struct run_time_property_key : property_key_base_tag, + property_base { protected: static constexpr PropKind Kind = Kind_; @@ -235,12 +260,6 @@ struct compile_time_property_key : compile_time_property_key_base_tag { friend struct PropertyToKind; }; -// This trait must be specialized for all properties and must have a unique -// constexpr PropKind member named Kind. -template struct PropertyToKind { - static constexpr PropKind Kind = PropertyT::Kind; -}; - // Get unique ID for property. template struct PropertyID { static constexpr int value = diff --git a/sycl/include/sycl/ext/oneapi/properties/property_value.hpp b/sycl/include/sycl/ext/oneapi/properties/property_value.hpp index dc7d13145677d..629ae794a0b1c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property_value.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property_value.hpp @@ -41,9 +41,11 @@ struct PropertyValueBase : public detail::SingleNontypePropertyValueBase { } // namespace detail template -struct property_value : public detail::PropertyValueBase { - using key_t = PropertyT; -}; +struct property_value + : public detail::PropertyValueBase, + public detail::property_base, + detail::PropertyToKind::Kind, + PropertyT> {}; template constexpr std::enable_if_t::value, diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 1237bc0651b40..73c336d3d131e 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -901,7 +901,8 @@ struct build_source_bundle_props; // PropertyT syclex::include_files ///////////////////////// struct include_files - : detail::run_time_property_key { + : detail::run_time_property_key { include_files(); include_files(const std::string &name, const std::string &content) { record.emplace_back(std::make_pair(name, content)); @@ -922,7 +923,8 @@ struct is_property_key_of { + : detail::run_time_property_key { std::vector opts; build_options(const std::string &optsArg) : opts{optsArg} {} build_options(const std::vector &optsArg) : opts(optsArg) {} @@ -936,7 +938,8 @@ struct is_property_key_of ///////////////////////// // PropertyT syclex::save_log ///////////////////////// -struct save_log : detail::run_time_property_key { +struct save_log + : detail::run_time_property_key { std::string *log; save_log(std::string *logArg) : log(logArg) {} }; @@ -950,7 +953,8 @@ struct is_property_key_of // PropertyT syclex::registered_kernel_names ///////////////////////// struct registered_kernel_names - : detail::run_time_property_key { + : detail::run_time_property_key { std::vector kernel_names; registered_kernel_names() {} registered_kernel_names(const std::string &knArg) : kernel_names{knArg} {} diff --git a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp index 071481d0d1be0..fc494d500ce90 100644 --- a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp +++ b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp @@ -16,9 +16,8 @@ // member is not crossing ABI boundary. All current exclusions are listed below. // CHECK: 0 | struct sycl::ext::oneapi::experimental::build_options -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) -// CHECK-NEXT: 0 | class std::vector > opts +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key +// CHECK: 0 | class std::vector > opts // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl // CHECK-NEXT: 0 | class std::allocator > (base) (empty) @@ -26,9 +25,8 @@ // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::allocator > >::_Vector_impl_data \(base\)|pointer _M_start)}} // CHECK: 0 | struct sycl::ext::oneapi::experimental::include_files -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) -// CHECK-NEXT: 0 | class std::vector, class std::basic_string > > record +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key +// CHECK: 0 | class std::vector, class std::basic_string > > record // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl _M_impl // CHECK-NEXT: 0 | class std::allocator, class std::basic_string > > (base) (empty) @@ -36,9 +34,8 @@ // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl_data \(base\)|pointer _M_start)}} // CHECK: 0 | struct sycl::ext::oneapi::experimental::registered_kernel_names -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) -// CHECK-NEXT: 0 | class std::vector > kernel_names +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key +// CHECK: 0 | class std::vector > kernel_names // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl // CHECK-NEXT: 0 | class std::allocator > (base) (empty) diff --git a/sycl/test/extensions/annotated_usm/fake_properties.hpp b/sycl/test/extensions/annotated_usm/fake_properties.hpp index 656ea1a1ad75e..0462b96e8e24c 100644 --- a/sycl/test/extensions/annotated_usm/fake_properties.hpp +++ b/sycl/test/extensions/annotated_usm/fake_properties.hpp @@ -49,51 +49,53 @@ struct is_property_key_of : std::true_type {}; // Runtime properties enum foo_enum : unsigned { a, b, c }; -struct foo : detail::run_time_property_key { +struct foo : detail::run_time_property_key { constexpr foo(foo_enum v) : value(v) {} foo_enum value; }; -struct foz : detail::run_time_property_key { +struct foz : detail::run_time_property_key { float value1; bool value2; foz(float value1, bool value2) : value1(value1), value2(value2) {} }; -struct rt_prop1 : detail::run_time_property_key {}; -struct rt_prop2 : detail::run_time_property_key {}; -struct rt_prop3 : detail::run_time_property_key {}; -struct rt_prop4 : detail::run_time_property_key {}; -struct rt_prop5 : detail::run_time_property_key {}; -struct rt_prop6 : detail::run_time_property_key {}; -struct rt_prop7 : detail::run_time_property_key {}; -struct rt_prop8 : detail::run_time_property_key {}; -struct rt_prop9 : detail::run_time_property_key {}; -struct rt_prop10 : detail::run_time_property_key {}; -struct rt_prop11 : detail::run_time_property_key {}; -struct rt_prop12 : detail::run_time_property_key {}; -struct rt_prop13 : detail::run_time_property_key {}; -struct rt_prop14 : detail::run_time_property_key {}; -struct rt_prop15 : detail::run_time_property_key {}; -struct rt_prop16 : detail::run_time_property_key {}; -struct rt_prop17 : detail::run_time_property_key {}; -struct rt_prop18 : detail::run_time_property_key {}; -struct rt_prop19 : detail::run_time_property_key {}; -struct rt_prop20 : detail::run_time_property_key {}; -struct rt_prop21 : detail::run_time_property_key {}; -struct rt_prop22 : detail::run_time_property_key {}; -struct rt_prop23 : detail::run_time_property_key {}; -struct rt_prop24 : detail::run_time_property_key {}; -struct rt_prop25 : detail::run_time_property_key {}; -struct rt_prop26 : detail::run_time_property_key {}; -struct rt_prop27 : detail::run_time_property_key {}; -struct rt_prop28 : detail::run_time_property_key {}; -struct rt_prop29 : detail::run_time_property_key {}; -struct rt_prop30 : detail::run_time_property_key {}; -struct rt_prop31 : detail::run_time_property_key {}; -struct rt_prop32 : detail::run_time_property_key {}; -struct rt_prop33 : detail::run_time_property_key {}; +// clang-format off +struct rt_prop1 : detail::run_time_property_key {}; +struct rt_prop2 : detail::run_time_property_key {}; +struct rt_prop3 : detail::run_time_property_key {}; +struct rt_prop4 : detail::run_time_property_key {}; +struct rt_prop5 : detail::run_time_property_key {}; +struct rt_prop6 : detail::run_time_property_key {}; +struct rt_prop7 : detail::run_time_property_key {}; +struct rt_prop8 : detail::run_time_property_key {}; +struct rt_prop9 : detail::run_time_property_key {}; +struct rt_prop10 : detail::run_time_property_key {}; +struct rt_prop11 : detail::run_time_property_key {}; +struct rt_prop12 : detail::run_time_property_key {}; +struct rt_prop13 : detail::run_time_property_key {}; +struct rt_prop14 : detail::run_time_property_key {}; +struct rt_prop15 : detail::run_time_property_key {}; +struct rt_prop16 : detail::run_time_property_key {}; +struct rt_prop17 : detail::run_time_property_key {}; +struct rt_prop18 : detail::run_time_property_key {}; +struct rt_prop19 : detail::run_time_property_key {}; +struct rt_prop20 : detail::run_time_property_key {}; +struct rt_prop21 : detail::run_time_property_key {}; +struct rt_prop22 : detail::run_time_property_key {}; +struct rt_prop23 : detail::run_time_property_key {}; +struct rt_prop24 : detail::run_time_property_key {}; +struct rt_prop25 : detail::run_time_property_key {}; +struct rt_prop26 : detail::run_time_property_key {}; +struct rt_prop27 : detail::run_time_property_key {}; +struct rt_prop28 : detail::run_time_property_key {}; +struct rt_prop29 : detail::run_time_property_key {}; +struct rt_prop30 : detail::run_time_property_key {}; +struct rt_prop31 : detail::run_time_property_key {}; +struct rt_prop32 : detail::run_time_property_key {}; +struct rt_prop33 : detail::run_time_property_key {}; +// clang-format on using foo_key = foo; using foz_key = foz; diff --git a/sycl/test/extensions/properties/mock_compile_time_properties.hpp b/sycl/test/extensions/properties/mock_compile_time_properties.hpp index f9743e27e3896..ea8d98ffa5e58 100644 --- a/sycl/test/extensions/properties/mock_compile_time_properties.hpp +++ b/sycl/test/extensions/properties/mock_compile_time_properties.hpp @@ -34,7 +34,7 @@ struct boo_key : detail::compile_time_property_key { template using value_t = property_value; }; -struct foo : detail::run_time_property_key { +struct foo : detail::run_time_property_key { constexpr foo(int v = 0) : value(v) {} int value; }; @@ -44,7 +44,7 @@ inline bool operator==(const foo &lhs, const foo &rhs) { } inline bool operator!=(const foo &lhs, const foo &rhs) { return !(lhs == rhs); } -struct foz : detail::run_time_property_key { +struct foz : detail::run_time_property_key { constexpr foz(float v1, bool v2) : value1(v1), value2(v2) {} // Define copy constructor to make foz non-trivially copyable constexpr foz(const foz &f) { @@ -60,7 +60,7 @@ inline bool operator==(const foz &lhs, const foz &rhs) { } inline bool operator!=(const foz &lhs, const foz &rhs) { return !(lhs == rhs); } -struct fir : detail::run_time_property_key { +struct fir : detail::run_time_property_key { // Intentionally not constexpr to test for properties that cannot be constexpr fir(float v1, bool v2) : value1(v1), value2(v2) {} // Define copy constructor to make foz non-trivially copyable From aebfdc0d96ae23efc5aebe80fd682fe0640d6338 Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Thu, 7 Nov 2024 16:48:06 +0000 Subject: [PATCH 08/24] [SYCL] Add missing special values to exp(complex) (#15980) Follow-up to https://github.com/intel/llvm/pull/15672 This patch updates float behavior --------- Signed-off-by: jinge90 Co-authored-by: jinge90 --- libdevice/fallback-complex-fp64.cpp | 9 +++---- libdevice/fallback-complex.cpp | 25 ++++++++----------- .../exp/exp-std-complex-double-edge-cases.cpp | 1 + .../exp/exp-std-complex-edge-cases.hpp | 8 ++++++ .../exp/exp-std-complex-float-edge-cases.cpp | 12 +++++++++ .../no-unsupported-without-info.cpp | 3 +-- 6 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp diff --git a/libdevice/fallback-complex-fp64.cpp b/libdevice/fallback-complex-fp64.cpp index 11803a1b72f83..28a5be8ab4a48 100644 --- a/libdevice/fallback-complex-fp64.cpp +++ b/libdevice/fallback-complex-fp64.cpp @@ -152,11 +152,10 @@ double __complex__ __devicelib_cexp(double __complex__ z) { } else if (__spirv_IsNan(z_real)) { if (z_imag == 0.0) return z; - else /* z_imag != 0.0 */ - return CMPLX(NAN, NAN); - } else if (__spirv_IsFinite(z_real)) { - if (__spirv_IsNan(z_imag) || __spirv_IsInf(z_imag)) - return CMPLX(NAN, NAN); + return CMPLX(NAN, NAN); + } else if (__spirv_IsFinite(z_real) && + (__spirv_IsNan(z_imag) || __spirv_IsInf(z_imag))) { + return CMPLX(NAN, NAN); } double __e = __spirv_ocl_exp(z_real); double ret_real = __e * __spirv_ocl_cos(z_imag); diff --git a/libdevice/fallback-complex.cpp b/libdevice/fallback-complex.cpp index e3f58b9eeb019..9f94195a3a407 100644 --- a/libdevice/fallback-complex.cpp +++ b/libdevice/fallback-complex.cpp @@ -141,27 +141,24 @@ DEVICE_EXTERN_C_INLINE float __complex__ __devicelib_cexpf(float __complex__ z) { float z_imag = __devicelib_cimagf(z); float z_real = __devicelib_crealf(z); + if (z_imag == 0) { + return CMPLXF(__spirv_ocl_exp(z_real), __spirv_ocl_copysign(0.f, z_imag)); + } + if (__spirv_IsInf(z_real)) { - if (z_real < 0.0f) { + if (z_real < 0.f) { if (!__spirv_IsFinite(z_imag)) z_imag = 1.0f; - } else if (z_imag == 0.0f || !__spirv_IsFinite(z_imag)) { + } else if (__spirv_IsNan(z_imag)) { + return z; + } else if (z_imag == 0.f || !__spirv_IsFinite(z_imag)) { if (__spirv_IsInf(z_imag)) - z_imag = NAN; - return CMPLXF(z_real, z_imag); + return CMPLXF(z_real, NAN); } - } else if (__spirv_IsNan(z_real) && (z_imag == 0.0f)) { - return z; } - float __e = __spirv_ocl_exp(z_real); - float ret_real = __e * __spirv_ocl_cos(z_imag); - float ret_imag = __e * __spirv_ocl_sin(z_imag); - if (__spirv_IsNan(ret_real)) - ret_real = 0.f; - if (__spirv_IsNan(ret_imag)) - ret_imag = 0.f; - return CMPLXF(ret_real, ret_imag); + float e = __spirv_ocl_exp(z_real); + return CMPLXF(e * __spirv_ocl_cos(z_imag), e * __spirv_ocl_sin(z_imag)); } DEVICE_EXTERN_C_INLINE diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp index 791bda2cf1e61..f539f67aacab6 100644 --- a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp @@ -3,6 +3,7 @@ // // REQUIRES: aspect-fp64 // UNSUPPORTED: hip || cuda +// UNSUPPORTED-INTENDED: This test is intended for backends with SPIR-V support. // // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp index 649c960750156..d807a461e574f 100644 --- a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp @@ -5,6 +5,7 @@ #include #include +#include bool check(bool cond, const std::string &cond_str, int line, unsigned testcase) { @@ -290,6 +291,13 @@ template bool test() { } else if (std::isfinite(testcases[i].imag()) && std::abs(testcases[i].imag()) <= 1) { CHECK(!std::signbit(r.real()), passed, i); +#ifdef _WIN32 + // This check fails on win, temporary skipping: + // CMPLRLLVM-61834 + // TODO: Delete this macro block when fixed + if (std::is_same_v) + continue; +#endif CHECK(std::signbit(r.imag()) == std::signbit(testcases[i].imag()), passed, i); // Those tests were taken from oneDPL, not sure what is the corner case diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp new file mode 100644 index 0000000000000..e1c956a45e382 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp @@ -0,0 +1,12 @@ +// This test checks edge cases handling for std::exp(std::complex) used +// in SYCL kernels. +// +// UNSUPPORTED: hip || cuda +// UNSUPPORTED-INTENDED: This test is intended for backends with SPIR-V support. +// +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "exp-std-complex-edge-cases.hpp" + +int main() { return test(); } diff --git a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp index ae42a97ce9264..3602d7d01ae95 100644 --- a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp +++ b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp @@ -54,7 +54,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 478 +// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 477 // // List of improperly UNSUPPORTED tests. // Remove the CHECK once the test has been properly UNSUPPORTED. @@ -113,7 +113,6 @@ // CHECK-NEXT: DeviceLib/cmath-aot.cpp // CHECK-NEXT: DeviceLib/cmath_fp64_test.cpp // CHECK-NEXT: DeviceLib/complex-fpga.cpp -// CHECK-NEXT: DeviceLib/exp/exp-std-complex-double-edge-cases.cpp // CHECK-NEXT: DeviceLib/imf_bfloat16_integeral_convesions.cpp // CHECK-NEXT: DeviceLib/imf_bfloat16_integeral_convesions.cpp // CHECK-NEXT: DeviceLib/imf_double2bfloat16.cpp From 4bca2461ffdb167e8f3bd000a452958191efa8ce Mon Sep 17 00:00:00 2001 From: Daniel Skrobot Date: Fri, 8 Nov 2024 07:10:31 +0100 Subject: [PATCH 09/24] [SYCL][E2E] Add virtual memory operations test for `sycl_ext_oneapi_virtual_mem` extension (#15988) Based on the test plan https://github.com/intel/llvm/pull/15509, this PR adds an e2e test that performs following checks: - A check should be performed that we can successfully perform and immediately release a valid reservation of virtual memory. - A check should be performed that methods `get_context()`, `get_device()` and `size()` return correct values (i.e. ones which were passed to physical_mem constructor). - A check should be performed that a value returned from a valid call to `map()` is the same as `reinterpret_cast(ptr)`. - A check should be performed that we can change access mode of a virtual memory range and immediately see it changed. - A check should be performed that we can successfully map and immediately unmap a virtual memory range. --- .../VirtualMem/virtual_mem_operations.cpp | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp diff --git a/sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp b/sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp new file mode 100644 index 0000000000000..9d9b7a0628951 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp @@ -0,0 +1,126 @@ +// This test checks whether certain operations in virtual memory extension work +// as expectd. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "helpers.hpp" + +int main() { + + constexpr size_t NumberOfIterations = 3; + std::array NumberOfElementsPerIteration{10, 100, + 1000}; + + sycl::queue Q; + sycl::context Context = Q.get_context(); + sycl::device Device = Q.get_device(); + + // A check should be performed that we can successfully perform and + // immediately release a valid reservation. + for (const size_t RequiredNumElements : NumberOfElementsPerIteration) { + size_t BytesRequired = RequiredNumElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + // A check should be performed that we can successfully map and immediately + // unmap a virtual memory range. + for (const size_t RequiredNumElements : NumberOfElementsPerIteration) { + size_t BytesRequired = RequiredNumElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + syclext::unmap(MappedPtr, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + { + // Check should be performed that methods get_context(), get_device() and + // size() return correct values (i.e. ones which were passed to physical_mem + // constructor). + size_t BytesRequired = NumberOfElementsPerIteration[2] * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + + assert(PhysicalMem.get_device() == Device && + "device passed to physical_mem must be the same as returned from " + "get_device()"); + + assert(PhysicalMem.get_context() == Context && + "context passed to physical_mem must be the same as returned from " + "get_context()"); + + assert(PhysicalMem.size() == AlignedByteSize && + "size in bytes passed to physical_mem must be the same as returned " + "from size()"); + } + + { + // Check to see if value returned from a valid call to map() is the same as + // reinterpret_cast(ptr). + size_t BytesRequired = NumberOfElementsPerIteration[2] * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + + assert(MappedPtr == reinterpret_cast(VirtualMemoryPtr) && + "value returned from a valid call to map() must be equal " + "reinterpret_cast(ptr)"); + + syclext::unmap(MappedPtr, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + // Check to see if can change access mode of a virtual memory range and + // immediately see it changed. + for (const size_t RequiredNumElements : NumberOfElementsPerIteration) { + size_t BytesRequired = RequiredNumElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + + syclext::address_access_mode CurrentAccessMode = + syclext::get_access_mode(MappedPtr, AlignedByteSize, Context); + + assert(CurrentAccessMode == syclext::address_access_mode::read_write && + "access mode must be address_access_mode::read_write before change " + "with " + "set_access_mode()"); + + syclext::set_access_mode(MappedPtr, AlignedByteSize, + syclext::address_access_mode::read, Context); + + CurrentAccessMode = + syclext::get_access_mode(MappedPtr, AlignedByteSize, Context); + + assert(CurrentAccessMode == syclext::address_access_mode::read && + "access mode must be address_access_mode::read after change with " + "set_access_mode()"); + + syclext::unmap(MappedPtr, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + return 0; +} From 2155906b7a55dd19d1aa539e9dde44500d19bd85 Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 8 Nov 2024 00:14:12 -0600 Subject: [PATCH 10/24] [SYCL][Joint Matrix] Add more cases in common JM tests functions - Part 2 (#16021) --- .../Matrix/element_wise_all_ops_impl.hpp | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index ac6dc118ce8b2..0105ea076839d 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -109,7 +109,7 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { template class ewops_ab {}; template + size_t VF, typename Tv = T> void test_ewops_ab() { if constexpr (Use == use::a) std::cout << "Test A "; @@ -122,41 +122,43 @@ void test_ewops_ab() { verify_op_ab>( - T(5.0), T(2.0), 7.0, [](auto l, auto r) { return l + r; }); + Tv(5.0), Tv(2.0), 7.0, [](auto l, auto r) { return l + r; }); verify_op_ab>( - T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l - r; }); + Tv(5.0), Tv(2.0), 3.0, [](auto l, auto r) { return l - r; }); verify_op_ab>( - T(5.0), T(2.0), 10.0, [](auto l, auto r) { return l * r; }); + Tv(5.0), Tv(2.0), 10.0, [](auto l, auto r) { return l * r; }); verify_op_ab>( - T(5.0), T(2.0), 2.5, [](auto l, auto r) { return l / r; }); + Tv(5.0), Tv(2.0), 2.5, [](auto l, auto r) { return l / r; }); verify_op_ab>( - T(5.0), T(5.0), 5.0, [](auto l, auto r) { return l == r ? l : T(1.0); }); + Tv(5.0), Tv(5.0), 5.0, + [](auto l, auto r) { return l == r ? l : Tv(1.0); }); verify_op_ab>( - T(5.0), T(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); + Tv(5.0), Tv(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); verify_op_ab>( - T(5.0), T(5.0), 1.0, [](auto l, auto r) { return l != r ? l : T(1.0); }); + Tv(5.0), Tv(5.0), 1.0, + [](auto l, auto r) { return l != r ? l : Tv(1.0); }); verify_op_ab>( - T(5.0), T(2.0), 3.0, - [](auto l, auto r) { return l > r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 3.0, + [](auto l, auto r) { return l > r ? Tv(3.0) : Tv(2.0); }); verify_op_ab>( - T(5.0), T(2.0), 2.0, - [](auto l, auto r) { return l < r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 2.0, + [](auto l, auto r) { return l < r ? Tv(3.0) : Tv(2.0); }); verify_op_ab>( - T(5.0), T(2.0), 3.0, - [](auto l, auto r) { return l >= r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 3.0, + [](auto l, auto r) { return l >= r ? Tv(3.0) : Tv(2.0); }); verify_op_ab>( - T(5.0), T(2.0), 2.0, - [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 2.0, + [](auto l, auto r) { return l <= r ? Tv(3.0) : Tv(2.0); }); } // Avoid same kernel name for different types and numbers of columns From 0fcc58ec53a57cf85e3c4d164c5564fcabceed24 Mon Sep 17 00:00:00 2001 From: Daniel Skrobot Date: Fri, 8 Nov 2024 09:35:33 +0100 Subject: [PATCH 11/24] [SYCL][E2E] Add extending virtual mem range test for `sycl_ext_oneapi_virtual_mem` extension (#15944) Based on the test plan https://github.com/intel/llvm/pull/15509, this PR adds an e2e test described as `"Extending" virtual memory range`. --- .../extending_virtual_memory_range.cpp | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp diff --git a/sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp b/sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp new file mode 100644 index 0000000000000..67dec551543f1 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp @@ -0,0 +1,97 @@ +// This test checks whether memory accesses to contiguous virtual memory ranges +// are performed correctly + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +#include "helpers.hpp" + +struct VirtualAddressRange { + VirtualAddressRange(uintptr_t Ptr, size_t Size) : MPtr{Ptr}, MSize{Size} {} + + uintptr_t MPtr; + size_t MSize; +}; + +struct PhysicalMemoryMapping { + PhysicalMemoryMapping(syclext::physical_mem &&PhysicalMem, void *MappingPtr) + : MPhysicalMem(std::move(PhysicalMem)), MMappingPtr(MappingPtr) {} + syclext::physical_mem MPhysicalMem; + void *MMappingPtr; +}; + +int main() { + int Failed = 0; + sycl::queue Q; + sycl::context Context = Q.get_context(); + sycl::device Device = Q.get_device(); + + constexpr size_t NumberOfVirtualMemoryRanges = 5; + constexpr size_t ElementsInRange = 100; + constexpr int ValueSetInKernel = 999; + + size_t BytesRequiredPerRange = ElementsInRange * sizeof(int); + + size_t UsedGranularity = GetLCMGranularity(Device, Context); + + size_t AlignedByteSizePerRange = + GetAlignedByteSize(BytesRequiredPerRange, UsedGranularity); + + std::vector VirtualMemoryRanges; + std::vector PhysicalMemoryMappings; + + for (size_t Index = 0; Index < NumberOfVirtualMemoryRanges; ++Index) { + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(AlignedByteSizePerRange, Context); + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSizePerRange}; + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSizePerRange, + syclext::address_access_mode::read_write); + + VirtualMemoryRanges.emplace_back(VirtualMemoryPtr, AlignedByteSizePerRange); + PhysicalMemoryMappings.emplace_back(std::move(PhysicalMem), MappedPtr); + } + + std::vector ResultHostData(ElementsInRange); + + for (size_t Index = 0; Index < NumberOfVirtualMemoryRanges; ++Index) { + int *DataRangePtr = + reinterpret_cast(PhysicalMemoryMappings[Index].MMappingPtr); + + Q.parallel_for(ElementsInRange, [=](sycl::id<1> Idx) { + DataRangePtr[Idx] = ValueSetInKernel; + }).wait_and_throw(); + + { + sycl::buffer ResultBuffer(ResultHostData); + + Q.submit([&](sycl::handler &Handle) { + sycl::accessor A(ResultBuffer, Handle, sycl::write_only); + Handle.parallel_for(ElementsInRange, [=](sycl::id<1> Idx) { + A[Idx] = DataRangePtr[Idx]; + }); + }); + } + + for (size_t i = 0; i < ElementsInRange; i++) { + if (ResultHostData[i] != ValueSetInKernel) { + std::cout << "Comparison failed with virtual range " << Index + 1 + << " at index " << i << ": " << ResultHostData[i] + << " != " << ValueSetInKernel << std::endl; + ++Failed; + } + } + } + + for (auto PhysMemMap : PhysicalMemoryMappings) { + syclext::unmap(PhysMemMap.MMappingPtr, PhysMemMap.MPhysicalMem.size(), + Context); + } + for (auto VirtualMemRange : VirtualMemoryRanges) { + syclext::free_virtual_mem(VirtualMemRange.MPtr, VirtualMemRange.MSize, + Context); + } + + return Failed; +} From 8a5133f090bff409eb2fef612ffd454da3878de7 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Fri, 8 Nov 2024 09:44:40 +0000 Subject: [PATCH 12/24] [SYCL][Graph] E2E test for nullptr dynamic parameter update (#15984) Test for updating a dynamic parameter with a nullptr USM parameter. --- sycl/test-e2e/Graph/Update/update_nullptr.cpp | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 sycl/test-e2e/Graph/Update/update_nullptr.cpp diff --git a/sycl/test-e2e/Graph/Update/update_nullptr.cpp b/sycl/test-e2e/Graph/Update/update_nullptr.cpp new file mode 100644 index 0000000000000..060386c6659a3 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/update_nullptr.cpp @@ -0,0 +1,67 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests updating a graph node using a USM pointer set to nullptr + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + const size_t N = 1024; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrOut = malloc_device(N, Queue); + int *PtrIn = malloc_device(N, Queue); + + Queue.memset(PtrOut, 0, N * sizeof(int)).wait(); + int PtrPattern = 42; + Queue.fill(PtrIn, PtrPattern, N).wait(); + + exp_ext::dynamic_parameter InputParam(Graph, PtrIn); + int DefaultPattern = 10; + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.set_arg(0, InputParam); + cgh.set_arg(1, PtrOut); + cgh.single_task([=]() { + for (size_t i = 0; i < N; i++) { + if (PtrIn) { + PtrOut[i] = PtrIn[i]; + } else { + PtrOut[i] = DefaultPattern; + } + } + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // PtrA should be filled with values + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostData(N); + Queue.copy(PtrOut, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PtrPattern); + } + + // Swap Input to nullptr + int *NullPtr = nullptr; + InputParam.update(NullPtr); + ExecGraph.update(KernelNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrOut, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == DefaultPattern); + } + + free(PtrIn, Queue); + free(PtrOut, Queue); + return 0; +} From 5c73a20d4df4d9fd6e0e1f1594207c74b8f8d855 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Fri, 8 Nov 2024 09:51:48 +0000 Subject: [PATCH 13/24] [SYCL][Graph] Implement dynamic command-groups (#15700) Implement Dynamic Command-Group feature specified in PR [[SYCL][Graph] Add specification for kernel binary updates](https://github.com/intel/llvm/pull/14896). This feature enables updating `ur_kernel_handle_t` objects in graph nodes between executions as well as parameters and execution range of nodes. Points to note in this change: * The functionality is currently supported on CUDA & HIP which are used for testing in the new E2E tests. Level Zero support will follow shortly, resulting in the removal of the `XFAIL` labels with tracker number from the E2E tests. * The code for adding nodes to a graph has been refactored to split out verification of edges, and marking memory objects used in a node, as separate helper functions. This allows path for adding a command-group node to do this functions over each CG in the list before creating the node itself. * The `dynamic_parameter_impl` code has also been refactored so the code is shared for updating a dynamic parameter used in both a regular kernel node and a dynamic command-group node. * There is now no need for the `handler::setNDRangeUsed()` API now that graph kernel nodes can update between kernels using `sycl::nd_range` and `sycl::range`. The functionality in this method has be turned into a no-op, however removing the method is an ABI breaking change, so it remains guarded by the `__INTEL_PREVIEW_BREAKING_CHANGES` macro. See the addition to the design doc for further details on the implementation. --- sycl/doc/design/CommandGraph.md | 23 + .../sycl/ext/oneapi/experimental/graph.hpp | 24 ++ sycl/include/sycl/handler.hpp | 14 +- sycl/source/detail/cg.hpp | 5 +- sycl/source/detail/graph_impl.cpp | 402 +++++++++++++++--- sycl/source/detail/graph_impl.hpp | 259 ++++++----- sycl/source/detail/handler_impl.hpp | 4 - sycl/source/detail/scheduler/commands.cpp | 95 +++-- sycl/source/handler.cpp | 26 +- .../Graph/Update/dyn_cgf_accessor.cpp | 61 +++ .../Graph/Update/dyn_cgf_accessor_deps.cpp | 79 ++++ .../Graph/Update/dyn_cgf_accessor_deps2.cpp | 88 ++++ .../Graph/Update/dyn_cgf_accessor_spv.cpp | 81 ++++ .../Update/dyn_cgf_different_arg_nums.cpp | 148 +++++++ .../Graph/Update/dyn_cgf_event_deps.cpp | 72 ++++ .../test-e2e/Graph/Update/dyn_cgf_ndrange.cpp | 72 ++++ .../Graph/Update/dyn_cgf_ndrange_3D.cpp | 80 ++++ .../Graph/Update/dyn_cgf_overwrite_range.cpp | 57 +++ .../Graph/Update/dyn_cgf_parameters.cpp | 71 ++++ .../Graph/Update/dyn_cgf_shared_nodes.cpp | 75 ++++ .../Update/dyn_cgf_update_before_finalize.cpp | 46 ++ sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp | 56 +++ .../Update/dyn_cgf_with_all_dyn_params.cpp | 120 ++++++ ...dyn_cgf_with_different_type_dyn_params.cpp | 136 ++++++ .../Update/dyn_cgf_with_some_dyn_params.cpp | 106 +++++ .../Graph/Update/update_ndrange_to_range.cpp | 54 +++ .../Graph/Update/update_range_to_ndrange.cpp | 55 +++ .../Graph/Update/whole_update_dynamic_cgf.cpp | 76 ++++ .../Update/whole_update_dynamic_param.cpp | 2 - sycl/test/abi/sycl_symbols_linux.dump | 5 + sycl/test/abi/sycl_symbols_windows.dump | 9 + .../Extensions/CommandGraph/Exceptions.cpp | 163 +++++++ .../Extensions/CommandGraph/Update.cpp | 12 +- 33 files changed, 2311 insertions(+), 265 deletions(-) create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp create mode 100644 sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp create mode 100644 sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp create mode 100644 sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp create mode 100644 sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 2f83d42a3c57c..23d32fd9ff7e2 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -282,6 +282,29 @@ requirements for these new accessors to correctly trigger allocations before updating. This is similar to how individual graph commands are enqueued when accessors are used in a graph node. +### Dynamic Command-Group + +To implement the `dynamic_command_group` class for updating the command-groups (CG) +associated with nodes, the CG member of the node implementation class changes +from a `std::unique_ptr` to a `std::shared_ptr` so that multiple nodes and the +`dynamic_command_group_impl` object can share the same CG object. This avoids +the overhead of having to allocate and free copies of the CG when a new active +CG is selected. + +The `dynamic_command_group_impl` class contains a list of weak pointers to the +nodes which have been created with it, so that when a new active CG is selected +it can propagate the change to those nodes. The `dynamic_parameter_impl` class +also contains a list of weak pointers, but to the `dynamic_command_group_impl` +instances of any dynamic command-groups where they are used. This allows +updating the dynamic parameter to propagate to dynamic command-group nodes. + +The `sycl::detail::CGExecKernel` class has been added to, so that if the +object was created from an element in the dynamic command-group list, the class +stores a vector of weak pointers to the other alternative command-groups created +from the same dynamic command-group object. This allows the SYCL runtime to +access the list of alternative kernels when calling the UR API to append a +kernel command to a command-buffer. + ## Optimizations ### Interactions with Profiling diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp index d18cf3ebc4b3d..2bc3ef1d921ab 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp @@ -96,6 +96,7 @@ class node_impl; class graph_impl; class exec_graph_impl; class dynamic_parameter_impl; +class dynamic_command_group_impl; } // namespace detail enum class node_type { @@ -216,6 +217,23 @@ class depends_on_all_leaves : public ::sycl::detail::DataLessProperty< } // namespace node } // namespace property +class __SYCL_EXPORT dynamic_command_group { +public: + dynamic_command_group( + const command_graph &Graph, + const std::vector> &CGFList); + + size_t get_active_cgf() const; + void set_active_cgf(size_t Index); + +private: + template + friend const decltype(Obj::impl) & + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + std::shared_ptr impl; +}; + namespace detail { // Templateless modifiable command-graph base class. class __SYCL_EXPORT modifiable_command_graph { @@ -337,6 +355,12 @@ class __SYCL_EXPORT modifiable_command_graph { modifiable_command_graph(const std::shared_ptr &Impl) : impl(Impl) {} + /// Template-less implementation of add() for dynamic command-group nodes. + /// @param DynCGF Dynamic Command-group function object to add. + /// @param Dep List of predecessor nodes. + /// @return Node added to the graph. + node addImpl(dynamic_command_group &DynCGF, const std::vector &Dep); + /// Template-less implementation of add() for CGF nodes. /// @param CGF Command-group function to add. /// @param Dep List of predecessor nodes. diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8abb4323ab3e1..4e8f62d53c36d 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1175,7 +1175,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(Wrapper)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif } else #endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ && @@ -1198,7 +1197,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif #else (void)KernelFunc; @@ -1249,7 +1247,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(true); #endif } @@ -1272,7 +1269,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NumWorkItems)); processLaunchProperties(Props); setType(detail::CGType::Kernel); - setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); #endif @@ -1298,7 +1294,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NDRange)); processLaunchProperties(Props); setType(detail::CGType::Kernel); - setNDRangeUsed(true); extractArgsAndReqs(); MKernelName = getKernelName(); #endif @@ -1339,7 +1334,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true); StoreLambda(std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif // __SYCL_DEVICE_ONLY__ } @@ -1971,7 +1965,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif } @@ -2069,7 +2062,6 @@ class __SYCL_EXPORT handler { detail::checkValueRange(NumWorkItems, WorkItemOffset); setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); #endif @@ -2148,7 +2140,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NumWorkItems)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2189,7 +2180,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2229,7 +2219,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CGType::Kernel); - setNDRangeUsed(true); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -3357,6 +3346,7 @@ class __SYCL_EXPORT handler { size_t Size, bool Block = false); friend class ext::oneapi::experimental::detail::graph_impl; friend class ext::oneapi::experimental::detail::dynamic_parameter_impl; + friend class ext::oneapi::experimental::detail::dynamic_command_group_impl; bool DisableRangeRounding(); @@ -3626,8 +3616,10 @@ class __SYCL_EXPORT handler { } #endif +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Set that an ND Range was used during a call to parallel_for void setNDRangeUsed(bool Value); +#endif inline void internalProfilingTagImpl() { throwIfActionIsCreated(); diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 1799bbedd4903..f0dadad99dac5 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -257,6 +257,9 @@ class CGExecKernel : public CG { std::string MKernelName; std::vector> MStreams; std::vector> MAuxiliaryResources; + /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list + /// of command-groups that a kernel command can be updated to. + std::vector> MAlternativeKernels; ur_kernel_cache_config_t MKernelCacheConfig; bool MKernelIsCooperative = false; bool MKernelUsesClusterLaunch = false; @@ -277,7 +280,7 @@ class CGExecKernel : public CG { MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), MKernelName(std::move(KernelName)), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), - MKernelCacheConfig(std::move(KernelCacheConfig)), + MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), MKernelUsesClusterLaunch(MKernelUsesClusterLaunch) { assert(getType() == CGType::Kernel && "Wrong type of exec kernel CG."); diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index e725fc4ce0c82..861ec2a883601 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -352,11 +352,76 @@ void graph_impl::removeRoot(const std::shared_ptr &Root) { MRoots.erase(Root); } -std::shared_ptr -graph_impl::add(const std::vector> &Dep) { - // Copy deps so we can modify them - auto Deps = Dep; +std::set> graph_impl::getCGEdges( + const std::shared_ptr &CommandGroup) const { + const auto &Requirements = CommandGroup->getRequirements(); + if (!MAllowBuffers && Requirements.size()) { + throw sycl::exception(make_error_code(errc::invalid), + "Cannot use buffers in a graph without passing the " + "assume_buffer_outlives_graph property on " + "Graph construction."); + } + + if (CommandGroup->getType() == sycl::detail::CGType::Kernel) { + auto CGKernel = + static_cast(CommandGroup.get()); + if (CGKernel->hasStreams()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Using sycl streams in a graph node is unsupported."); + } + } + + // Add any nodes specified by event dependencies into the dependency list + std::set> UniqueDeps; + for (auto &Dep : CommandGroup->getEvents()) { + if (auto NodeImpl = MEventsMap.find(Dep); NodeImpl == MEventsMap.end()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Event dependency from handler::depends_on does " + "not correspond to a node within the graph"); + } else { + UniqueDeps.insert(NodeImpl->second); + } + } + + // A unique set of dependencies obtained by checking requirements and events + for (auto &Req : Requirements) { + // Look through the graph for nodes which share this requirement + for (auto &Node : MNodeStorage) { + if (Node->hasRequirementDependency(Req)) { + bool ShouldAddDep = true; + // If any of this node's successors have this requirement then we skip + // adding the current node as a dependency. + for (auto &Succ : Node->MSuccessors) { + if (Succ.lock()->hasRequirementDependency(Req)) { + ShouldAddDep = false; + break; + } + } + if (ShouldAddDep) { + UniqueDeps.insert(Node); + } + } + } + } + + return UniqueDeps; +} + +void graph_impl::markCGMemObjs( + const std::shared_ptr &CommandGroup) { + const auto &Requirements = CommandGroup->getRequirements(); + for (auto &Req : Requirements) { + auto MemObj = static_cast(Req->MSYCLMemObj); + bool WasInserted = MMemObjs.insert(MemObj).second; + if (WasInserted) { + MemObj->markBeingUsedInGraph(); + } + } +} +std::shared_ptr +graph_impl::add(std::vector> &Deps) { const std::shared_ptr &NodeImpl = std::make_shared(); MNodeStorage.push_back(NodeImpl); @@ -370,7 +435,7 @@ graph_impl::add(const std::vector> &Dep) { std::shared_ptr graph_impl::add(std::function CGF, const std::vector &Args, - const std::vector> &Dep) { + std::vector> &Deps) { (void)Args; sycl::handler Handler{shared_from_this()}; @@ -401,8 +466,8 @@ graph_impl::add(std::function CGF, Handler.getType()); auto NodeImpl = - this->add(NodeType, std::move(Handler.impl->MGraphNodeCG), Dep); - NodeImpl->MNDRangeUsed = Handler.impl->MNDRangeUsed; + this->add(NodeType, std::move(Handler.impl->MGraphNodeCG), Deps); + // Add an event associated with this explicit node for mixed usage addEventForNode(std::make_shared(), NodeImpl); @@ -444,67 +509,15 @@ graph_impl::add(const std::vector Events) { std::shared_ptr graph_impl::add(node_type NodeType, - std::unique_ptr CommandGroup, - const std::vector> &Dep) { - // Copy deps so we can modify them - auto Deps = Dep; + std::shared_ptr CommandGroup, + std::vector> &Deps) { // A unique set of dependencies obtained by checking requirements and events - std::set> UniqueDeps; - const auto &Requirements = CommandGroup->getRequirements(); - if (!MAllowBuffers && Requirements.size()) { - throw sycl::exception(make_error_code(errc::invalid), - "Cannot use buffers in a graph without passing the " - "assume_buffer_outlives_graph property on " - "Graph construction."); - } + std::set> UniqueDeps = getCGEdges(CommandGroup); - if (CommandGroup->getType() == sycl::detail::CGType::Kernel) { - auto CGKernel = - static_cast(CommandGroup.get()); - if (CGKernel->hasStreams()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Using sycl streams in a graph node is unsupported."); - } - } + // Track and mark the memory objects being used by the graph. + markCGMemObjs(CommandGroup); - for (auto &Req : Requirements) { - // Track and mark the memory objects being used by the graph. - auto MemObj = static_cast(Req->MSYCLMemObj); - bool WasInserted = MMemObjs.insert(MemObj).second; - if (WasInserted) { - MemObj->markBeingUsedInGraph(); - } - // Look through the graph for nodes which share this requirement - for (auto &Node : MNodeStorage) { - if (Node->hasRequirementDependency(Req)) { - bool ShouldAddDep = true; - // If any of this node's successors have this requirement then we skip - // adding the current node as a dependency. - for (auto &Succ : Node->MSuccessors) { - if (Succ.lock()->hasRequirementDependency(Req)) { - ShouldAddDep = false; - break; - } - } - if (ShouldAddDep) { - UniqueDeps.insert(Node); - } - } - } - } - - // Add any nodes specified by event dependencies into the dependency list - for (auto &Dep : CommandGroup->getEvents()) { - if (auto NodeImpl = MEventsMap.find(Dep); NodeImpl != MEventsMap.end()) { - UniqueDeps.insert(NodeImpl->second); - } else { - throw sycl::exception(sycl::make_error_code(errc::invalid), - "Event dependency from handler::depends_on does " - "not correspond to a node within the graph"); - } - } // Add any deps determined from requirements and events into the dependency // list Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end()); @@ -518,6 +531,41 @@ graph_impl::add(node_type NodeType, return NodeImpl; } +std::shared_ptr +graph_impl::add(std::shared_ptr &DynCGImpl, + std::vector> &Deps) { + // Set of Dependent nodes based on CG event and accessor dependencies. + std::set> DynCGDeps = + getCGEdges(DynCGImpl->MKernels[0]); + for (unsigned i = 1; i < DynCGImpl->getNumCGs(); i++) { + auto &CG = DynCGImpl->MKernels[i]; + auto CGEdges = getCGEdges(CG); + if (CGEdges != DynCGDeps) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Command-groups in dynamic command-group don't have" + "equivalent dependencies to other graph nodes."); + } + } + + // Track and mark the memory objects being used by the graph. + for (auto &CG : DynCGImpl->MKernels) { + markCGMemObjs(CG); + } + + // Get active dynamic command-group CG and use to create a node object + const auto &ActiveKernel = DynCGImpl->getActiveKernel(); + std::shared_ptr NodeImpl = + add(node_type::kernel, ActiveKernel, Deps); + + // Add an event associated with this explicit node for mixed usage + addEventForNode(std::make_shared(), NodeImpl); + + // Track the dynamic command-group used inside the node object + DynCGImpl->MNodes.push_back(NodeImpl); + + return NodeImpl; +} + bool graph_impl::clearQueues() { bool AnyQueuesCleared = false; for (auto &Queue : MRecordingQueues) { @@ -1515,7 +1563,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { } } - UpdateDesc.hNewKernel = nullptr; + UpdateDesc.hNewKernel = UrKernel; UpdateDesc.numNewMemObjArgs = MemobjDescs.size(); UpdateDesc.pNewMemObjArgList = MemobjDescs.data(); UpdateDesc.numNewPointerArgs = PtrDescs.size(); @@ -1568,6 +1616,27 @@ modifiable_command_graph::modifiable_command_graph( : impl(std::make_shared( SyclQueue.get_context(), SyclQueue.get_device(), PropList)) {} +node modifiable_command_graph::addImpl(dynamic_command_group &DynCGF, + const std::vector &Deps) { + impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); + auto DynCGFImpl = sycl::detail::getSyclObjImpl(DynCGF); + + if (DynCGFImpl->MGraph != impl) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Graph does not match the graph associated with " + "dynamic command-group."); + } + + std::vector> DepImpls; + for (auto &D : Deps) { + DepImpls.push_back(sycl::detail::getSyclObjImpl(D)); + } + + graph_impl::WriteLock Lock(impl->MMutex); + std::shared_ptr NodeImpl = impl->add(DynCGFImpl, DepImpls); + return sycl::detail::createSyclObjFromImpl(NodeImpl); +} + node modifiable_command_graph::addImpl(const std::vector &Deps) { impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); std::vector> DepImpls; @@ -1775,6 +1844,190 @@ void dynamic_parameter_base::updateAccessor( impl->updateAccessor(Acc); } +void dynamic_parameter_impl::updateValue(const raw_kernel_arg *NewRawValue, + size_t Size) { + // Number of bytes is taken from member of raw_kernel_arg object rather + // than using the size parameter which represents sizeof(raw_kernel_arg). + std::ignore = Size; + size_t RawArgSize = NewRawValue->MArgSize; + const void *RawArgData = NewRawValue->MArgData; + + updateValue(RawArgData, RawArgSize); +} + +void dynamic_parameter_impl::updateValue(const void *NewValue, size_t Size) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + if (NodeShared) { + dynamic_parameter_impl::updateCGArgValue(NodeShared->MCommandGroup, + ArgIndex, NewValue, Size); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MKernels[DynCGInfo.CGIndex]; + dynamic_parameter_impl::updateCGArgValue(CG, DynCGInfo.ArgIndex, NewValue, + Size); + } + } + + std::memcpy(MValueStorage.data(), NewValue, Size); +} + +void dynamic_parameter_impl::updateAccessor( + const sycl::detail::AccessorBaseHost *Acc) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + // Should we fail here if the node isn't alive anymore? + if (NodeShared) { + dynamic_parameter_impl::updateCGAccessor(NodeShared->MCommandGroup, + ArgIndex, Acc); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MKernels[DynCGInfo.CGIndex]; + dynamic_parameter_impl::updateCGAccessor(CG, DynCGInfo.ArgIndex, Acc); + } + } + + std::memcpy(MValueStorage.data(), Acc, + sizeof(sycl::detail::AccessorBaseHost)); +} + +void dynamic_parameter_impl::updateCGArgValue( + std::shared_ptr CG, int ArgIndex, const void *NewValue, + size_t Size) { + auto &Args = static_cast(CG.get())->MArgs; + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MSize == static_cast(Size)); + // MPtr may be a pointer into arg storage so we memcpy the contents of + // NewValue rather than assign it directly + std::memcpy(Arg.MPtr, NewValue, Size); + break; + } +} + +void dynamic_parameter_impl::updateCGAccessor( + std::shared_ptr CG, int ArgIndex, + const sycl::detail::AccessorBaseHost *Acc) { + auto &Args = static_cast(CG.get())->MArgs; + + auto NewAccImpl = sycl::detail::getSyclObjImpl(*Acc); + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_accessor); + + // Find old accessor in accessor storage and replace with new one + if (static_cast(NewAccImpl->MSYCLMemObj) + ->needsWriteBack()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Accessors to buffers which have write_back enabled " + "are not allowed to be used in command graphs."); + } + + // All accessors passed to this function will be placeholders, so we must + // perform steps similar to what happens when handler::require() is + // called here. + sycl::detail::Requirement *NewReq = NewAccImpl.get(); + if (NewReq->MAccessMode != sycl::access_mode::read) { + auto SYCLMemObj = + static_cast(NewReq->MSYCLMemObj); + SYCLMemObj->handleWriteAccessorCreation(); + } + + for (auto &Acc : CG->getAccStorage()) { + if (auto OldAcc = static_cast(Arg.MPtr); + Acc.get() == OldAcc) { + Acc = NewAccImpl; + } + } + + for (auto &Req : CG->getRequirements()) { + if (auto OldReq = static_cast(Arg.MPtr); + Req == OldReq) { + Req = NewReq; + } + } + Arg.MPtr = NewAccImpl.get(); + break; + } +} + +dynamic_command_group_impl::dynamic_command_group_impl( + const command_graph &Graph) + : MGraph{sycl::detail::getSyclObjImpl(Graph)}, MActiveCGF(0) {} + +void dynamic_command_group_impl::finalizeCGFList( + const std::vector> &CGFList) { + for (size_t CGFIndex = 0; CGFIndex < CGFList.size(); CGFIndex++) { + const auto &CGF = CGFList[CGFIndex]; + // Handler defined inside the loop so it doesn't appear to the runtime + // as a single command-group with multiple commands inside. + sycl::handler Handler{MGraph}; + CGF(Handler); + + if (Handler.getType() != sycl::detail::CGType::Kernel) { + throw sycl::exception( + make_error_code(errc::invalid), + "The only type of command-groups that can be used in " + "dynamic command-groups is kernels."); + } + + Handler.finalize(); + + // Take unique_ptr object from handler and convert to + // shared_ptr to store + sycl::detail::CG *RawCGPtr = Handler.impl->MGraphNodeCG.release(); + auto RawCGExecPtr = static_cast(RawCGPtr); + auto CGExecSP = std::shared_ptr(RawCGExecPtr); + MKernels.push_back(CGExecSP); + + // Track dynamic_parameter usage in command-list + auto &DynamicParams = Handler.impl->MDynamicParameters; + for (auto &[DynamicParam, ArgIndex] : DynamicParams) { + DynamicParam->registerDynCG(shared_from_this(), CGFIndex, ArgIndex); + } + } + + // For each CGExecKernel store the list of alternative kernels, not + // including itself. + using CGExecKernelSP = std::shared_ptr; + using CGExecKernelWP = std::weak_ptr; + for (auto KernelCG : MKernels) { + std::vector Alternatives; + std::copy_if( + MKernels.begin(), MKernels.end(), std::back_inserter(Alternatives), + [&KernelCG](const CGExecKernelSP &K) { return K != KernelCG; }); + + KernelCG->MAlternativeKernels = std::move(Alternatives); + } +} + +void dynamic_command_group_impl::setActiveIndex(size_t Index) { + if (Index >= getNumCGs()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Index is out of range."); + } + MActiveCGF = Index; + + // Update nodes using the dynamic command-group to use the new active CG + for (auto &Node : MNodes) { + if (auto NodeSP = Node.lock()) { + NodeSP->MCommandGroup = getActiveKernel(); + } + } +} } // namespace detail node_type node::get_type() const { return impl->MNodeType; } @@ -1813,6 +2066,25 @@ template <> __SYCL_EXPORT void node::update_range<2>(range<2> Range) { template <> __SYCL_EXPORT void node::update_range<3>(range<3> Range) { impl->updateRange(Range); } + +dynamic_command_group::dynamic_command_group( + const command_graph &Graph, + const std::vector> &CGFList) + : impl(std::make_shared(Graph)) { + if (CGFList.empty()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Dynamic command-group cannot be created with an " + "empty CGF list."); + } + impl->finalizeCGFList(CGFList); +} + +size_t dynamic_command_group::get_active_cgf() const { + return impl->getActiveIndex(); +} +void dynamic_command_group::set_active_cgf(size_t Index) { + return impl->setActiveIndex(Index); +} } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 4ee34830f39a2..6144e3f51b9da 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -95,7 +95,7 @@ class node_impl : public std::enable_shared_from_this { /// User facing type of the node. node_type MNodeType = node_type::empty; /// Command group object which stores all args etc needed to enqueue the node - std::unique_ptr MCommandGroup; + std::shared_ptr MCommandGroup; /// Stores the executable graph impl associated with this node if it is a /// subgraph node. std::shared_ptr MSubGraphImpl; @@ -108,9 +108,6 @@ class node_impl : public std::enable_shared_from_this { /// cannot be used to find out the partion of a node outside of this process. int MPartitionNum = -1; - /// Track whether an ND-Range was used for kernel nodes - bool MNDRangeUsed = false; - /// Add successor to the node. /// @param Node Node to add as a successor. void registerSuccessor(const std::shared_ptr &Node) { @@ -143,10 +140,9 @@ class node_impl : public std::enable_shared_from_this { /// @param NodeType Type of the command-group. /// @param CommandGroup The CG which stores the command information for this /// node. - node_impl(node_type NodeType, - std::unique_ptr &&CommandGroup) + node_impl(node_type NodeType, std::shared_ptr CommandGroup) : MCGType(CommandGroup->getType()), MNodeType(NodeType), - MCommandGroup(std::move(CommandGroup)) { + MCommandGroup(CommandGroup) { if (NodeType == node_type::subgraph) { MSubGraphImpl = static_cast(MCommandGroup.get()) @@ -405,75 +401,6 @@ class node_impl : public std::enable_shared_from_this { return (ReqSrc->MDims > 1) || (ReqDst->MDims > 1); } - /// Update the value of an accessor inside this node. Accessors must be - /// handled specifically compared to other argument values. - /// @param ArgIndex The index of the accessor arg to be updated - /// @param Acc Pointer to the new accessor value - void updateAccessor(int ArgIndex, const sycl::detail::AccessorBaseHost *Acc) { - auto &Args = - static_cast(MCommandGroup.get())->MArgs; - auto NewAccImpl = sycl::detail::getSyclObjImpl(*Acc); - for (auto &Arg : Args) { - if (Arg.MIndex != ArgIndex) { - continue; - } - assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_accessor); - - // Find old accessor in accessor storage and replace with new one - if (static_cast(NewAccImpl->MSYCLMemObj) - ->needsWriteBack()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Accessors to buffers which have write_back enabled " - "are not allowed to be used in command graphs."); - } - - // All accessors passed to this function will be placeholders, so we must - // perform steps similar to what happens when handler::require() is - // called here. - sycl::detail::Requirement *NewReq = NewAccImpl.get(); - if (NewReq->MAccessMode != sycl::access_mode::read) { - auto SYCLMemObj = - static_cast(NewReq->MSYCLMemObj); - SYCLMemObj->handleWriteAccessorCreation(); - } - - for (auto &Acc : MCommandGroup->getAccStorage()) { - if (auto OldAcc = - static_cast(Arg.MPtr); - Acc.get() == OldAcc) { - Acc = NewAccImpl; - } - } - - for (auto &Req : MCommandGroup->getRequirements()) { - if (auto OldReq = - static_cast(Arg.MPtr); - Req == OldReq) { - Req = NewReq; - } - } - Arg.MPtr = NewAccImpl.get(); - break; - } - } - - void updateArgValue(int ArgIndex, const void *NewValue, size_t Size) { - - auto &Args = - static_cast(MCommandGroup.get())->MArgs; - for (auto &Arg : Args) { - if (Arg.MIndex != ArgIndex) { - continue; - } - assert(Arg.MSize == static_cast(Size)); - // MPtr may be a pointer into arg storage so we memcpy the contents of - // NewValue rather than assign it directly - std::memcpy(Arg.MPtr, NewValue, Size); - break; - } - } - template void updateNDRange(nd_range ExecutionRange) { if (MCGType != sycl::detail::CGType::Kernel) { @@ -481,11 +408,6 @@ class node_impl : public std::enable_shared_from_this { sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); } - if (!MNDRangeUsed) { - throw sycl::exception(sycl::errc::invalid, - "Cannot update node which was created with a " - "sycl::range with a sycl::nd_range"); - } auto &NDRDesc = static_cast(MCommandGroup.get()) @@ -495,7 +417,7 @@ class node_impl : public std::enable_shared_from_this { throw sycl::exception(sycl::errc::invalid, "Cannot update execution range of a node with an " "execution range of different dimensions than what " - "the node was originall created with."); + "the node was originally created with."); } NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; @@ -507,11 +429,6 @@ class node_impl : public std::enable_shared_from_this { sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); } - if (MNDRangeUsed) { - throw sycl::exception(sycl::errc::invalid, - "Cannot update node which was created with a " - "sycl::nd_range with a sycl::range"); - } auto &NDRDesc = static_cast(MCommandGroup.get()) @@ -521,7 +438,7 @@ class node_impl : public std::enable_shared_from_this { throw sycl::exception(sycl::errc::invalid, "Cannot update execution range of a node with an " "execution range of different dimensions than what " - "the node was originall created with."); + "the node was originally created with."); } NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; @@ -535,6 +452,7 @@ class node_impl : public std::enable_shared_from_this { ExecCG->MArgs = OtherExecCG->MArgs; ExecCG->MNDRDesc = OtherExecCG->MNDRDesc; + ExecCG->MKernelName = OtherExecCG->MKernelName; ExecCG->getAccStorage() = OtherExecCG->getAccStorage(); ExecCG->getRequirements() = OtherExecCG->getRequirements(); @@ -888,30 +806,40 @@ class graph_impl : public std::enable_shared_from_this { /// @param Root Node to remove from list of root nodes. void removeRoot(const std::shared_ptr &Root); + /// Verifies the CG is valid to add to the graph and returns set of + /// dependent nodes if so. + /// @param CommandGroup The command group to verify and retrieve edges for. + /// @return Set of dependent nodes in the graph. + std::set> + getCGEdges(const std::shared_ptr &CommandGroup) const; + + /// Identifies the sycl buffers used in the command-group and marks them + /// as used in the graph. + /// @param CommandGroup The command-group to check for buffer usage in. + void markCGMemObjs(const std::shared_ptr &CommandGroup); + /// Create a kernel node in the graph. /// @param NodeType User facing type of the node. /// @param CommandGroup The CG which stores all information for this node. - /// @param Dep Dependencies of the created node. + /// @param Deps Dependencies of the created node. /// @return Created node in the graph. - std::shared_ptr - add(node_type NodeType, std::unique_ptr CommandGroup, - const std::vector> &Dep = {}); + std::shared_ptr add(node_type NodeType, + std::shared_ptr CommandGroup, + std::vector> &Deps); /// Create a CGF node in the graph. /// @param CGF Command-group function to create node with. /// @param Args Node arguments. - /// @param Dep Dependencies of the created node. + /// @param Deps Dependencies of the created node. /// @return Created node in the graph. - std::shared_ptr - add(std::function CGF, - const std::vector &Args, - const std::vector> &Dep = {}); + std::shared_ptr add(std::function CGF, + const std::vector &Args, + std::vector> &Deps); /// Create an empty node in the graph. - /// @param Dep List of predecessor nodes. + /// @param Deps List of predecessor nodes. /// @return Created node in the graph. - std::shared_ptr - add(const std::vector> &Dep = {}); + std::shared_ptr add(std::vector> &Deps); /// Create an empty node in the graph. /// @param Events List of events associated to this node. @@ -919,6 +847,14 @@ class graph_impl : public std::enable_shared_from_this { std::shared_ptr add(const std::vector Events); + /// Create a dynamic command-group node in the graph. + /// @param DynCGImpl Dynamic command-group used to create node. + /// @param Deps List of predecessor nodes. + /// @return Created node in the graph. + std::shared_ptr + add(std::shared_ptr &DynCGImpl, + std::vector> &Deps); + /// Add a queue to the set of queues which are currently recording to this /// graph. /// @param RecordingQueue Queue to add to set. @@ -1236,7 +1172,7 @@ class graph_impl : public std::enable_shared_from_this { /// @param Node The node to add deps for /// @param Deps List of dependent nodes void addDepsToNode(std::shared_ptr Node, - const std::vector> &Deps) { + std::vector> &Deps) { if (!Deps.empty()) { for (auto &N : Deps) { N->registerSuccessor(Node); @@ -1520,65 +1456,110 @@ class dynamic_parameter_impl { MNodes.emplace_back(NodeImpl, ArgIndex); } + /// Struct detailing an instance of the usage of the dynamic parameter in a + /// dynamic CG. + struct DynamicCGInfo { + /// Dynamic command-group that uses this dynamic parameter. + std::weak_ptr DynCG; + /// Index of the CG in the Dynamic CG that uses this dynamic parameter. + size_t CGIndex; + /// The arg index in the kernel the dynamic parameter is used. + int ArgIndex; + }; + + /// Registers a dynamic command-group with this dynamic parameter. + /// @param DynCG The dynamic command-group to register. + /// @param CGIndex Index of the CG in DynCG using this dynamic parameter. + /// @param ArgIndex The arg index in the kernel the dynamic parameter is used. + void registerDynCG(std::shared_ptr DynCG, + size_t CGIndex, int ArgIndex) { + MDynCGs.emplace_back(DynamicCGInfo{DynCG, CGIndex, ArgIndex}); + } + /// Get a pointer to the internal value of this dynamic parameter void *getValue() { return MValueStorage.data(); } /// Update sycl_ext_oneapi_raw_kernel_arg parameter /// @param NewRawValue Pointer to a raw_kernel_arg object. /// @param Size Parameter is ignored. - void updateValue(const raw_kernel_arg *NewRawValue, size_t Size) { - // Number of bytes is taken from member of raw_kernel_arg object rather - // than using the size parameter which represents sizeof(raw_kernel_arg). - std::ignore = Size; - size_t RawArgSize = NewRawValue->MArgSize; - const void *RawArgData = NewRawValue->MArgData; - - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - if (NodeShared) { - NodeShared->updateArgValue(ArgIndex, RawArgData, RawArgSize); - } - } - std::memcpy(MValueStorage.data(), RawArgData, RawArgSize); - } + void updateValue(const raw_kernel_arg *NewRawValue, size_t Size); /// Update the internal value of this dynamic parameter as well as the value - /// of this parameter in all registered nodes. + /// of this parameter in all registered nodes and dynamic CGs. /// @param NewValue Pointer to the new value /// @param Size Size of the data pointer to by NewValue - void updateValue(const void *NewValue, size_t Size) { - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - if (NodeShared) { - NodeShared->updateArgValue(ArgIndex, NewValue, Size); - } - } - std::memcpy(MValueStorage.data(), NewValue, Size); - } + void updateValue(const void *NewValue, size_t Size); /// Update the internal value of this dynamic parameter as well as the value - /// of this parameter in all registered nodes. Should only be called for - /// accessor dynamic_parameters. + /// of this parameter in all registered nodes and dynamic CGs. Should only be + /// called for accessor dynamic_parameters. /// @param Acc The new accessor value - void updateAccessor(const sycl::detail::AccessorBaseHost *Acc) { - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - // Should we fail here if the node isn't alive anymore? - if (NodeShared) { - NodeShared->updateAccessor(ArgIndex, Acc); - } - } - std::memcpy(MValueStorage.data(), Acc, - sizeof(sycl::detail::AccessorBaseHost)); - } + void updateAccessor(const sycl::detail::AccessorBaseHost *Acc); + + /// Static helper function for updating command-group value arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param NewValue Pointer to the new value. + /// @param Size Size of the data pointer to by NewValue + static void updateCGArgValue(std::shared_ptr CG, + int ArgIndex, const void *NewValue, size_t Size); + + /// Static helper function for updating command-group accessor arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param Acc The new accessor value + static void updateCGAccessor(std::shared_ptr CG, + int ArgIndex, + const sycl::detail::AccessorBaseHost *Acc); // Weak ptrs to node_impls which will be updated std::vector, int>> MNodes; + // Dynamic command-groups which will be updated + std::vector MDynCGs; std::shared_ptr MGraph; std::vector MValueStorage; }; +class dynamic_command_group_impl + : public std::enable_shared_from_this { +public: + dynamic_command_group_impl( + const command_graph &Graph); + + /// Returns the index of the active command-group + size_t getActiveIndex() const { return MActiveCGF; } + + /// Returns the number of CGs in the dynamic command-group. + size_t getNumCGs() const { return MKernels.size(); } + + /// Set the index of the active command-group. + /// @param Index The new index. + void setActiveIndex(size_t Index); + + /// Instantiates a command-group object for each CGF in the list. + /// @param CGFList List of CGFs to finalize with a handler into CG objects. + void + finalizeCGFList(const std::vector> &CGFList); + + /// Retrieve CG at the currently active index + /// @param Shared pointer to the active CG object. + std::shared_ptr getActiveKernel() const { + return MKernels[MActiveCGF]; + } + + /// Graph this dynamic command-group is associated with. + std::shared_ptr MGraph; + + /// Index of active command-group + std::atomic MActiveCGF; + + /// List of kernel command-groups for dynamic command-group nodes + std::vector> MKernels; + + /// List of nodes using this dynamic command-group. + std::vector> MNodes; +}; } // namespace detail } // namespace experimental } // namespace oneapi diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 2c2a4963bed98..e452eca0c8a6d 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -152,10 +152,6 @@ class handler_impl { ext::oneapi::experimental::detail::dynamic_parameter_impl *, int>> MDynamicParameters; - // Track whether an NDRange was used when submitting a kernel (as opposed to a - // range), needed for graph update - bool MNDRangeUsed = false; - /// The storage for the arguments passed. /// We need to store a copy of values that are passed explicitly through /// set_arg, require and so on, because we need them to be alive after diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 78ee43a320989..091504a983ff3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2527,23 +2527,16 @@ static ur_result_t SetKernelParamsAndLaunch( return Error; } -ur_result_t enqueueImpCommandBufferKernel( - context Ctx, DeviceImplPtr DeviceImpl, - ur_exp_command_buffer_handle_t CommandBuffer, - const CGExecKernel &CommandGroup, - std::vector &SyncPoints, - ur_exp_command_buffer_sync_point_t *OutSyncPoint, - ur_exp_command_buffer_command_handle_t *OutCommand, - const std::function &getMemAllocationFunc) { - auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx); - const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); +namespace { +std::tuple, + const KernelArgMask *> +getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, + DeviceImplPtr DeviceImpl, + std::vector &UrKernelsToRelease, + std::vector &UrProgramsToRelease) { + ur_kernel_handle_t UrKernel = nullptr; - ur_program_handle_t UrProgram = nullptr; - std::shared_ptr SyclKernelImpl = nullptr; std::shared_ptr DeviceImageImpl = nullptr; - - auto Kernel = CommandGroup.MSyclKernel; - auto KernelBundleImplPtr = CommandGroup.MKernelBundle; const KernelArgMask *EliminatedArgMask = nullptr; // Use kernel_bundle if available unless it is interop. @@ -2551,27 +2544,74 @@ ur_result_t enqueueImpCommandBufferKernel( // in interop kernel bundles (if any) do not have kernel_id // and can therefore not be looked up, but since they are self-contained // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { + if (auto KernelBundleImplPtr = CommandGroup.MKernelBundle; + KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { auto KernelName = CommandGroup.MKernelName; kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); + kernel SyclKernel = KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); + + auto SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); UrKernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); - UrProgram = DeviceImageImpl->get_ur_program_ref(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); - } else if (Kernel != nullptr) { + } else if (auto Kernel = CommandGroup.MSyclKernel; Kernel != nullptr) { UrKernel = Kernel->getHandleRef(); - UrProgram = Kernel->getProgramRef(); EliminatedArgMask = Kernel->getKernelArgMask(); } else { + ur_program_handle_t UrProgram = nullptr; std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName); + UrKernelsToRelease.push_back(UrKernel); + UrProgramsToRelease.push_back(UrProgram); + } + return std::make_tuple(UrKernel, DeviceImageImpl, EliminatedArgMask); +} +} // anonymous namespace + +ur_result_t enqueueImpCommandBufferKernel( + context Ctx, DeviceImplPtr DeviceImpl, + ur_exp_command_buffer_handle_t CommandBuffer, + const CGExecKernel &CommandGroup, + std::vector &SyncPoints, + ur_exp_command_buffer_sync_point_t *OutSyncPoint, + ur_exp_command_buffer_command_handle_t *OutCommand, + const std::function &getMemAllocationFunc) { + // List of ur objects to be released after UR call. We don't do anything + // with the ur_program_handle_t objects, but need to update their reference + // count. + std::vector UrKernelsToRelease; + std::vector UrProgramsToRelease; + + ur_kernel_handle_t UrKernel = nullptr; + std::shared_ptr DeviceImageImpl = nullptr; + const KernelArgMask *EliminatedArgMask = nullptr; + + auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx); + std::tie(UrKernel, DeviceImageImpl, EliminatedArgMask) = + getCGKernelInfo(CommandGroup, ContextImpl, DeviceImpl, UrKernelsToRelease, + UrProgramsToRelease); + + // Build up the list of UR kernel handles that the UR command could be + // updated to use. + std::vector AltUrKernels; + const std::vector> + &AlternativeKernels = CommandGroup.MAlternativeKernels; + for (const auto &AltCGKernelWP : AlternativeKernels) { + auto AltCGKernel = AltCGKernelWP.lock(); + assert(AltCGKernel != nullptr); + + ur_kernel_handle_t AltUrKernel = nullptr; + std::tie(AltUrKernel, std::ignore, std::ignore) = + getCGKernelInfo(*AltCGKernel.get(), ContextImpl, DeviceImpl, + UrKernelsToRelease, UrProgramsToRelease); + AltUrKernels.push_back(AltUrKernel); } + const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl, &Ctx, &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { @@ -2622,14 +2662,17 @@ ur_result_t enqueueImpCommandBufferKernel( ur_result_t Res = Adapter->call_nocheck( CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], - &NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(), - SyncPoints.size() ? SyncPoints.data() : nullptr, 0, nullptr, - OutSyncPoint, nullptr, + &NDRDesc.GlobalSize[0], LocalSize, AltUrKernels.size(), + AltUrKernels.size() ? AltUrKernels.data() : nullptr, + SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, 0, + nullptr, OutSyncPoint, nullptr, CommandBufferDesc.isUpdatable ? OutCommand : nullptr); - if (!SyclKernelImpl && !Kernel) { - Adapter->call(UrKernel); - Adapter->call(UrProgram); + for (auto &Kernel : UrKernelsToRelease) { + Adapter->call(Kernel); + } + for (auto &Program : UrProgramsToRelease) { + Adapter->call(Program); } if (Res != UR_RESULT_SUCCESS) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 61ea7ee7be0a0..a7ac73f9e4c34 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -555,11 +555,12 @@ event handler::finalize() { // Find the last node added to the graph from this queue, so our new // node can set it as a predecessor. auto DependentNode = GraphImpl->getLastInorderNode(MQueue); - - NodeImpl = DependentNode - ? GraphImpl->add(NodeType, std::move(CommandGroup), - {DependentNode}) - : GraphImpl->add(NodeType, std::move(CommandGroup)); + std::vector> + Deps; + if (DependentNode) { + Deps.push_back(DependentNode); + } + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), Deps); // If we are recording an in-order queue remember the new node, so it // can be used as a dependency for any more nodes recorded from this @@ -567,12 +568,13 @@ event handler::finalize() { GraphImpl->setLastInorderNode(MQueue, NodeImpl); } else { auto LastBarrierRecordedFromQueue = GraphImpl->getBarrierDep(MQueue); + std::vector> + Deps; + if (LastBarrierRecordedFromQueue) { - NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), - {LastBarrierRecordedFromQueue}); - } else { - NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); + Deps.push_back(LastBarrierRecordedFromQueue); } + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), Deps); if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) { GraphImpl->setBarrierDep(MQueue, NodeImpl); @@ -582,8 +584,6 @@ event handler::finalize() { // Associate an event with this new node and return the event. GraphImpl->addEventForNode(EventImpl, NodeImpl); - NodeImpl->MNDRangeUsed = impl->MNDRangeUsed; - return detail::createSyclObjFromImpl(EventImpl); } @@ -2008,7 +2008,9 @@ std::tuple, bool> handler::getMaxWorkGroups_v2() { return {std::array{0, 0, 0}, false}; } -void handler::setNDRangeUsed(bool Value) { impl->MNDRangeUsed = Value; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::setNDRangeUsed(bool Value) { (void)Value; } +#endif void handler::registerDynamicParameter( ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp new file mode 100644 index 0000000000000..a5e5a1ea78b87 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp @@ -0,0 +1,61 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using dynamic command-group objects with buffer accessors + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + buffer Buf{sycl::range<1>(Size)}; + Buf.set_write_back(false); + auto Acc = Buf.get_access(); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.require(Acc); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.require(Acc); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + std::vector HostData(Size, 0); + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp new file mode 100644 index 0000000000000..7b477edacff98 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp @@ -0,0 +1,79 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests adding a dynamic command-group node to a graph using buffer +// accessors for the node edges. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + int *Ptr = (int *)sycl::malloc_device(Size, Queue); + buffer Buf{sycl::range<1>(Size)}; + Buf.set_write_back(false); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto RootNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(Size, [=](item<1> Item) { Acc[Item.get_id()] = 1; }); + }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto LeafNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for( + Size, [=](item<1> Item) { Ptr[Item.get_id()] = Acc[Item.get_id()]; }); + }); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostData(Size, 0); + Queue.copy(Ptr, HostData.data(), Size).wait(); + + int Ref = PatternA + 1; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternB + 1; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp new file mode 100644 index 0000000000000..a420d7deb58de --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp @@ -0,0 +1,88 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests adding a dynamic command-group node to a graph using buffer +// accessors for the node edges, but where different command-groups +// use different buffers that create identical edges. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + int *Ptr = (int *)sycl::malloc_device(Size, Queue); + buffer BufA{sycl::range<1>(Size)}; + buffer BufB{sycl::range<1>(Size)}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + int InitA = 4; + int InitB = -4; + auto RootNode = Graph.add([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(Size, [=](item<1> Item) { + AccA[Item.get_id()] = InitA; + AccB[Item.get_id()] = InitB; + }); + }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { AccA[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { AccB[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto LeafNode = Graph.add([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(Size, [=](item<1> Item) { + Ptr[Item.get_id()] = AccA[Item.get_id()] + AccB[Item.get_id()]; + }); + }); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostData(Size, 0); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == (InitA + InitB + PatternA)); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = InitA + InitB + PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp new file mode 100644 index 0000000000000..0eaa714463670 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp @@ -0,0 +1,81 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: level_zero +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating an accessor argument to a graph node created from SPIR-V +// using dynamic command-groups. + +#include "../graph_common.hpp" + +int main(int, char **argv) { + queue Queue{}; + sycl::kernel_bundle KernelBundle = loadKernelsFromFile(Queue, argv[1]); + const auto getKernel = + [](sycl::kernel_bundle &bundle, + const std::string &name) { + return bundle.ext_oneapi_get_kernel(name); + }; + + kernel kernel = getKernel( + KernelBundle, "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_"); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + buffer BufA{sycl::range<1>(Size)}; + buffer BufB{sycl::range<1>(Size)}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + auto AccA = BufA.get_access(); + auto AccB = BufB.get_access(); + + auto CGFA = [&](handler &CGH) { + CGH.require(AccA); + CGH.set_arg(0, AccA); + CGH.single_task(kernel); + }; + + auto CGFB = [&](handler &CGH) { + CGH.require(AccB); + CGH.set_arg(0, AccB); + CGH.single_task(kernel); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostDataA(Size, 0); + std::vector HostDataB(Size, 0); + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); + Queue.copy(BufB.get_access(), HostDataB.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); + Queue.copy(BufB.get_access(), HostDataB.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + assert(HostDataB[i] == i); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp new file mode 100644 index 0000000000000..795dc074d882e --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp @@ -0,0 +1,148 @@ +// RUN: %{build} -o %t.out +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating a dynamic command-group with command-groups containing a +// different number of arguments. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + // 3 kernel arguments: Ptr, PatternA, PatternB + int PatternA = 42; + int PatternB = 0xA; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for( + Size, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA + PatternB; }); + }; + + // 2 kernel arguments: Ptr, MyPatternStruct + struct PatternStruct { + int PatternA; + int PatternB; + }; + PatternStruct MyPatternStruct{PatternA + 1, PatternB + 1}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, [=](item<1> Item) { + Ptr[Item.get_id()] = MyPatternStruct.PatternA + MyPatternStruct.PatternB; + }); + }; + + // 1 kernel argument: Ptr + auto CGFC = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = 42 - 0xA; }); + }; + + // 4 kernel argument: Ptr + int PatternC = -12; + auto CGFD = [&](handler &CGH) { + CGH.parallel_for(Size, [=](item<1> Item) { + Ptr[Item.get_id()] = PatternA + PatternB + PatternC; + }); + }; + + // CHECK: <--- urKernelSetArgPointer( + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1:[0-9a-fA-Fx]+]] + // CHECL-SAME: .argIndex = 0 + + // CHECK: <--- urKernelSetArgValue + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1]] + // CHECK-SAME: .argIndex = 1 + + // CHECK: <--- urKernelSetArgValue + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1]] + // CHECK-SAME: .argIndex = 2 + + // CHECK: <--- urCommandBufferAppendKernelLaunchExp + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1]] + // CHECK-SAME: .numKernelAlternatives = 3 + // CHECK-SAME: .phKernelAlternatives = {[[KERNEL_HANDLE2:[0-9a-fA-Fx]+]], [[KERNEL_HANDLE3:[0-9a-fA-Fx]+]], [[KERNEL_HANDLE4:[0-9a-fA-Fx]+]]} + auto DynamicCG = + exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC, CGFD}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // Verify CGFA works with 3 arguments + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = PatternA + PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + // Verify CGFB works with 2 arguments + // CHECK: <--- urCommandBufferUpdateKernelLaunchExp + // CHECK-SAME: .hNewKernel = [[KERNEL_HANDLE2]] + // CHECK-SAME: .numNewMemObjArgs = 0 + // CHECK-SAME: .numNewPointerArgs = 1 + // CHECK-SAME: .numNewValueArgs = 1 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + // CHECK-SAME: .argIndex = 0 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 1 + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = (PatternA + 1) + (PatternB + 1); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + // Verify CGFC works with 1 argument + // CHECK: <--- urCommandBufferUpdateKernelLaunchExp + // CHECK-SAME: .hNewKernel = [[KERNEL_HANDLE3]] + // CHECK-SAME: .numNewMemObjArgs = 0 + // CHECK-SAME: .numNewPointerArgs = 1 + // CHECK-SAME: .numNewValueArgs = 0 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + // CHECK-SAME: .argIndex = 0 + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternA - PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + // Verify CGFD works with 4 arguments + // CHECK: <--- urCommandBufferUpdateKernelLaunchExp + // CHECK-SAME: .hNewKernel = [[KERNEL_HANDLE4]] + // CHECK-SAME: .numNewMemObjArgs = 0 + // CHECK-SAME: .numNewPointerArgs = 1 + // CHECK-SAME: .numNewValueArgs = 3 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + // CHECK-SAME: .argIndex = 0 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 1 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 2 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 3 + DynamicCG.set_active_cgf(3); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternA + PatternB + PatternC; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp new file mode 100644 index 0000000000000..0964f6e0c354e --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests adding a dynamic command-group node to a graph using graph limited +// events for dependencies. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + std::vector HostData(Size); + + Graph.begin_recording(Queue); + int PatternA = 42; + auto EventA = Queue.fill(PtrA, PatternA, Size); + int PatternB = 0xA; + auto EventB = Queue.fill(PtrB, PatternB, Size); + Graph.end_recording(Queue); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(Size, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] * PtrB[I]; + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(Size, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] + PtrB[I]; + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA * PatternB); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA + PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp new file mode 100644 index 0000000000000..237e9173f253e --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating a dynamic command-group node where the dynamic command-groups +// have different ranges/nd-ranges + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + auto RootNode = + Graph.add([&](handler &cgh) { cgh.memset(Ptr, 0, Size * sizeof(int)); }); + + int PatternA = 42; + size_t ItemsA = Size / 2; + sycl::range<1> RangeA{ItemsA}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(RangeA, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + size_t ItemsB = Size / 4; + sycl::nd_range<1> RangeB{sycl::range{ItemsB}, sycl::range{16}}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for( + RangeB, [=](nd_item<1> Item) { Ptr[Item.get_global_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < ItemsA) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < ItemsB) { + assert(HostData[i] == PatternB); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp new file mode 100644 index 0000000000000..261ac6ecf5c3b --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp @@ -0,0 +1,80 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating a dynamic command-group node where the dynamic command-groups +// have different range/nd-range dimensions + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 64; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + auto RootNode = + Graph.add([&](handler &cgh) { cgh.memset(Ptr, 0, N * sizeof(int)); }); + + int PatternA = 42; + sycl::range<1> RangeA{N}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(RangeA, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + sycl::nd_range<3> RangeB{sycl::range{4, 4, 4}, sycl::range{2, 2, 2}}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(RangeB, [=](nd_item<3> Item) { + Ptr[Item.get_global_linear_id()] = PatternB; + }); + }; + + int PatternC = 7; + sycl::range<2> RangeC{8, 8}; + auto CGFC = [&](handler &CGH) { + CGH.parallel_for( + RangeC, [=](item<2> Item) { Ptr[Item.get_linear_id()] = PatternC; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternC); + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp new file mode 100644 index 0000000000000..04697077bec36 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp @@ -0,0 +1,57 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests how the nd-range of a node is overwritten by the active command-group + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + std::vector HostData(Size); + int *Ptr = malloc_device(Size, Queue); + Queue.memset(Ptr, 0, Size * sizeof(int)).wait(); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + size_t NewRange = Size / 2; + sycl::range<1> UpdateRange(NewRange); + DynamicCGNode.update_range(UpdateRange); + + DynamicCG.set_active_cgf(1); + + // Check that the UpdateRange from active CGF 0 is preserved + DynamicCG.set_active_cgf(0); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < NewRange) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp new file mode 100644 index 0000000000000..7049b5bdde305 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp @@ -0,0 +1,71 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating kernel code using dynamic command-groups that have different +// parameters in each command-group. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.wait(); + + int PatternA = 0xA; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { PtrA[Item.get_id()] = PatternA; }); + }; + + int PatternB = 42; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { PtrB[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp new file mode 100644 index 0000000000000..1f98200791b6c --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp @@ -0,0 +1,75 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using the same dynamic command-group in more than one graph node. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + std::vector HostData(Size); + int *Ptr = malloc_device(Size, Queue); + + auto RootNode = + Graph.add([&](handler &CGH) { CGH.memset(Ptr, 0, Size * sizeof(int)); }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto Node1 = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + + auto Node2 = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(Size, [=](item<1> Item) { Ptr[Item.get_id()] *= 2; }); + }, + exp_ext::property::node::depends_on(Node1)); + + auto Node3 = Graph.add(DynamicCG, exp_ext::property::node::depends_on(Node2)); + + // This ND-Range affects Node 1 as well, as the range is tied to the node. + sycl::range<1> Node3Range(Size / 2); + Node3.update_range(Node3Range); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + int Ref = (i < Node3Range.get(0)) ? (PatternA * 3) : 0; + assert(HostData[i] == Ref); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(Node1); + ExecGraph.update(Node3); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = (PatternB * 3); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp new file mode 100644 index 0000000000000..8c0c705960ef6 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp @@ -0,0 +1,46 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests updating a dynamic command-group node after it has been added to +// a graph but before the graph has been finalized + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + DynamicCG.set_active_cgf(1); + auto ExecGraph = Graph.finalize(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp new file mode 100644 index 0000000000000..100701f7b62aa --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp @@ -0,0 +1,56 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating usm kernel code using dynamic command-groups + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp new file mode 100644 index 0000000000000..53b34d1add289 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp @@ -0,0 +1,120 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using a dynamic command-group object with dynamic parameters inside it + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); + + exp_ext::dynamic_parameter DynParam1(Graph, PtrA); + exp_ext::dynamic_parameter DynParam2(Graph, PtrC); + + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParam1); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParam1); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFC = [&](handler &CGH) { + CGH.set_arg(0, DynParam2); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] = i; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto KernelNode = Graph.add( + [&](handler &CGH) { + CGH.set_arg(0, DynParam2); + // TODO: Use the free function kernel extension instead of regular + // kernels when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] += i; + } + }); + }, + exp_ext::property::node::depends_on(DynamicCGNode)); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (A ? i : 0)); + assert(HostDataB[i] == (B ? i : 0)); + assert(HostDataC[i] == (C ? (2 * i) : i)); + } + }; + ExecuteGraphAndVerifyResults(true, false, false); + + DynParam1.update(PtrB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_cgf(2); + // Should be ignored as DynParam1 not used in active node + DynParam1.update(PtrA); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, false, true); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp new file mode 100644 index 0000000000000..00482185ebc27 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp @@ -0,0 +1,136 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using a dynamic command-group object with dynamic parameters of +// different types + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); + + int ScalarValue = 17; + exp_ext::dynamic_parameter DynParamScalar(Graph, ScalarValue); + exp_ext::dynamic_parameter DynParamPtr(Graph, PtrA); + + // Kernel has 2 dynamic parameters, one of scalar type & one of ptr type + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParamPtr); + CGH.set_arg(1, DynParamScalar); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = ScalarValue; + } + }); + }; + + // Kernel has a single argument, a dynamic parameter of ptr type + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParamPtr); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = ScalarValue; + } + }); + }; + + // Kernel has a two arguments, an immutable ptr type argument and a + // dynamic parameter of scalar type. + auto CGFC = [&](handler &CGH) { + CGH.set_arg(1, DynParamScalar); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] = ScalarValue; + } + }); + }; + + // Kernel has a single argument, of immutable pointer type + auto CGFD = [&](handler &CGH) { + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = ScalarValue; + } + }); + }; + + auto DynamicCG = + exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC, CGFD}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](int A, int B, int C) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == A); + assert(HostDataB[i] == B); + assert(HostDataC[i] == C); + } + }; + // CGFA using PtrA and ScalarValue in its dynamic parameters + ExecuteGraphAndVerifyResults(ScalarValue, 0, 0); + + // CGFA using PtrB and UpdatedScalarValue in its dynamic parameters + DynParamPtr.update(PtrB); + int UpdatedScalarValue = 42; + DynParamScalar.update(UpdatedScalarValue); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(0, UpdatedScalarValue, 0); + + // CGFB using PtrB in its dynamic parameter and immutable ScalarValue + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(0, ScalarValue, false); + + // CGFC using immutable PtrC and UpdatedScalarValue in its dynamic parameter + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(0, 0, UpdatedScalarValue); + + // CGFD using immutable PtrA and immutable ScalarValue for arguments + DynamicCG.set_active_cgf(3); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(ScalarValue, 0, 0); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp new file mode 100644 index 0000000000000..3213fc4eec2fe --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp @@ -0,0 +1,106 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using a dynamic command-group object where some but not all the +// command-groups use dynamic parameters. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); + + exp_ext::dynamic_parameter DynParam(Graph, PtrA); + + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParam); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParam); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFC = [&](handler &CGH) { + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] = i; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (A ? i : 0)); + assert(HostDataB[i] == (B ? i : 0)); + assert(HostDataC[i] == (C ? i : 0)); + } + }; + // CGFA with DynParam using PtrA + ExecuteGraphAndVerifyResults(true, false, false); + + // CGFA with DynParam using PtrB + DynParam.update(PtrB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + // CGFB with DynParam using PtrB + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + // CGFC unconditionally using PtrC + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, false, true); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp b/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp new file mode 100644 index 0000000000000..43db9d172e618 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp @@ -0,0 +1,54 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// Tests updating a graph node from sycl::nd_range to sycl::range + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + + Queue.memset(PtrA, 0, Size * sizeof(int)).wait(); + + nd_range<1> NDRange{range{Size}, range{32}}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + PtrA[GlobalID] += GlobalID; + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // first half of PtrA should be filled with values + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + } + + // Update NDRange to target first half only + size_t UpdateSize = Size / 2; + KernelNode.update_range(range<1>{UpdateSize}); + ExecGraph.update(KernelNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (i >= UpdateSize ? i : i * 2)); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp b/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp new file mode 100644 index 0000000000000..94052c8379b58 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp @@ -0,0 +1,55 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// Tests updating a graph node from using a sycl::range to a sycl::nd_range + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + + Queue.memset(PtrA, 0, Size * sizeof(int)).wait(); + + range<1> Range{Size}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(Range, [=](item<1> Item) { + size_t GlobalID = Item.get_id(); + PtrA[GlobalID] += GlobalID; + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // first half of PtrA should be filled with values + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + } + + // Update NDRange to target first half only + size_t UpdateSize = Size / 2; + nd_range<1> NDRange{range{UpdateSize}, range{32}}; + KernelNode.update_nd_range(NDRange); + ExecGraph.update(KernelNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (i >= UpdateSize ? i : i * 2)); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp new file mode 100644 index 0000000000000..0e8b87c0725f2 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp @@ -0,0 +1,76 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests interaction of whole graph update and dynamic command-groups + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()}; + exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCGA = exp_ext::dynamic_command_group(GraphA, {CGFA, CGFB}); + auto DynamicCGNodeA = GraphA.add(DynamicCGA); + + auto DynamicCGB = exp_ext::dynamic_command_group(GraphB, {CGFA, CGFB}); + auto DynamicCGNodeB = GraphB.add(DynamicCGB); + DynamicCGB.set_active_cgf(1); // Check if doesn't affect GraphA + + auto ExecGraph = GraphA.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA); + } + + // Graph B has CGF B as active, while Graph A has CGF A as active. + // Different command-groups should error due to being different + // kernels. + std::error_code ErrorCode = make_error_code(sycl::errc::success); + try { + ExecGraph.update(GraphB); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + assert(ErrorCode == sycl::errc::invalid); + + // Both ExecGraph and Graph B have CGFB as active, so + // whole graph update should be valid as graphs match. + DynamicCGA.set_active_cgf(1); + ExecGraph.update(DynamicCGNodeA); + ExecGraph.update(GraphB); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index c9a4922e7fd46..b894685a8bd87 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -5,8 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// REQUIRES: aspect-usm_shared_allocations - // Tests that whole graph update works when using dynamic parameters. #include "../graph_common.hpp" diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 11d85801727c7..621765568d50c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3023,6 +3023,9 @@ _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmRKNS3_16image_desc _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmRKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmmmjRKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmmmjRKNS0_6deviceERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental21dynamic_command_group14set_active_cgfEm +_ZN4sycl3_V13ext6oneapi12experimental21dynamic_command_groupC1ERKNS3_13command_graphILNS3_11graph_stateE0EEERKSt6vectorISt8functionIFvRNS0_7handlerEEESaISF_EE +_ZN4sycl3_V13ext6oneapi12experimental21dynamic_command_groupC2ERKNS3_13command_graphILNS3_11graph_stateE0EEERKSt6vectorISt8functionIFvRNS0_7handlerEEESaISF_EE _ZN4sycl3_V13ext6oneapi12experimental21get_composite_devicesEv _ZN4sycl3_V13ext6oneapi12experimental22get_image_channel_typeENS3_16image_mem_handleERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental22get_image_channel_typeENS3_16image_mem_handleERKNS0_6deviceERKNS0_7contextE @@ -3081,6 +3084,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph15begin_re _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph15begin_recordingERNS0_5queueERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph24addGraphLeafDependenciesENS3_4nodeE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplERKSt6vectorINS3_4nodeESaIS7_EE +_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplERNS3_21dynamic_command_groupERKSt6vectorINS3_4nodeESaIS9_EE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplESt8functionIFvRNS0_7handlerEEERKSt6vectorINS3_4nodeESaISC_EE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph9make_edgeERNS3_4nodeES7_ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_5queueERKNS0_13property_listE @@ -3597,6 +3601,7 @@ _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem10get_deviceEv _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem11get_contextEv _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem3mapEmmNS3_19address_access_modeEm _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem4sizeEv +_ZNK4sycl3_V13ext6oneapi12experimental21dynamic_command_group14get_active_cgfEv _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2b6dbdb2fe52b..b0b7fc3f0112d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -326,6 +326,15 @@ ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@_KPEBX@Z +?get_active_cgf@dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ +??4dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z +?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@AEAVdynamic_command_group@34567@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z +??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV?$command_graph@$0A@@12345@AEBV?$vector@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V?$allocator@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@@2@@std@@@Z +??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z +??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z +?set_active_cgf@dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAX_K@Z +??1dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??4dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??0event@_V1@sycl@@AEAA@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ??0event@_V1@sycl@@QEAA@$$QEAV012@@Z ??0event@_V1@sycl@@QEAA@AEBV012@@Z diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 1542b5f34d7dc..90d95975a0245 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -700,3 +700,166 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { }), sycl::exception); } + +// Error when a dynamic command-group is used with a graph belonging to a +// different graph. +TEST_F(CommandGraphTest, DynamicCommandGroupWrongGraph) { + experimental::command_graph Graph1{Queue.get_context(), Queue.get_device()}; + experimental::command_graph Graph2{Queue.get_context(), Queue.get_device()}; + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + + experimental::dynamic_command_group DynCG(Graph2, {CGF}); + ASSERT_THROW(Graph1.add(DynCG), sycl::exception); +} + +// Error when a non-kernel command-group is included in a dynamic command-group +TEST_F(CommandGraphTest, DynamicCommandGroupNotKernel) { + int *Ptr = malloc_device(1, Queue); + auto CGF = [&](sycl::handler &CGH) { CGH.memset(Ptr, 1, 0); }; + + experimental::command_graph Graph{Queue}; + ASSERT_THROW(experimental::dynamic_command_group DynCG(Graph, {CGF}), + sycl::exception); + sycl::free(Ptr, Queue); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using graph limited events to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchEventEdges) { + size_t N = 32; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); + }); + + auto EventB = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on(EventA); + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on(EventB); + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); +} + +// Test that an exception is thrown when a graph isn't created with buffer +// property, but buffers are used. +TEST_F(CommandGraphTest, DynamicCommandGroupBufferThrows) { + size_t N = 32; + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + auto CGFA = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); +} + +// Test and exception is thrown when using a host-accessor to a buffer +// used in a non active CGF node in the graph. +TEST_F(CommandGraphTest, DynamicCommandGroupBufferHostAccThrows) { + size_t N = 32; + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + + int *Ptr = malloc_device(N, Queue); + + { + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_NO_THROW(Graph.add(DynCG)); + + ASSERT_THROW({ host_accessor HostAcc{Buf}; }, sycl::exception); + } + + sycl::free(Ptr, Queue); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using accessors to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) { + size_t N = 32; + std::vector HostData(N, 0); + buffer BufA{HostData}; + buffer BufB{HostData}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + + Queue.submit([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); + }); + + Queue.submit([&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); +} diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index 94296507996dd..b943b9c43dd98 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -136,7 +136,6 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { TEST_F(CommandGraphTest, UpdateRangeErrors) { // Test that the correct errors are throw when trying to update node ranges - nd_range<1> NDRange{range{128}, range{32}}; range<1> Range{128}; auto NodeNDRange = Graph.add([&](sycl::handler &cgh) { @@ -145,11 +144,12 @@ TEST_F(CommandGraphTest, UpdateRangeErrors) { // OK EXPECT_NO_THROW(NodeNDRange.update_nd_range(NDRange)); - // Can't update an nd_range node with a range - EXPECT_ANY_THROW(NodeNDRange.update_range(Range)); + // OK to update an nd_range node with a range of the same dimension + EXPECT_NO_THROW(NodeNDRange.update_range(Range)); // Can't update with a different number of dimensions EXPECT_ANY_THROW(NodeNDRange.update_nd_range( nd_range<2>{range<2>{128, 128}, range<2>{32, 32}})); + EXPECT_ANY_THROW(NodeNDRange.update_range(range<3>{32, 32, 1})); auto NodeRange = Graph.add([&](sycl::handler &cgh) { cgh.parallel_for>(range<1>{128}, [](item<1>) {}); @@ -157,10 +157,12 @@ TEST_F(CommandGraphTest, UpdateRangeErrors) { // OK EXPECT_NO_THROW(NodeRange.update_range(Range)); - // Can't update a range node with an nd_range - EXPECT_ANY_THROW(NodeRange.update_nd_range(NDRange)); + // OK to update a range node with an nd_range of the same dimension + EXPECT_NO_THROW(NodeRange.update_nd_range(NDRange)); // Can't update with a different number of dimensions EXPECT_ANY_THROW(NodeRange.update_range(range<2>{128, 128})); + EXPECT_ANY_THROW(NodeRange.update_nd_range( + nd_range<3>{range<3>{8, 8, 8}, range<3>{8, 8, 8}})); } class WholeGraphUpdateTest : public CommandGraphTest { From 21018abfa8e86dc2f5e6b6cfa02fbfae2b807912 Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Fri, 8 Nov 2024 12:36:38 +0000 Subject: [PATCH 14/24] Bump UR tag to ed9fe09 for L0 loader update (#16013) Update UR tag to pull in https://github.com/oneapi-src/unified-runtime/pull/2279 --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index b739ed6beadc8..0137ef119ce41 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 09ae26af4e4e4301177db704b3b109ecd388c846 -# Merge: d2f69680 dd920712 +# commit ed9fe09f9987bbe86715f191e6dbe5695ebf0306 +# Merge: 09ae26af dc5246dd # Author: Callum Fare -# Date: Thu Nov 7 11:43:19 2024 +0000 -# Merge pull request #2233 from martygrant/martin/memory-cts-spec-gap-2 -# Improvements to align CTS and Spec for Memory -set(UNIFIED_RUNTIME_TAG 09ae26af4e4e4301177db704b3b109ecd388c846) +# Date: Fri Nov 8 10:29:25 2024 +0000 +# Merge pull request #2279 from nrspruit/update_loader_1_18 +# [L0] Update Level Zero Loader version to v1.18.5 +set(UNIFIED_RUNTIME_TAG ed9fe09f9987bbe86715f191e6dbe5695ebf0306) From e68ebeb8cf60cd6720907523d526772764d6d8c0 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 8 Nov 2024 08:10:06 -0800 Subject: [PATCH 15/24] [SYCL] SYCL_JIT support on Windows for kernel_compiler (#16018) Presently the kernel_compiler extension uses the sycl-jit compiler on Linux, but on Windows it simply reports back that the support is unavailable. In this PR sycl-jit is made available for Windows as well for its use by the kernel_compiler. In a follow-on PR I will remove `sycl_jit` as a source language enumeration and make the SYCL_JIT mechanism the default when the kernel_compiler is compiling SYCL code, as well as remove the invoking one, rather than trying to do everything at once. --- sycl-jit/CMakeLists.txt | 30 +++--- sycl-jit/jit-compiler/CMakeLists.txt | 4 + sycl-jit/jit-compiler/include/KernelFusion.h | 38 +++++--- sycl-jit/jit-compiler/lib/KernelFusion.cpp | 30 +++--- .../lib/rtc/DeviceCompilation.cpp | 47 +++++++++ sycl-jit/passes/CMakeLists.txt | 95 ++++++++++--------- sycl-jit/passes/target/TargetFusionInfo.cpp | 9 +- sycl/CMakeLists.txt | 5 - sycl/source/detail/jit_compiler.cpp | 14 +++ sycl/source/detail/os_util.cpp | 4 +- sycl/source/detail/scheduler/scheduler.cpp | 6 +- sycl/source/detail/windows_os_utils.hpp | 2 + .../kernel_compiler_sycl_jit.cpp | 12 +-- sycl/test/CMakeLists.txt | 5 +- 14 files changed, 193 insertions(+), 108 deletions(-) diff --git a/sycl-jit/CMakeLists.txt b/sycl-jit/CMakeLists.txt index 874856a63d363..b790455ceeed0 100644 --- a/sycl-jit/CMakeLists.txt +++ b/sycl-jit/CMakeLists.txt @@ -9,19 +9,23 @@ set(SYCL_JIT_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # directories, similar to how clang/CMakeLists.txt does it. set(LLVM_SPIRV_INCLUDE_DIRS "${LLVM_MAIN_SRC_DIR}/../llvm-spirv/include") -# Set library-wide warning options. -set(SYCL_JIT_WARNING_FLAGS -Wall -Wextra) +if (NOT WIN32 AND NOT CYGWIN) + # Set library-wide warning options. + set(SYCL_JIT_WARNING_FLAGS -Wall -Wextra) -option(SYCL_JIT_ENABLE_WERROR "Treat all warnings as errors in SYCL kernel JIT library" ON) -if(SYCL_JIT_ENABLE_WERROR) - list(APPEND SYCL_JIT_WARNING_FLAGS -Werror) -endif(SYCL_JIT_ENABLE_WERROR) + option(SYCL_JIT_ENABLE_WERROR "Treat all warnings as errors in SYCL kernel JIT library" ON) + if(SYCL_JIT_ENABLE_WERROR) + list(APPEND SYCL_JIT_WARNING_FLAGS -Werror) + endif(SYCL_JIT_ENABLE_WERROR) +endif() -if(WIN32) - message(WARNING "Kernel JIT not yet supported on Windows") -else(WIN32) - add_subdirectory(common) - add_subdirectory(jit-compiler) - add_subdirectory(passes) + +add_subdirectory(common) +add_subdirectory(jit-compiler) +add_subdirectory(passes) + +# Loadable plugins for opt aren't supported on Windows, +# so we can't execute the tests. +if (NOT WIN32 AND NOT CYGWIN) add_subdirectory(test) -endif(WIN32) +endif() diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 09af2de6853ae..6dc5154486c6f 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -40,6 +40,10 @@ add_llvm_library(sycl-jit clangSerialization ) +if(WIN32) + target_link_libraries(sycl-jit PRIVATE Shlwapi) +endif() + target_compile_options(sycl-jit PRIVATE ${SYCL_JIT_WARNING_FLAGS}) # Mark LLVM and SPIR-V headers as system headers to ignore warnigns in them. diff --git a/sycl-jit/jit-compiler/include/KernelFusion.h b/sycl-jit/jit-compiler/include/KernelFusion.h index f149e05692627..d3575f33189aa 100644 --- a/sycl-jit/jit-compiler/include/KernelFusion.h +++ b/sycl-jit/jit-compiler/include/KernelFusion.h @@ -9,6 +9,12 @@ #ifndef SYCL_FUSION_JIT_COMPILER_KERNELFUSION_H #define SYCL_FUSION_JIT_COMPILER_KERNELFUSION_H +#ifdef _WIN32 +#define KF_EXPORT_SYMBOL __declspec(dllexport) +#else +#define KF_EXPORT_SYMBOL +#endif + #include "Kernel.h" #include "Options.h" #include "Parameter.h" @@ -55,25 +61,31 @@ extern "C" { #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type-c-linkage" #endif // __clang__ -JITResult fuseKernels(View KernelInformation, - const char *FusedKernelName, - View Identities, - BarrierFlags BarriersFlags, - View Internalization, - View JITConstants); -JITResult materializeSpecConstants(const char *KernelName, - jit_compiler::SYCLKernelBinaryInfo &BinInfo, - View SpecConstBlob); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4190) +#endif // _MSC_VER + +KF_EXPORT_SYMBOL JITResult +fuseKernels(View KernelInformation, const char *FusedKernelName, + View Identities, BarrierFlags BarriersFlags, + View Internalization, + View JITConstants); + +KF_EXPORT_SYMBOL JITResult materializeSpecConstants( + const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo, + View SpecConstBlob); -JITResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs); +KF_EXPORT_SYMBOL JITResult compileSYCL(InMemoryFile SourceFile, + View IncludeFiles, + View UserArgs); /// Clear all previously set options. -void resetJITConfiguration(); +KF_EXPORT_SYMBOL void resetJITConfiguration(); /// Add an option to the configuration. -void addToJITConfiguration(OptionStorage &&Opt); +KF_EXPORT_SYMBOL void addToJITConfiguration(OptionStorage &&Opt); } // end of extern "C" diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index 81037438061ae..86317c23e78de 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -71,10 +71,9 @@ static bool isTargetFormatSupported(BinaryFormat TargetFormat) { } } -extern "C" JITResult -materializeSpecConstants(const char *KernelName, - jit_compiler::SYCLKernelBinaryInfo &BinInfo, - View SpecConstBlob) { +extern "C" KF_EXPORT_SYMBOL JITResult materializeSpecConstants( + const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo, + View SpecConstBlob) { auto &JITCtx = JITContext::getInstance(); TargetInfo TargetInfo = ConfigHelper::get(); @@ -115,12 +114,11 @@ materializeSpecConstants(const char *KernelName, return JITResult{MaterializerKernelInfo}; } -extern "C" JITResult fuseKernels(View KernelInformation, - const char *FusedKernelName, - View Identities, - BarrierFlags BarriersFlags, - View Internalization, - View Constants) { +extern "C" KF_EXPORT_SYMBOL JITResult +fuseKernels(View KernelInformation, const char *FusedKernelName, + View Identities, BarrierFlags BarriersFlags, + View Internalization, + View Constants) { std::vector KernelsToFuse; llvm::transform(KernelInformation, std::back_inserter(KernelsToFuse), @@ -236,9 +234,9 @@ extern "C" JITResult fuseKernels(View KernelInformation, return JITResult{FusedKernelInfo}; } -extern "C" JITResult compileSYCL(InMemoryFile SourceFile, - View IncludeFiles, - View UserArgs) { +extern "C" KF_EXPORT_SYMBOL JITResult +compileSYCL(InMemoryFile SourceFile, View IncludeFiles, + View UserArgs) { auto ModuleOrErr = compileDeviceCode(SourceFile, IncludeFiles, UserArgs); if (!ModuleOrErr) { return errorToFusionResult(ModuleOrErr.takeError(), @@ -261,8 +259,10 @@ extern "C" JITResult compileSYCL(InMemoryFile SourceFile, return JITResult{Kernel}; } -extern "C" void resetJITConfiguration() { ConfigHelper::reset(); } +extern "C" KF_EXPORT_SYMBOL void resetJITConfiguration() { + ConfigHelper::reset(); +} -extern "C" void addToJITConfiguration(OptionStorage &&Opt) { +extern "C" KF_EXPORT_SYMBOL void addToJITConfiguration(OptionStorage &&Opt) { ConfigHelper::getConfig().set(std::move(Opt)); } diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 6054cc5927eae..f694c8cd57136 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -20,6 +20,49 @@ static char X; // Dummy symbol, used as an anchor for `dlinfo` below. #endif +#ifdef _WIN32 +#include // For std::filesystem::path ( C++17 only ) +#include // For PathRemoveFileSpec +#include // For GetModuleFileName, HMODULE, DWORD, MAX_PATH + +// cribbed from sycl/source/detail/os_util.cpp +using OSModuleHandle = intptr_t; +static constexpr OSModuleHandle ExeModuleHandle = -1; +static OSModuleHandle getOSModuleHandle(const void *VirtAddr) { + HMODULE PhModule; + DWORD Flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT; + auto LpModuleAddr = reinterpret_cast(VirtAddr); + if (!GetModuleHandleExA(Flag, LpModuleAddr, &PhModule)) { + // Expect the caller to check for zero and take + // necessary action + return 0; + } + if (PhModule == GetModuleHandleA(nullptr)) + return ExeModuleHandle; + return reinterpret_cast(PhModule); +} + +// cribbed from sycl/source/detail/os_util.cpp +/// Returns an absolute path where the object was found. +std::wstring getCurrentDSODir() { + wchar_t Path[MAX_PATH]; + auto Handle = getOSModuleHandle(reinterpret_cast(&getCurrentDSODir)); + DWORD Ret = GetModuleFileName( + reinterpret_cast(ExeModuleHandle == Handle ? 0 : Handle), Path, + MAX_PATH); + assert(Ret < MAX_PATH && "Path is longer than MAX_PATH?"); + assert(Ret > 0 && "GetModuleFileName failed"); + (void)Ret; + + BOOL RetCode = PathRemoveFileSpec(Path); + assert(RetCode && "PathRemoveFileSpec failed"); + (void)RetCode; + + return Path; +} +#endif // _WIN32 + static constexpr auto InvalidDPCPPRoot = ""; static const std::string &getDPCPPRoot() { @@ -42,6 +85,10 @@ static const std::string &getDPCPPRoot() { } #endif // _GNU_SOURCE +#ifdef _WIN32 + DPCPPRoot = std::filesystem::path(getCurrentDSODir()).parent_path().string(); +#endif // _WIN32 + // TODO: Implemenent other means of determining the DPCPP root, e.g. // evaluating the `CMPLR_ROOT` env. diff --git a/sycl-jit/passes/CMakeLists.txt b/sycl-jit/passes/CMakeLists.txt index 29e83d225d81b..b6cb30bd809f3 100644 --- a/sycl-jit/passes/CMakeLists.txt +++ b/sycl-jit/passes/CMakeLists.txt @@ -1,49 +1,54 @@ -# Module library for usage as library/pass-plugin with LLVM opt. -add_llvm_library(SYCLKernelJIT MODULE - SYCLFusionPasses.cpp - kernel-fusion/Builtins.cpp - kernel-fusion/SYCLKernelFusion.cpp - kernel-fusion/SYCLSpecConstMaterializer.cpp - kernel-info/SYCLKernelInfo.cpp - internalization/Internalization.cpp - syclcp/SYCLCP.cpp - cleanup/Cleanup.cpp - debug/PassDebug.cpp - target/TargetFusionInfo.cpp - - DEPENDS - intrinsics_gen -) +# See llvm/examples/Bye/CmakeLists.txt as to why this kind of loadable plugin libraries +# isn't supported on Windows. +if (NOT WIN32 AND NOT CYGWIN) + # Module library for usage as library/pass-plugin with LLVM opt. + add_llvm_library(SYCLKernelJIT MODULE + SYCLFusionPasses.cpp + kernel-fusion/Builtins.cpp + kernel-fusion/SYCLKernelFusion.cpp + kernel-fusion/SYCLSpecConstMaterializer.cpp + kernel-info/SYCLKernelInfo.cpp + internalization/Internalization.cpp + syclcp/SYCLCP.cpp + cleanup/Cleanup.cpp + debug/PassDebug.cpp + target/TargetFusionInfo.cpp + + DEPENDS + intrinsics_gen + ) + + target_compile_options(SYCLKernelJIT PRIVATE ${SYCL_JIT_WARNING_FLAGS}) + + # Mark LLVM headers as system headers to ignore warnigns in them. This + # classification remains intact even if the same path is added as a normal + # include path in GCC and Clang. + target_include_directories(SYCLKernelJIT + SYSTEM PRIVATE + ${LLVM_MAIN_INCLUDE_DIR} + ) + target_include_directories(SYCLKernelJIT + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + PRIVATE + ${SYCL_JIT_BASE_DIR}/common/include + ) + + target_link_libraries(SYCLKernelJIT + PRIVATE + sycl-jit-common + ) + + add_dependencies(SYCLKernelJIT sycl-headers) + + if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_PTX) + endif() + + if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_AMDGCN) + endif() -target_compile_options(SYCLKernelJIT PRIVATE ${SYCL_JIT_WARNING_FLAGS}) - -# Mark LLVM headers as system headers to ignore warnigns in them. This -# classification remains intact even if the same path is added as a normal -# include path in GCC and Clang. -target_include_directories(SYCLKernelJIT - SYSTEM PRIVATE - ${LLVM_MAIN_INCLUDE_DIR} -) -target_include_directories(SYCLKernelJIT - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE - ${SYCL_JIT_BASE_DIR}/common/include -) - -target_link_libraries(SYCLKernelJIT - PRIVATE - sycl-jit-common -) - -add_dependencies(SYCLKernelJIT sycl-headers) - -if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_PTX) -endif() - -if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) - target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_AMDGCN) endif() # Static library for linking with the jit_compiler diff --git a/sycl-jit/passes/target/TargetFusionInfo.cpp b/sycl-jit/passes/target/TargetFusionInfo.cpp index af6589a1609ab..eacd339595432 100644 --- a/sycl-jit/passes/target/TargetFusionInfo.cpp +++ b/sycl-jit/passes/target/TargetFusionInfo.cpp @@ -356,9 +356,12 @@ class SPIRVTargetFusionInfo : public TargetFusionInfoImpl { Name = Name.drop_front(Name.find(SPIRVBuiltinPrefix) + SPIRVBuiltinPrefix.size()); // Check that Name does not start with any name in UnsafeBuiltIns - const auto *Iter = - std::upper_bound(UnsafeBuiltIns.begin(), UnsafeBuiltIns.end(), Name); - return Iter == UnsafeBuiltIns.begin() || !Name.starts_with(*(Iter - 1)); + for (const StringRef &Unsafe : UnsafeBuiltIns) { + if (Name.starts_with(Unsafe)) { + return false; + } + } + return true; } unsigned getIndexSpaceBuiltinBitwidth() const override { return 64; } diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 385c70a04679f..a29bfc6310e39 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -30,11 +30,6 @@ endif() # Option to enable JIT, this in turn makes kernel fusion and spec constant # materialization possible. option(SYCL_ENABLE_EXTENSION_JIT "Enable extension to JIT kernels" ON) -if(SYCL_ENABLE_EXTENSION_JIT AND WIN32) - message(WARNING "Extension to JIT kernels not yet supported on Windows") - set(SYCL_ENABLE_EXTENSION_JIT OFF CACHE - BOOL "Extension to JIT kernels not yet supported on Windows" FORCE) -endif() if (NOT XPTI_INCLUDES) set(XPTI_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../xpti/include) diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index c0c22954822b7..19f1915943f05 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,12 @@ static inline void printPerformanceWarning(const std::string &Message) { jit_compiler::jit_compiler() { auto checkJITLibrary = [this]() -> bool { +#ifdef _WIN32 + static const std::string dir = sycl::detail::OSUtil::getCurrentDSODir(); + static const std::string JITLibraryName = dir + "\\" + "sycl-jit.dll"; +#else static const std::string JITLibraryName = "libsycl-jit.so"; +#endif void *LibraryPtr = sycl::detail::ur::loadOsLibrary(JITLibraryName); if (LibraryPtr == nullptr) { @@ -625,6 +631,7 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants( QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, const std::vector &SpecConstBlob) { +#ifndef _WIN32 if (!BinImage) { throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "No suitable IR available for materializing"); @@ -716,6 +723,13 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants( } return NewKernel; +#else // _WIN32 + (void)Queue; + (void)BinImage; + (void)KernelName; + (void)SpecConstBlob; + return nullptr; +#endif // _WIN32 } std::unique_ptr diff --git a/sycl/source/detail/os_util.cpp b/sycl/source/detail/os_util.cpp index 860ad71f9f7ea..c8ee3b8f33c11 100644 --- a/sycl/source/detail/os_util.cpp +++ b/sycl/source/detail/os_util.cpp @@ -148,8 +148,8 @@ std::string OSUtil::getDirName(const char *Path) { #elif defined(__SYCL_RT_OS_WINDOWS) /// Returns an absolute path where the object was found. -// ur_win_proxy_loader.dll uses this same logic. If it is changed -// significantly, it might be wise to change it there too. +// ur_win_proxy_loader.dll and sycl-jit.dll use this same logic. If it is +// changed significantly, it might be wise to change it there too. std::string OSUtil::getCurrentDSODir() { char Path[MAX_PATH]; Path[0] = '\0'; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index ac1d8ca44c5dc..efbbb52acab73 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -583,16 +583,16 @@ ur_kernel_handle_t Scheduler::completeSpecConstMaterialization( [[maybe_unused]] const RTDeviceBinaryImage *BinImage, [[maybe_unused]] const std::string &KernelName, [[maybe_unused]] std::vector &SpecConstBlob) { -#if SYCL_EXT_JIT_ENABLE +#if SYCL_EXT_JIT_ENABLE && !_WIN32 return detail::jit_compiler::get_instance().materializeSpecConstants( Queue, BinImage, KernelName, SpecConstBlob); -#else // SYCL_EXT_JIT_ENABLE +#else // SYCL_EXT_JIT_ENABLE && !_WIN32 if (detail::SYCLConfig::get() > 0) { std::cerr << "WARNING: Materialization of spec constants not supported by " "this build\n"; } return nullptr; -#endif // SYCL_EXT_JIT_ENABLE +#endif // SYCL_EXT_JIT_ENABLE && !_WIN32 } EventImplPtr Scheduler::addCommandGraphUpdate( diff --git a/sycl/source/detail/windows_os_utils.hpp b/sycl/source/detail/windows_os_utils.hpp index 690fbba46371c..f9141f2a4c5e1 100644 --- a/sycl/source/detail/windows_os_utils.hpp +++ b/sycl/source/detail/windows_os_utils.hpp @@ -10,6 +10,8 @@ #include +// ur_proxy_loader.dll and sycl-jit.dll use this same logic. If it changed +// significantly, then it'd be wise to update those versions as well. using OSModuleHandle = intptr_t; constexpr OSModuleHandle ExeModuleHandle = -1; inline OSModuleHandle getOSModuleHandle(const void *VirtAddr) { diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index cc45096b8564c..1588d55dfd2e2 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -9,10 +9,6 @@ // REQUIRES: (opencl || level_zero) // UNSUPPORTED: accelerator -// UNSUPPORTED: windows -// UNSUPPORTED-TRACKER: CMPLRLLVM-63166 -// in CMakeLists). - // RUN: %{build} -o %t.out // RUN: %{run} %t.out // RUN: %{l0_leak_check} %{run} %t.out @@ -95,7 +91,7 @@ void test_1(sycl::queue &Queue, sycl::kernel &Kernel, int seed) { sycl::free(usmPtr, Queue); } -void test_build_and_run() { +int test_build_and_run() { namespace syclex = sycl::ext::oneapi::experimental; using source_kb = sycl::kernel_bundle; using exe_kb = sycl::kernel_bundle; @@ -110,7 +106,7 @@ void test_build_and_run() { "kernel bundle extension: " << q.get_device().get_info() << std::endl; - return; + return -1; } // Create from source. @@ -141,12 +137,14 @@ void test_build_and_run() { // Test the kernels. test_1(q, k, 37 + 5); // ff_cp seeds 37. AddEm will add 5 more. + + return 0; } int main() { #ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER - test_build_and_run(); + return test_build_and_run(); #else static_assert(false, "Kernel Compiler feature test macro undefined"); #endif diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index be2332d4a4c8a..144e2204361ef 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -96,6 +96,7 @@ add_lit_testsuite(check-sycl-dumps "Running ABI dump tests only" EXCLUDE_FROM_CHECK_ALL ) -if(SYCL_ENABLE_EXTENSION_JIT) +if(NOT WIN32 AND SYCL_ENABLE_EXTENSION_JIT) + # lit-based testing of JIT passes isn't supported on Windows. add_dependencies(check-sycl check-sycl-jit) -endif(SYCL_ENABLE_EXTENSION_JIT) +endif() From 22cb417203681919e58f2f44dded72e260a554e8 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 11 Nov 2024 07:49:22 +0100 Subject: [PATCH 16/24] [SYCL][E2E] Fix build flag for device_global_copy (#15994) The new test device_global_copy fails to build on Windows due to unrecognized -std option. This commit fixes the flag. Signed-off-by: Larsen, Steffen --- sycl/test-e2e/DeviceGlobal/device_global_copy.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp b/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp index 2d9ea347179ce..e46c6286e2e7a 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp @@ -1,4 +1,6 @@ -// RUN: %{build} -std=c++23 -o %t.out +// DEFINE: %{cpp23} = %if cl_options %{/std:c++23%} %else %{-std=c++23%} + +// RUN: %{build} %{cpp23} -o %t.out // RUN: %{run} %t.out // // UNSUPPORTED: opencl && gpu From 7dd09d9effd6a56d2c0c7abca38b21baa4d77fc8 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 11 Nov 2024 14:52:02 +0800 Subject: [PATCH 17/24] [SYCL] Add asan AOT libdevice for different GPU/CPU (#15939) Device address sanitizer libraries can be optimized in AOT compilation mode, some redundant code checking which device current kernel is running on can be removed. This PR creates corresponding AOT address sanitizer device libraries for CPU, PVC, DG2 target and link them according to real AOT target. This PR also renames previous libsycl-sanitizer to libsycl-asan since current device sanitizer only supports address sanitizer and memory sanitizer will be added in the future. --------- Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/SYCL.cpp | 233 ++++++++++++------ ...bsycl-sanitizer.bc => libsycl-asan-cpu.bc} | 0 ...libsycl-sanitizer.o => libsycl-asan-cpu.o} | 0 .../Inputs/SYCL/lib/libsycl-asan-dg2.bc | 0 .../Driver/Inputs/SYCL/lib/libsycl-asan-dg2.o | 0 .../Inputs/SYCL/lib/libsycl-asan-pvc.bc | 0 .../Driver/Inputs/SYCL/lib/libsycl-asan-pvc.o | 0 .../Driver/Inputs/SYCL/lib/libsycl-asan.bc | 0 .../Driver/Inputs/SYCL/lib/libsycl-asan.o | 0 .../test/Driver/sycl-device-lib-old-model.cpp | 92 ++++++- clang/test/Driver/sycl-device-lib.cpp | 132 +++++++++- libdevice/cmake/modules/SYCLLibdevice.cmake | 74 +++++- libdevice/sanitizer_utils.cpp | 8 + sycl/test-e2e/Config/kernel_from_file.cpp | 6 +- 14 files changed, 460 insertions(+), 85 deletions(-) rename clang/test/Driver/Inputs/SYCL/lib/{libsycl-sanitizer.bc => libsycl-asan-cpu.bc} (100%) rename clang/test/Driver/Inputs/SYCL/lib/{libsycl-sanitizer.o => libsycl-asan-cpu.o} (100%) create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.bc create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.o create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.bc create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.o create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.bc create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.o diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 6435618ae7f6a..7e8067f5ec2e4 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -345,6 +345,84 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, return NeedLibs; } +struct OclocInfo { + const char *DeviceName; + const char *PackageName; + const char *Version; + SmallVector HexValues; +}; + +// The PVCDevices data structure is organized by device name, with the +// corresponding ocloc split release, version and possible Hex representations +// of various PVC devices. This information is gathered from the following: +// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_base.inl +// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_additional.inl +static OclocInfo PVCDevices[] = { + {"pvc-sdv", "gen12+", "12.60.1", {}}, + {"pvc", + "gen12+", + "12.60.7", + {0x0BD0, 0x0BD5, 0x0BD6, 0x0BD7, 0x0BD8, 0x0BD9, 0x0BDA, 0x0BDB}}}; + +static std::string getDeviceArg(const ArgStringList &CmdArgs) { + bool DeviceSeen = false; + std::string DeviceArg; + for (StringRef Arg : CmdArgs) { + // -device comes in as a single arg, split up all potential space + // separated values. + SmallVector SplitArgs; + Arg.split(SplitArgs, ' '); + for (StringRef SplitArg : SplitArgs) { + if (DeviceSeen) { + DeviceArg = SplitArg.str(); + break; + } + if (SplitArg == "-device") + DeviceSeen = true; + } + if (DeviceSeen) + break; + } + + return DeviceArg; +} + +static bool checkPVCDevice(std::string SingleArg, std::string &DevArg) { + // Handle shortened versions. + bool CheckShortVersion = true; + for (auto Char : SingleArg) { + if (!std::isdigit(Char) && Char != '.') { + CheckShortVersion = false; + break; + } + } + // Check for device, version or hex (literal values) + for (unsigned int I = 0; I < std::size(PVCDevices); I++) { + if (StringRef(SingleArg).equals_insensitive(PVCDevices[I].DeviceName) || + StringRef(SingleArg).equals_insensitive(PVCDevices[I].Version)) { + DevArg = SingleArg; + return true; + } + + for (int HexVal : PVCDevices[I].HexValues) { + int Value = 0; + if (!StringRef(SingleArg).getAsInteger(0, Value) && Value == HexVal) { + // TODO: Pass back the hex string to use for -device_options when + // IGC is updated to allow. Currently -device_options only accepts + // the device ID (i.e. pvc) or the version (12.60.7). + return true; + } + } + if (CheckShortVersion && + StringRef(PVCDevices[I].Version).starts_with(SingleArg)) { + DevArg = SingleArg; + return true; + } + } + + return false; +} + SmallVector SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, bool IsSpirvAOT) { @@ -360,6 +438,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, StringRef DeviceLibOption; }; + enum { JIT = 0, AOT_CPU, AOT_DG2, AOT_PVC }; + // Currently, all SYCL device libraries will be linked by default. llvm::StringMap DeviceLibLinkInfo = { {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, @@ -460,8 +540,11 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, {"libsycl-itt-compiler-wrappers", "internal"}, {"libsycl-itt-stubs", "internal"}}; #if !defined(_WIN32) - const SYCLDeviceLibsList SYCLDeviceSanitizerLibs = { - {"libsycl-sanitizer", "internal"}}; + const SYCLDeviceLibsList SYCLDeviceAsanLibs = { + {"libsycl-asan", "internal"}, + {"libsycl-asan-cpu", "internal"}, + {"libsycl-asan-dg2", "internal"}, + {"libsycl-asan-pvc", "internal"}}; #endif const SYCLDeviceLibsList SYCLNativeCpuDeviceLibs = { @@ -493,6 +576,66 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } }; + auto addSingleLibrary = [&](const DeviceLibOptInfo &Lib) { + if (!DeviceLibLinkInfo[Lib.DeviceLibOption]) + return; + SmallString<128> LibName(Lib.DeviceLibName); + llvm::sys::path::replace_extension(LibName, LibSuffix); + LibraryList.push_back(Args.MakeArgString(LibName)); + }; + + // This function is used to check whether there is only one GPU device + // (PVC or DG2) specified in AOT compilation mode. If yes, we can use + // corresponding libsycl-asan-* to improve device sanitizer performance, + // otherwise stick to fallback device sanitizer library used in JIT mode. + auto getSpecificGPUTarget = [](const ArgStringList &CmdArgs) -> size_t { + std::string DeviceArg = getDeviceArg(CmdArgs); + if ((DeviceArg.empty()) || (DeviceArg.find(",") != std::string::npos)) + return JIT; + + std::string Temp; + if (checkPVCDevice(DeviceArg, Temp)) + return AOT_PVC; + + if (DeviceArg == "dg2") + return AOT_DG2; + + return JIT; + }; + + auto getSingleBuildTarget = [&]() -> size_t { + if (!IsSpirvAOT) + return JIT; + + llvm::opt::Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ); + if (!SYCLTarget || (SYCLTarget->getValues().size() != 1)) + return JIT; + + StringRef SYCLTargetStr = SYCLTarget->getValue(); + if (SYCLTargetStr.starts_with("spir64_x86_64")) + return AOT_CPU; + + if (SYCLTargetStr == "intel_gpu_pvc") + return AOT_PVC; + + if (SYCLTargetStr.starts_with("intel_gpu_dg2")) + return AOT_DG2; + + if (SYCLTargetStr.starts_with("spir64_gen")) { + ArgStringList TargArgs; + Args.AddAllArgValues(TargArgs, options::OPT_Xs, options::OPT_Xs_separate); + Args.AddAllArgValues(TargArgs, options::OPT_Xsycl_backend); + llvm::opt::Arg *A = nullptr; + if ((A = Args.getLastArg(options::OPT_Xsycl_backend_EQ)) && + StringRef(A->getValue()).starts_with("spir64_gen")) + TargArgs.push_back(A->getValue(1)); + + return getSpecificGPUTarget(TargArgs); + } + + return JIT; + }; + addLibraries(SYCLDeviceWrapperLibs); if (IsSpirvAOT) addLibraries(SYCLDeviceFallbackLibs); @@ -512,13 +655,14 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, addLibraries(SYCLDeviceAnnotationLibs); #if !defined(_WIN32) + size_t sanitizer_lib_idx = getSingleBuildTarget(); if (Arg *A = Args.getLastArg(options::OPT_fsanitize_EQ, options::OPT_fno_sanitize_EQ)) { if (A->getOption().matches(options::OPT_fsanitize_EQ) && A->getValues().size() == 1) { std::string SanitizeVal = A->getValue(); if (SanitizeVal == "address") - addLibraries(SYCLDeviceSanitizerLibs); + addSingleLibrary(SYCLDeviceAsanLibs[sanitizer_lib_idx]); } } else { // User can pass -fsanitize=address to device compiler via @@ -546,7 +690,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } if (IsDeviceAsanEnabled) - addLibraries(SYCLDeviceSanitizerLibs); + addSingleLibrary(SYCLDeviceAsanLibs[sanitizer_lib_idx]); } #endif @@ -663,7 +807,10 @@ static llvm::SmallVector SYCLDeviceLibList{ #if defined(_WIN32) "msvc-math", #else - "sanitizer", + "asan", + "asan-pvc", + "asan-cpu", + "asan-dg2", #endif "imf", "imf-fp64", @@ -1131,87 +1278,23 @@ void SYCL::fpga::BackendCompiler::ConstructJob( C.addCommand(std::move(Cmd)); } -struct OclocInfo { - const char *DeviceName; - const char *PackageName; - const char *Version; - SmallVector HexValues; -}; - -// The PVCDevices data structure is organized by device name, with the -// corresponding ocloc split release, version and possible Hex representations -// of various PVC devices. This information is gathered from the following: -// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_base.inl -// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_additional.inl -static OclocInfo PVCDevices[] = { - {"pvc-sdv", "gen12+", "12.60.1", {}}, - {"pvc", - "gen12+", - "12.60.7", - {0x0BD0, 0x0BD5, 0x0BD6, 0x0BD7, 0x0BD8, 0x0BD9, 0x0BDA, 0x0BDB}}}; - // Determine if any of the given arguments contain any PVC based values for // the -device option. static bool hasPVCDevice(const ArgStringList &CmdArgs, std::string &DevArg) { - bool DeviceSeen = false; - StringRef DeviceArg; - for (StringRef Arg : CmdArgs) { - // -device comes in as a single arg, split up all potential space - // separated values. - SmallVector SplitArgs; - Arg.split(SplitArgs, ' '); - for (StringRef SplitArg : SplitArgs) { - if (DeviceSeen) { - DeviceArg = SplitArg; - break; - } - if (SplitArg == "-device") - DeviceSeen = true; - } - if (DeviceSeen) - break; - } - if (DeviceArg.empty()) + std::string Res = getDeviceArg(CmdArgs); + if (Res.empty()) return false; - // Go through all of the arguments to '-device' and determine if any of these // are pvc based. We only match literal values and will not find a match // when ranges or wildcards are used. // Here we parse the targets, tokenizing via ',' + StringRef DeviceArg(Res.c_str()); SmallVector SplitArgs; DeviceArg.split(SplitArgs, ","); for (const auto &SingleArg : SplitArgs) { - StringRef OclocTarget; - // Handle shortened versions. - bool CheckShortVersion = true; - for (auto Char : SingleArg.str()) { - if (!std::isdigit(Char) && Char != '.') { - CheckShortVersion = false; - break; - } - } - // Check for device, version or hex (literal values) - for (unsigned int I = 0; I < std::size(PVCDevices); I++) { - if (SingleArg.equals_insensitive(PVCDevices[I].DeviceName) || - SingleArg.equals_insensitive(PVCDevices[I].Version)) { - DevArg = SingleArg.str(); - return true; - } - for (int HexVal : PVCDevices[I].HexValues) { - int Value = 0; - if (!SingleArg.getAsInteger(0, Value) && Value == HexVal) { - // TODO: Pass back the hex string to use for -device_options when - // IGC is updated to allow. Currently -device_options only accepts - // the device ID (i.e. pvc) or the version (12.60.7). - return true; - } - } - if (CheckShortVersion && - StringRef(PVCDevices[I].Version).starts_with(SingleArg)) { - DevArg = SingleArg.str(); - return true; - } - } + bool IsPVC = checkPVCDevice(SingleArg.str(), DevArg); + if (IsPVC) + return true; } return false; } diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.bc similarity index 100% rename from clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.bc rename to clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.bc diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.o similarity index 100% rename from clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.o rename to clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.o diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-device-lib-old-model.cpp b/clang/test/Driver/sycl-device-lib-old-model.cpp index cd3597ad20d74..ab8f900f6710d 100644 --- a/clang/test/Driver/sycl-device-lib-old-model.cpp +++ b/clang/test/Driver/sycl-device-lib-old-model.cpp @@ -196,7 +196,7 @@ // SYCL_LLVM_LINK_USER_ONLY_NEEDED: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### -/// test behavior of libsycl-sanitizer.o linking when -fsanitize=address is available +/// test behavior of libsycl-asan.bc linking when -fsanitize=address is available // RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -fsanitize=address -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER // RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=address -### 2>&1 \ @@ -226,8 +226,94 @@ // SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-fallback-imf.bc" // SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" // SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" -// SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-sanitizer.bc" +// SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-asan.bc" // SYCL_DEVICE_ASAN_MACRO: "-cc1" // SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN" // SYCL_DEVICE_ASAN_MACRO: llvm-link{{.*}} "-only-needed" -// SYCL_DEVICE_ASAN_MACRO-SAME: "{{.*}}libsycl-sanitizer.bc" +// SYCL_DEVICE_ASAN_MACRO-SAME: "{{.*}}libsycl-asan.bc" + +/// ########################################################################### +/// test behavior of linking libsycl-asan-pvc for PVC target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device 12.60.7" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xs "-device 12.60.7" --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// SYCL_DEVICE_LIB_ASAN_PVC: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-asan-pvc.bc" + +/// ########################################################################### +/// test behavior of linking libsycl-asan-cpu for CPU target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_CPU +// SYCL_DEVICE_LIB_ASAN_CPU: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-asan-cpu.bc" + +/// ########################################################################### +/// test behavior of linking libsycl-asan-dg2 for DG2 target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10 --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device dg2" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device dg2" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xs "-device dg2" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// SYCL_DEVICE_LIB_ASAN_DG2: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-asan-dg2.bc" diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 197f7fc5e46d9..e84eaadc5405a 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -153,7 +153,7 @@ // SYCL_NO_DEVICE_LIB_INVALID_VALUE: error: unsupported argument '[[Val]]' to option '-fno-sycl-device-lib=' /// ########################################################################### -/// test behavior of libsycl-sanitizer.o linking when -fsanitize=address is available +/// test behavior of libsycl-asan.o linking when -fsanitize=address is available // RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -fsanitize=address -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER // RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=address -### 2>&1 \ @@ -184,7 +184,133 @@ // SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-fallback-imf.new.o // SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o // SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o -// SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-sanitizer.new.o +// SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-asan.new.o // SYCL_DEVICE_ASAN_MACRO: "-cc1" // SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN" -// SYCL_DEVICE_ASAN_MACRO: libsycl-sanitizer.new.o +// SYCL_DEVICE_ASAN_MACRO: libsycl-asan.new.o + + +/// ########################################################################### +/// test behavior of linking libsycl-asan-pvc for PVC target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device pvc" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend=spir64_gen "-device pvc" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device 12.60.7" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xs "-device pvc" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xs "-device 12.60.7" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// SYCL_DEVICE_LIB_ASAN_PVC: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_PVC: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-asan-pvc.new.o + +/// ########################################################################### +/// test behavior of linking libsycl-asan-cpu for CPU target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_CPU +// SYCL_DEVICE_LIB_ASAN_CPU: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_CPU: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-asan-cpu.new.o + +/// ########################################################################### +/// test behavior of linking libsycl-asan-dg2 for DG2 target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10 --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend=spir64_gen "-device dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xs "-device dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// SYCL_DEVICE_LIB_ASAN_DG2: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_DG2: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-asan-dg2.new.o + +/// ########################################################################### +/// test behavior of linking libsycl-asan for multiple targets AOT compilation +/// when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device pvc,dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_MUL +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend=spir64_gen "-device pvc,dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_MUL +// SYCL_DEVICE_LIB_ASAN_MUL: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_MUL: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-asan.new.o diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 043ffc49e2fac..02da6b7283209 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -35,6 +35,18 @@ string(CONCAT sycl_targets_opt "spir64-unknown-unknown," "spirv64-unknown-unknown") +string(CONCAT sycl_pvc_target_opt + "-fsycl-targets=" + "intel_gpu_pvc") + +string(CONCAT sycl_cpu_target_opt + "-fsycl-targets=" + "spir64_x86_64-unknown-unknown") + +string(CONCAT sycl_dg2_target_opt + "-fsycl-targets=" + "spir64_gen-unknown-unknown") + set(compile_opts # suppress an error about SYCL_EXTERNAL being used for # a function with a raw pointer parameter. @@ -223,6 +235,55 @@ if (NOT MSVC AND UR_SANITIZER_INCLUDE_DIR) include/sanitizer_utils.hpp include/spir_global_var.hpp sycl-compiler) + + set(sanitizer_generic_compile_opts ${compile_opts} + -fno-sycl-instrument-device-code + -I${UR_SANITIZER_INCLUDE_DIR}) + + set(asan_pvc_compile_opts_obj -fsycl -c + ${sanitizer_generic_compile_opts} + ${sycl_pvc_target_opt} + -D__LIBDEVICE_PVC__) + + set(asan_cpu_compile_opts_obj -fsycl -c + ${sanitizer_generic_compile_opts} + ${sycl_cpu_target_opt} + -D__LIBDEVICE_CPU__) + + set(asan_dg2_compile_opts_obj -fsycl -c + ${sanitizer_generic_compile_opts} + ${sycl_dg2_target_opt} + -D__LIBDEVICE_DG2__) + + set(asan_pvc_compile_opts_bc ${bc_device_compile_opts} + ${sanitizer_generic_compile_opts} + -D__LIBDEVICE_PVC__) + + set(asan_cpu_compile_opts_bc ${bc_device_compile_opts} + ${sanitizer_generic_compile_opts} + -D__LIBDEVICE_CPU__) + + set(asan_dg2_compile_opts_bc ${bc_device_compile_opts} + ${sanitizer_generic_compile_opts} + -D__LIBDEVICE_DG2__) + + set(asan_pvc_compile_opts_obj-new-offload -fsycl -c --offload-new-driver + -foffload-lto=thin + ${sanitizer_generic_compile_opts} + ${sycl_pvc_target_opt} + -D__LIBDEVICE_PVC__) + + set(asan_cpu_compile_opts_obj-new-offload -fsycl -c --offload-new-driver + -foffload-lto=thin + ${sanitizer_generic_compile_opts} + ${sycl_cpu_target_opt} + -D__LIBDEVICE_CPU__) + + set(asan_dg2_compile_opts_obj-new-offload -fsycl -c --offload-new-driver + -foffload-lto=thin + ${sanitizer_generic_compile_opts} + ${sycl_dg2_target_opt} + -D__LIBDEVICE_DG2__) endif() if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS) @@ -285,10 +346,21 @@ if(MSVC) DEPENDENCIES ${cmath_obj_deps}) else() if(UR_SANITIZER_INCLUDE_DIR) - add_devicelibs(libsycl-sanitizer + add_devicelibs(libsycl-asan SRC sanitizer_utils.cpp DEPENDENCIES ${sanitizer_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code -I${UR_SANITIZER_INCLUDE_DIR}) + set(asan_filetypes obj obj-new-offload bc) + set(asan_devicetypes pvc cpu dg2) + foreach(asan_ft IN LISTS asan_filetypes) + foreach(asan_device IN LISTS asan_devicetypes) + compile_lib_ext(libsycl-asan-${asan_device} + SRC sanitizer_utils.cpp + FILETYPE ${asan_ft} + DEPENDENCIES ${sanitizer_obj_deps} + OPTS ${asan_${asan_device}_compile_opts_${asan_ft}}) + endforeach() + endforeach() endif() endif() diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index 097cd97a4f706..e71bdeea8b501 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -301,6 +301,13 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { inline uptr MemToShadow(uptr addr, uint32_t as) { uptr shadow_ptr = 0; +#if defined(__LIBDEVICE_PVC__) + shadow_ptr = MemToShadow_PVC(addr, as); +#elif defined(__LIBDEVICE_CPU__) + shadow_ptr = MemToShadow_CPU(addr); +#elif defined(__LIBDEVICE_DG2__) + shadow_ptr = MemToShadow_DG2(addr, as); +#else auto launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; if (launch_info->DeviceTy == DeviceType::CPU) { shadow_ptr = MemToShadow_CPU(addr); @@ -314,6 +321,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { __asan_report_unknown_device(); return 0; } +#endif ASAN_DEBUG( if (shadow_ptr) { diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index 8450d6eae2573..9cd3bdf1c12f3 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -7,11 +7,11 @@ // As we are doing a separate device compilation here, we need to explicitly // add the device lib instrumentation (itt_compiler_wrapper) // RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict -// >> ---- unbundle compiler wrapper and sanitizer device objects +// >> ---- unbundle compiler wrapper and asan device objects // RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle -// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-sanitizer%obj_ext -output=%t_sanitizer.bc -unbundle %} +// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %} // >> ---- link device code -// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_sanitizer.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %} +// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %} // >> ---- translate to SPIR-V // RUN: llvm-spirv -o %t.spv %t_app.bc // RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -fno-sycl-dead-args-optimization -Xclang -verify-ignore-unexpected=note,warning From f215f43c8b059e26088385babc080b66e673eb30 Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Mon, 11 Nov 2024 10:04:15 +0000 Subject: [PATCH 18/24] [UR] Bump UR tag to c94dbc8 (#16033) Pulls in https://github.com/oneapi-src/unified-runtime/pull/2302 --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 0137ef119ce41..fc5327dc28d6e 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit ed9fe09f9987bbe86715f191e6dbe5695ebf0306 -# Merge: 09ae26af dc5246dd +# commit c94dbc8fa25f62b812a3db707b78f1c217b33bc3 +# Merge: 9ae8e65b 5bc8c925 # Author: Callum Fare -# Date: Fri Nov 8 10:29:25 2024 +0000 -# Merge pull request #2279 from nrspruit/update_loader_1_18 -# [L0] Update Level Zero Loader version to v1.18.5 -set(UNIFIED_RUNTIME_TAG ed9fe09f9987bbe86715f191e6dbe5695ebf0306) +# Date: Fri Nov 8 17:14:06 2024 +0000 +# Merge pull request #2302 from callumfare/callum/revert_ur_2100 +# Revert #2100 "Raise MSVC warning level from /W3 to /W4" +set(UNIFIED_RUNTIME_TAG c94dbc8fa25f62b812a3db707b78f1c217b33bc3) From d2836c25ca9d8b3bebb9c353c745480b8f8a5fc4 Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Mon, 11 Nov 2024 10:04:27 +0000 Subject: [PATCH 19/24] [SYCL][E2E] Add more checks to exp-std-complex-edge-cases.hpp (#16020) To cover the rest of checks from https://en.cppreference.com/w/cpp/numeric/complex/exp --- .../exp/exp-std-complex-edge-cases.hpp | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp index d807a461e574f..992cd6147b535 100644 --- a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp @@ -199,12 +199,18 @@ template bool test() { sycl::buffer> data(testcases, sycl::range{N}); sycl::buffer> results(sycl::range{N}); + sycl::buffer> exp_conj(sycl::range{N}); + sycl::buffer> conj_exp(sycl::range{N}); q.submit([&](sycl::handler &cgh) { sycl::accessor acc_data(data, cgh, sycl::read_only); - sycl::accessor acc(results, cgh, sycl::write_only); + sycl::accessor acc_results(results, cgh, sycl::write_only); + sycl::accessor acc_exp_conj(exp_conj, cgh, sycl::write_only); + sycl::accessor acc_conj_exp(conj_exp, cgh, sycl::write_only); cgh.parallel_for(sycl::range{N}, [=](sycl::item<1> it) { - acc[it] = std::exp(acc_data[it]); + acc_results[it] = std::exp(acc_data[it]); + acc_exp_conj[it] = std::exp(std::conj(acc_data[it])); + acc_conj_exp[it] = std::conj(std::exp(acc_data[it])); }); }).wait_and_throw(); @@ -219,9 +225,18 @@ template bool test() { // Based on https://en.cppreference.com/w/cpp/numeric/complex/exp // z below refers to the argument passed to std::exp(complex) - sycl::host_accessor acc(results); + sycl::host_accessor acc_results(results); + sycl::host_accessor acc_exp_conj(exp_conj); + sycl::host_accessor acc_conj_exp(conj_exp); for (unsigned i = 0; i < N; ++i) { - std::complex r = acc[i]; + // std::exp(std::conj(z)) == std::conj(std::exp(z)) + // NAN is not equal to NAN in floating-point arithmetic, therefore compare + // only results without NAN + if (!std::isnan(acc_exp_conj[i].real()) && + !std::isnan(acc_exp_conj[i].imag())) + CHECK(acc_exp_conj[i] == acc_conj_exp[i], passed, i); + + std::complex r = acc_results[i]; // If z is (+/-0, +0), the result is (1, +0) if (testcases[i].real() == 0 && testcases[i].imag() == 0 && !std::signbit(testcases[i].imag())) { @@ -247,6 +262,33 @@ template bool test() { CHECK(r.imag() == 0, passed, i); CHECK(std::signbit(testcases[i].imag()) == std::signbit(r.imag()), passed, i); + // If z is (-inf, y) (for any finite y), the result is +0cis(y) where + // cis(y) is cos(y) + isin(y) + } else if (std::isinf(testcases[i].real()) && + std::signbit(testcases[i].real()) && + std::isfinite(testcases[i].imag())) { + CHECK(r.real() == 0, passed, i) + CHECK(std::signbit(r.real()) == + std::signbit(std::cos(testcases[i].imag())), + passed, i) + CHECK(r.imag() == 0, passed, i) + CHECK(std::signbit(r.imag()) == + std::signbit(std::sin(testcases[i].imag())), + passed, i) + // If z is (+inf, y) (for any finite nonzero y), the result is +∞cis(y) + // where cis(y) is cos(y) + isin(y) + } else if (std::isinf(testcases[i].real()) && + !std::signbit(testcases[i].real()) && + std::isfinite(testcases[i].imag()) && + testcases[i].imag() != 0) { + CHECK(std::isinf(r.real()), passed, i) + CHECK(std::signbit(r.real()) == + std::signbit(std::cos(testcases[i].imag())), + passed, i) + CHECK(std::isinf(r.imag()), passed, i) + CHECK(std::signbit(r.imag()) == + std::signbit(std::sin(testcases[i].imag())), + passed, i) // If z is (-inf, +inf), the result is (+/-0, +/-0) (signs are // unspecified) } else if (std::isinf(testcases[i].real()) && testcases[i].real() < 0 && From c0cb28e858cfda8cd4dc2b90a2e1c7b1806f6ca5 Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Mon, 11 Nov 2024 10:04:50 +0000 Subject: [PATCH 20/24] [SYCL][E2E] Update pf-wg-atomic64.cpp (#16029) This DISABLED does nothing. As I understand this test should be run on a device without atomic64 support. --- sycl/test-e2e/Regression/pf-wg-atomic64.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Regression/pf-wg-atomic64.cpp b/sycl/test-e2e/Regression/pf-wg-atomic64.cpp index 2cf4dd1ee39ec..31b0c7188059d 100644 --- a/sycl/test-e2e/Regression/pf-wg-atomic64.cpp +++ b/sycl/test-e2e/Regression/pf-wg-atomic64.cpp @@ -1,4 +1,6 @@ -// DISABLED: aspect-atomic64 +// UNSUPPORTED: aspect-atomic64 +// UNSUPPORTED-INTENDED: The test is intended for devices without atomic64 +// support. // RUN: %{build} -o %t.out // RUN: %{run} %t.out From 82d874f49293d342eb2e2ffe2eb8174778d4ceab Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 11 Nov 2024 11:05:19 +0100 Subject: [PATCH 21/24] [SYCL] Add `(raw|decorated)_generic_ptr` aliases (#15389) https://github.com/KhronosGroup/SYCL-Docs/pull/598 adds these aliases to the spec. Prepare for spec change by adding them to the implementation beforehand. Extend tests so all `multi_ptr` aliases are checked. --------- Signed-off-by: Victor Perez --- sycl/include/sycl/pointers.hpp | 10 +++ .../extensions/address_cast.cpp | 6 -- sycl/test/multi_ptr/aliases.cpp | 87 +++++++++++++++++++ 3 files changed, 97 insertions(+), 6 deletions(-) create mode 100644 sycl/test/multi_ptr/aliases.cpp diff --git a/sycl/include/sycl/pointers.hpp b/sycl/include/sycl/pointers.hpp index c74a8e104ab82..cd082b9f79904 100644 --- a/sycl/include/sycl/pointers.hpp +++ b/sycl/include/sycl/pointers.hpp @@ -47,6 +47,11 @@ using private_ptr = // The interface exposes non-decorated pointer while keeping the // address space information internally. +template +using raw_generic_ptr = + multi_ptr; + template using raw_global_ptr = multi_ptr +using decorated_generic_ptr = + multi_ptr; + template using decorated_global_ptr = multi_ptr -using decorated_generic_ptr = - multi_ptr; - namespace static_as_cast { // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { diff --git a/sycl/test/multi_ptr/aliases.cpp b/sycl/test/multi_ptr/aliases.cpp new file mode 100644 index 0000000000000..1911602e74386 --- /dev/null +++ b/sycl/test/multi_ptr/aliases.cpp @@ -0,0 +1,87 @@ +//==--------------- aliases.cpp - SYCL multi_ptr aliases test --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s -Xclang -verify-ignore-unexpected=note,warning + +// expected-no-diagnostics + +#include + +#include + +template +void test_address_space_aliases() { + static_assert(std::is_same_v< + sycl::generic_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::global_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::local_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::private_ptr, + sycl::multi_ptr>); +} + +template void test_aliases() { + // Template specialization aliases for different pointer address spaces + test_address_space_aliases(); + test_address_space_aliases(); + test_address_space_aliases(); + + // Template specialization aliases for different pointer address spaces. + // The interface exposes non-decorated pointer while keeping the + // address space information internally. + static_assert(std::is_same_v< + sycl::raw_generic_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::raw_global_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::raw_local_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::raw_private_ptr, + sycl::multi_ptr>); + + // Template specialization aliases for different pointer address spaces. + // The interface exposes decorated pointer. + static_assert(std::is_same_v< + sycl::decorated_generic_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::decorated_global_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::decorated_local_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::decorated_private_ptr, + sycl::multi_ptr>); +} + +// Test "minimal set of types" in the CTS. As we are just testing aliases are +// present in this test, this should work for any type. + +template void test_aliases(); +template void test_aliases(); From 73f199e67c1d8c6df4310e1c12b0fa4d7e900099 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 11 Nov 2024 12:56:02 +0100 Subject: [PATCH 22/24] [SYCL] Remove deprecation of legacy multi_ptr (#15893) With https://github.com/KhronosGroup/SYCL-Docs/pull/604 the SYCL specification no longer deprecates legacy multi_ptr. This commit removes the corresponding deprecation warnings from the implementation of the specializations and the enum value. --------- Signed-off-by: Larsen, Steffen --- sycl/include/sycl/access/access.hpp | 7 +------ sycl/include/sycl/multi_ptr.hpp | 15 +++------------ sycl/test/warnings/sycl_2020_deprecations.cpp | 8 +------- 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/sycl/include/sycl/access/access.hpp b/sycl/include/sycl/access/access.hpp index ad95479e9d885..e5c8670e7fda8 100644 --- a/sycl/include/sycl/access/access.hpp +++ b/sycl/include/sycl/access/access.hpp @@ -60,12 +60,7 @@ enum class address_space : int { generic_space = 6, // TODO generic_space address space is not supported yet }; -enum class decorated : int { - no = 0, - yes = 1, - legacy __SYCL2020_DEPRECATED("sycl::access::decorated::legacy " - "is deprecated since SYCL 2020") = 2 -}; +enum class decorated : int { no = 0, yes = 1, legacy = 2 }; } // namespace access using access::target; diff --git a/sycl/include/sycl/multi_ptr.hpp b/sycl/include/sycl/multi_ptr.hpp index e84e1094d2ecb..b1946fb68afc4 100644 --- a/sycl/include/sycl/multi_ptr.hpp +++ b/sycl/include/sycl/multi_ptr.hpp @@ -747,11 +747,8 @@ inline constexpr auto decoration_space = Space; } // namespace detail // Legacy specialization of multi_ptr. -// TODO: Add deprecation warning here when possible. template -class __SYCL2020_DEPRECATED( - "decorated::legacy multi_ptr specialization is deprecated since SYCL 2020.") - multi_ptr { +class multi_ptr { static constexpr auto DecorationSpace = detail::decoration_space; public: @@ -1101,11 +1098,8 @@ class __SYCL2020_DEPRECATED( }; // Legacy specialization of multi_ptr for void. -// TODO: Add deprecation warning here when possible. template -class __SYCL2020_DEPRECATED( - "decorated::legacy multi_ptr specialization is deprecated since SYCL 2020.") - multi_ptr { +class multi_ptr { static constexpr auto DecorationSpace = detail::decoration_space; public: @@ -1264,11 +1258,8 @@ class __SYCL2020_DEPRECATED( }; // Legacy specialization of multi_ptr for const void. -// TODO: Add deprecation warning here when possible. template -class __SYCL2020_DEPRECATED( - "decorated::legacy multi_ptr specialization is deprecated since SYCL 2020.") - multi_ptr { +class multi_ptr { static constexpr auto DecorationSpace = detail::decoration_space; public: diff --git a/sycl/test/warnings/sycl_2020_deprecations.cpp b/sycl/test/warnings/sycl_2020_deprecations.cpp index c3f7d8a3450d3..4614707606830 100644 --- a/sycl/test/warnings/sycl_2020_deprecations.cpp +++ b/sycl/test/warnings/sycl_2020_deprecations.cpp @@ -287,8 +287,6 @@ int main() { [=](sycl::nd_item<1> Idx) { int PrivateVal = 0; - // expected-warning@+6{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} - // expected-warning@+8{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} // expected-warning@+8{{'get_pointer' is deprecated: accessor::get_pointer() is deprecated, please use get_multi_ptr()}} // expected-warning@+7{{'get_pointer' is deprecated: accessor::get_pointer() is deprecated, please use get_multi_ptr()}} // expected-warning@+4{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} @@ -298,8 +296,6 @@ int main() { sycl::make_ptr( GlobalAcc.get_pointer()); - // expected-warning@+5{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} - // expected-warning@+7{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} // expected-warning@+7{{'get_pointer' is deprecated: local_accessor::get_pointer() is deprecated, please use get_multi_ptr()}} // expected-warning@+4{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} sycl::multi_ptr( LocalAcc.get_pointer()); - // expected-warning@+4{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} - // expected-warning@+5{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} - // expected-warning@+6{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} + // expected-warning@+4{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} sycl::multi_ptr LegacyPrivateMptr = From 6d1f4fc9e8bfc34b40892504d7d30ec009ff6486 Mon Sep 17 00:00:00 2001 From: Martin Grant Date: Mon, 11 Nov 2024 15:49:44 +0000 Subject: [PATCH 23/24] [UR] Bump tag to 2eae687 for UR#2112 (#15506) For https://github.com/oneapi-src/unified-runtime/pull/2112 --------- Co-authored-by: Callum Fare --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index fc5327dc28d6e..a4c494620fc92 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit c94dbc8fa25f62b812a3db707b78f1c217b33bc3 -# Merge: 9ae8e65b 5bc8c925 +# commit 2eae687a4cf24ba02ee8e9ebb9552c1d392972ee +# Merge: 1ba7f39a 2a081891 # Author: Callum Fare -# Date: Fri Nov 8 17:14:06 2024 +0000 -# Merge pull request #2302 from callumfare/callum/revert_ur_2100 -# Revert #2100 "Raise MSVC warning level from /W3 to /W4" -set(UNIFIED_RUNTIME_TAG c94dbc8fa25f62b812a3db707b78f1c217b33bc3) +# Date: Mon Nov 11 13:35:24 2024 +0000 +# Merge pull request #2112 from martygrant/martin/context-cts-spec-gap +# Improvements to align CTS and Spec for Context +set(UNIFIED_RUNTIME_TAG 2eae687a4cf24ba02ee8e9ebb9552c1d392972ee) From ac3a03e34b23420f0522e379013e488a8146a43f Mon Sep 17 00:00:00 2001 From: Omar Ahmed Date: Mon, 11 Nov 2024 17:21:11 +0000 Subject: [PATCH 24/24] Change SYCL_PI_TESTS to SYCL_UR_TESTS in CI (#16010) `SYCL_PI_TESTS` was deprecated and changed to `SYCL_UR_TESTS`. --- .github/workflows/sycl-linux-build.yml | 1 - .github/workflows/sycl-macos-build-and-test.yml | 3 +-- sycl/unittests/CMakeLists.txt | 8 +------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml index 0b6eebda045ab..0dc956dfea752 100644 --- a/.github/workflows/sycl-linux-build.yml +++ b/.github/workflows/sycl-linux-build.yml @@ -169,7 +169,6 @@ jobs: --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \ --cmake-opt="-DNATIVECPU_USE_OCK=Off" \ - --cmake-opt="-DSYCL_PI_TESTS=OFF" \ --cmake-opt="-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV" - name: Compile id: build diff --git a/.github/workflows/sycl-macos-build-and-test.yml b/.github/workflows/sycl-macos-build-and-test.yml index d6a6a316a9597..f25e847d8a341 100644 --- a/.github/workflows/sycl-macos-build-and-test.yml +++ b/.github/workflows/sycl-macos-build-and-test.yml @@ -52,7 +52,6 @@ jobs: --ci-defaults $ARGS \ --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache \ --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \ - --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" - name: Compile run: cmake --build $GITHUB_WORKSPACE/build --target deploy-sycl-toolchain diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 29d7b22d56391..8831426784de2 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -25,13 +25,7 @@ include(AddSYCLUnitTest) add_custom_target(check-sycl-unittests) -# TODO UR tests require real hardware and must be moved to sycl/test-e2e. -option(SYCL_UR_TESTS "Enable UR-specific unit tests" OFF) - -if (SYCL_UR_TESTS) - add_subdirectory(ur) -endif() - +add_subdirectory(ur) add_subdirectory(allowlist) add_subdirectory(config) add_subdirectory(misc)