diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml index 0b6eebda045ab..0dc956dfea752 100644 --- a/.github/workflows/sycl-linux-build.yml +++ b/.github/workflows/sycl-linux-build.yml @@ -169,7 +169,6 @@ jobs: --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \ --cmake-opt="-DNATIVECPU_USE_OCK=Off" \ - --cmake-opt="-DSYCL_PI_TESTS=OFF" \ --cmake-opt="-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV" - name: Compile id: build diff --git a/.github/workflows/sycl-macos-build-and-test.yml b/.github/workflows/sycl-macos-build-and-test.yml index d6a6a316a9597..f25e847d8a341 100644 --- a/.github/workflows/sycl-macos-build-and-test.yml +++ b/.github/workflows/sycl-macos-build-and-test.yml @@ -52,7 +52,6 @@ jobs: --ci-defaults $ARGS \ --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache \ --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \ - --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" - name: Compile run: cmake --build $GITHUB_WORKSPACE/build --target deploy-sycl-toolchain diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3fa80d479936f..f90ba124e5a09 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10353,33 +10353,48 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, assert(JA.getInputs().size() == Inputs.size() && "Not have inputs for all dependence actions??"); - // For FPGA, we wrap the host objects before archiving them when using - // -fsycl-link. This allows for better extraction control from the - // archive when we need the host objects for subsequent compilations. if (OffloadingKind == Action::OFK_None && - C.getArgs().hasArg(options::OPT_fintelfpga) && C.getArgs().hasArg(options::OPT_fsycl_link_EQ)) { - // Add offload targets and inputs. - CmdArgs.push_back(C.getArgs().MakeArgString( - Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind))); - CmdArgs.push_back( - TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple())); + // For FPGA, we wrap the host objects before archiving them when using + // -fsycl-link. This allows for better extraction control from the + // archive when we need the host objects for subsequent compilations. + if (C.getArgs().hasArg(options::OPT_fintelfpga)) { - if (Inputs[0].getType() == types::TY_Tempfiletable || - Inputs[0].getType() == types::TY_Tempfilelist) - // Input files are passed via the batch job file table. - CmdArgs.push_back(C.getArgs().MakeArgString("-batch")); + // Add offload targets and inputs. + CmdArgs.push_back(C.getArgs().MakeArgString( + Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind))); + CmdArgs.push_back( + TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple())); - // Add input. - assert(Inputs[0].isFilename() && "Invalid input."); - CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename())); + if (Inputs[0].getType() == types::TY_Tempfiletable || + Inputs[0].getType() == types::TY_Tempfilelist) + // Input files are passed via the batch job file table. + CmdArgs.push_back(C.getArgs().MakeArgString("-batch")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::None(), - TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, Inputs)); - return; + // Add input. + assert(Inputs[0].isFilename() && "Invalid input."); + CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename())); + + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::None(), + TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs)); + return; + } else { + // When compiling and linking separately, we need to propagate the + // compression related CLI options to offload-wrapper. Don't propagate + // these options when wrapping objects for FPGA. + if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { + CmdArgs.push_back( + C.getArgs().MakeArgString(Twine("-offload-compress"))); + // -offload-compression-level=<> + if (Arg *A = C.getInputArgs().getLastArg( + options::OPT_offload_compression_level_EQ)) + CmdArgs.push_back(C.getArgs().MakeArgString( + Twine("-offload-compression-level=") + A->getValue())); + } + } } // Add offload targets and inputs. diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 6435618ae7f6a..7e8067f5ec2e4 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -345,6 +345,84 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, return NeedLibs; } +struct OclocInfo { + const char *DeviceName; + const char *PackageName; + const char *Version; + SmallVector HexValues; +}; + +// The PVCDevices data structure is organized by device name, with the +// corresponding ocloc split release, version and possible Hex representations +// of various PVC devices. This information is gathered from the following: +// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_base.inl +// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_additional.inl +static OclocInfo PVCDevices[] = { + {"pvc-sdv", "gen12+", "12.60.1", {}}, + {"pvc", + "gen12+", + "12.60.7", + {0x0BD0, 0x0BD5, 0x0BD6, 0x0BD7, 0x0BD8, 0x0BD9, 0x0BDA, 0x0BDB}}}; + +static std::string getDeviceArg(const ArgStringList &CmdArgs) { + bool DeviceSeen = false; + std::string DeviceArg; + for (StringRef Arg : CmdArgs) { + // -device comes in as a single arg, split up all potential space + // separated values. + SmallVector SplitArgs; + Arg.split(SplitArgs, ' '); + for (StringRef SplitArg : SplitArgs) { + if (DeviceSeen) { + DeviceArg = SplitArg.str(); + break; + } + if (SplitArg == "-device") + DeviceSeen = true; + } + if (DeviceSeen) + break; + } + + return DeviceArg; +} + +static bool checkPVCDevice(std::string SingleArg, std::string &DevArg) { + // Handle shortened versions. + bool CheckShortVersion = true; + for (auto Char : SingleArg) { + if (!std::isdigit(Char) && Char != '.') { + CheckShortVersion = false; + break; + } + } + // Check for device, version or hex (literal values) + for (unsigned int I = 0; I < std::size(PVCDevices); I++) { + if (StringRef(SingleArg).equals_insensitive(PVCDevices[I].DeviceName) || + StringRef(SingleArg).equals_insensitive(PVCDevices[I].Version)) { + DevArg = SingleArg; + return true; + } + + for (int HexVal : PVCDevices[I].HexValues) { + int Value = 0; + if (!StringRef(SingleArg).getAsInteger(0, Value) && Value == HexVal) { + // TODO: Pass back the hex string to use for -device_options when + // IGC is updated to allow. Currently -device_options only accepts + // the device ID (i.e. pvc) or the version (12.60.7). + return true; + } + } + if (CheckShortVersion && + StringRef(PVCDevices[I].Version).starts_with(SingleArg)) { + DevArg = SingleArg; + return true; + } + } + + return false; +} + SmallVector SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, bool IsSpirvAOT) { @@ -360,6 +438,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, StringRef DeviceLibOption; }; + enum { JIT = 0, AOT_CPU, AOT_DG2, AOT_PVC }; + // Currently, all SYCL device libraries will be linked by default. llvm::StringMap DeviceLibLinkInfo = { {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, @@ -460,8 +540,11 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, {"libsycl-itt-compiler-wrappers", "internal"}, {"libsycl-itt-stubs", "internal"}}; #if !defined(_WIN32) - const SYCLDeviceLibsList SYCLDeviceSanitizerLibs = { - {"libsycl-sanitizer", "internal"}}; + const SYCLDeviceLibsList SYCLDeviceAsanLibs = { + {"libsycl-asan", "internal"}, + {"libsycl-asan-cpu", "internal"}, + {"libsycl-asan-dg2", "internal"}, + {"libsycl-asan-pvc", "internal"}}; #endif const SYCLDeviceLibsList SYCLNativeCpuDeviceLibs = { @@ -493,6 +576,66 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } }; + auto addSingleLibrary = [&](const DeviceLibOptInfo &Lib) { + if (!DeviceLibLinkInfo[Lib.DeviceLibOption]) + return; + SmallString<128> LibName(Lib.DeviceLibName); + llvm::sys::path::replace_extension(LibName, LibSuffix); + LibraryList.push_back(Args.MakeArgString(LibName)); + }; + + // This function is used to check whether there is only one GPU device + // (PVC or DG2) specified in AOT compilation mode. If yes, we can use + // corresponding libsycl-asan-* to improve device sanitizer performance, + // otherwise stick to fallback device sanitizer library used in JIT mode. + auto getSpecificGPUTarget = [](const ArgStringList &CmdArgs) -> size_t { + std::string DeviceArg = getDeviceArg(CmdArgs); + if ((DeviceArg.empty()) || (DeviceArg.find(",") != std::string::npos)) + return JIT; + + std::string Temp; + if (checkPVCDevice(DeviceArg, Temp)) + return AOT_PVC; + + if (DeviceArg == "dg2") + return AOT_DG2; + + return JIT; + }; + + auto getSingleBuildTarget = [&]() -> size_t { + if (!IsSpirvAOT) + return JIT; + + llvm::opt::Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ); + if (!SYCLTarget || (SYCLTarget->getValues().size() != 1)) + return JIT; + + StringRef SYCLTargetStr = SYCLTarget->getValue(); + if (SYCLTargetStr.starts_with("spir64_x86_64")) + return AOT_CPU; + + if (SYCLTargetStr == "intel_gpu_pvc") + return AOT_PVC; + + if (SYCLTargetStr.starts_with("intel_gpu_dg2")) + return AOT_DG2; + + if (SYCLTargetStr.starts_with("spir64_gen")) { + ArgStringList TargArgs; + Args.AddAllArgValues(TargArgs, options::OPT_Xs, options::OPT_Xs_separate); + Args.AddAllArgValues(TargArgs, options::OPT_Xsycl_backend); + llvm::opt::Arg *A = nullptr; + if ((A = Args.getLastArg(options::OPT_Xsycl_backend_EQ)) && + StringRef(A->getValue()).starts_with("spir64_gen")) + TargArgs.push_back(A->getValue(1)); + + return getSpecificGPUTarget(TargArgs); + } + + return JIT; + }; + addLibraries(SYCLDeviceWrapperLibs); if (IsSpirvAOT) addLibraries(SYCLDeviceFallbackLibs); @@ -512,13 +655,14 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, addLibraries(SYCLDeviceAnnotationLibs); #if !defined(_WIN32) + size_t sanitizer_lib_idx = getSingleBuildTarget(); if (Arg *A = Args.getLastArg(options::OPT_fsanitize_EQ, options::OPT_fno_sanitize_EQ)) { if (A->getOption().matches(options::OPT_fsanitize_EQ) && A->getValues().size() == 1) { std::string SanitizeVal = A->getValue(); if (SanitizeVal == "address") - addLibraries(SYCLDeviceSanitizerLibs); + addSingleLibrary(SYCLDeviceAsanLibs[sanitizer_lib_idx]); } } else { // User can pass -fsanitize=address to device compiler via @@ -546,7 +690,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } if (IsDeviceAsanEnabled) - addLibraries(SYCLDeviceSanitizerLibs); + addSingleLibrary(SYCLDeviceAsanLibs[sanitizer_lib_idx]); } #endif @@ -663,7 +807,10 @@ static llvm::SmallVector SYCLDeviceLibList{ #if defined(_WIN32) "msvc-math", #else - "sanitizer", + "asan", + "asan-pvc", + "asan-cpu", + "asan-dg2", #endif "imf", "imf-fp64", @@ -1131,87 +1278,23 @@ void SYCL::fpga::BackendCompiler::ConstructJob( C.addCommand(std::move(Cmd)); } -struct OclocInfo { - const char *DeviceName; - const char *PackageName; - const char *Version; - SmallVector HexValues; -}; - -// The PVCDevices data structure is organized by device name, with the -// corresponding ocloc split release, version and possible Hex representations -// of various PVC devices. This information is gathered from the following: -// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_base.inl -// https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_additional.inl -static OclocInfo PVCDevices[] = { - {"pvc-sdv", "gen12+", "12.60.1", {}}, - {"pvc", - "gen12+", - "12.60.7", - {0x0BD0, 0x0BD5, 0x0BD6, 0x0BD7, 0x0BD8, 0x0BD9, 0x0BDA, 0x0BDB}}}; - // Determine if any of the given arguments contain any PVC based values for // the -device option. static bool hasPVCDevice(const ArgStringList &CmdArgs, std::string &DevArg) { - bool DeviceSeen = false; - StringRef DeviceArg; - for (StringRef Arg : CmdArgs) { - // -device comes in as a single arg, split up all potential space - // separated values. - SmallVector SplitArgs; - Arg.split(SplitArgs, ' '); - for (StringRef SplitArg : SplitArgs) { - if (DeviceSeen) { - DeviceArg = SplitArg; - break; - } - if (SplitArg == "-device") - DeviceSeen = true; - } - if (DeviceSeen) - break; - } - if (DeviceArg.empty()) + std::string Res = getDeviceArg(CmdArgs); + if (Res.empty()) return false; - // Go through all of the arguments to '-device' and determine if any of these // are pvc based. We only match literal values and will not find a match // when ranges or wildcards are used. // Here we parse the targets, tokenizing via ',' + StringRef DeviceArg(Res.c_str()); SmallVector SplitArgs; DeviceArg.split(SplitArgs, ","); for (const auto &SingleArg : SplitArgs) { - StringRef OclocTarget; - // Handle shortened versions. - bool CheckShortVersion = true; - for (auto Char : SingleArg.str()) { - if (!std::isdigit(Char) && Char != '.') { - CheckShortVersion = false; - break; - } - } - // Check for device, version or hex (literal values) - for (unsigned int I = 0; I < std::size(PVCDevices); I++) { - if (SingleArg.equals_insensitive(PVCDevices[I].DeviceName) || - SingleArg.equals_insensitive(PVCDevices[I].Version)) { - DevArg = SingleArg.str(); - return true; - } - for (int HexVal : PVCDevices[I].HexValues) { - int Value = 0; - if (!SingleArg.getAsInteger(0, Value) && Value == HexVal) { - // TODO: Pass back the hex string to use for -device_options when - // IGC is updated to allow. Currently -device_options only accepts - // the device ID (i.e. pvc) or the version (12.60.7). - return true; - } - } - if (CheckShortVersion && - StringRef(PVCDevices[I].Version).starts_with(SingleArg)) { - DevArg = SingleArg.str(); - return true; - } - } + bool IsPVC = checkPVCDevice(SingleArg.str(), DevArg); + if (IsPVC) + return true; } return false; } diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.bc similarity index 100% rename from clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.bc rename to clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.bc diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.o similarity index 100% rename from clang/test/Driver/Inputs/SYCL/lib/libsycl-sanitizer.o rename to clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-cpu.o diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-dg2.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan-pvc.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-asan.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-device-lib-old-model.cpp b/clang/test/Driver/sycl-device-lib-old-model.cpp index cd3597ad20d74..ab8f900f6710d 100644 --- a/clang/test/Driver/sycl-device-lib-old-model.cpp +++ b/clang/test/Driver/sycl-device-lib-old-model.cpp @@ -196,7 +196,7 @@ // SYCL_LLVM_LINK_USER_ONLY_NEEDED: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### -/// test behavior of libsycl-sanitizer.o linking when -fsanitize=address is available +/// test behavior of libsycl-asan.bc linking when -fsanitize=address is available // RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -fsanitize=address -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER // RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=address -### 2>&1 \ @@ -226,8 +226,94 @@ // SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-fallback-imf.bc" // SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" // SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" -// SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-sanitizer.bc" +// SYCL_DEVICE_LIB_SANITIZER-SAME: "{{.*}}libsycl-asan.bc" // SYCL_DEVICE_ASAN_MACRO: "-cc1" // SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN" // SYCL_DEVICE_ASAN_MACRO: llvm-link{{.*}} "-only-needed" -// SYCL_DEVICE_ASAN_MACRO-SAME: "{{.*}}libsycl-sanitizer.bc" +// SYCL_DEVICE_ASAN_MACRO-SAME: "{{.*}}libsycl-asan.bc" + +/// ########################################################################### +/// test behavior of linking libsycl-asan-pvc for PVC target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device 12.60.7" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xs "-device 12.60.7" --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// SYCL_DEVICE_LIB_ASAN_PVC: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: "{{.*}}libsycl-asan-pvc.bc" + +/// ########################################################################### +/// test behavior of linking libsycl-asan-cpu for CPU target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_CPU +// SYCL_DEVICE_LIB_ASAN_CPU: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: "{{.*}}libsycl-asan-cpu.bc" + +/// ########################################################################### +/// test behavior of linking libsycl-asan-dg2 for DG2 target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10 --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device dg2" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device dg2" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xs "-device dg2" --no-offload-new-driver %s \ +// RUN: --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// SYCL_DEVICE_LIB_ASAN_DG2: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: "{{.*}}libsycl-asan-dg2.bc" diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 197f7fc5e46d9..e84eaadc5405a 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -153,7 +153,7 @@ // SYCL_NO_DEVICE_LIB_INVALID_VALUE: error: unsupported argument '[[Val]]' to option '-fno-sycl-device-lib=' /// ########################################################################### -/// test behavior of libsycl-sanitizer.o linking when -fsanitize=address is available +/// test behavior of libsycl-asan.o linking when -fsanitize=address is available // RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -fsanitize=address -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER // RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=address -### 2>&1 \ @@ -184,7 +184,133 @@ // SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-fallback-imf.new.o // SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o // SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o -// SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-sanitizer.new.o +// SYCL_DEVICE_LIB_SANITIZER-SAME: {{.*}}libsycl-asan.new.o // SYCL_DEVICE_ASAN_MACRO: "-cc1" // SYCL_DEVICE_ASAN_MACRO-SAME: "USE_SYCL_DEVICE_ASAN" -// SYCL_DEVICE_ASAN_MACRO: libsycl-sanitizer.new.o +// SYCL_DEVICE_ASAN_MACRO: libsycl-asan.new.o + + +/// ########################################################################### +/// test behavior of linking libsycl-asan-pvc for PVC target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device pvc" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend=spir64_gen "-device pvc" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device 12.60.7" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xs "-device pvc" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xs "-device 12.60.7" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_PVC +// SYCL_DEVICE_LIB_ASAN_PVC: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_PVC: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_PVC-SAME: {{.*}}libsycl-asan-pvc.new.o + +/// ########################################################################### +/// test behavior of linking libsycl-asan-cpu for CPU target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_CPU +// SYCL_DEVICE_LIB_ASAN_CPU: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_CPU: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_CPU-SAME: {{.*}}libsycl-asan-cpu.new.o + +/// ########################################################################### +/// test behavior of linking libsycl-asan-dg2 for DG2 target AOT compilation when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg2_g10 --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend=spir64_gen "-device dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xs "-device dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_DG2 +// SYCL_DEVICE_LIB_ASAN_DG2: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_DG2: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_DG2-SAME: {{.*}}libsycl-asan-dg2.new.o + +/// ########################################################################### +/// test behavior of linking libsycl-asan for multiple targets AOT compilation +/// when asan flag is applied. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend "-device pvc,dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_MUL +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen --offload-new-driver %s --sysroot=%S/Inputs/SYCL \ +// RUN: -Xarch_device -fsanitize=address -Xsycl-target-backend=spir64_gen "-device pvc,dg2" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_ASAN_MUL +// SYCL_DEVICE_LIB_ASAN_MUL: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_ASAN_MUL: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_ASAN_MUL-SAME: {{.*}}libsycl-asan.new.o diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 043ffc49e2fac..02da6b7283209 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -35,6 +35,18 @@ string(CONCAT sycl_targets_opt "spir64-unknown-unknown," "spirv64-unknown-unknown") +string(CONCAT sycl_pvc_target_opt + "-fsycl-targets=" + "intel_gpu_pvc") + +string(CONCAT sycl_cpu_target_opt + "-fsycl-targets=" + "spir64_x86_64-unknown-unknown") + +string(CONCAT sycl_dg2_target_opt + "-fsycl-targets=" + "spir64_gen-unknown-unknown") + set(compile_opts # suppress an error about SYCL_EXTERNAL being used for # a function with a raw pointer parameter. @@ -223,6 +235,55 @@ if (NOT MSVC AND UR_SANITIZER_INCLUDE_DIR) include/sanitizer_utils.hpp include/spir_global_var.hpp sycl-compiler) + + set(sanitizer_generic_compile_opts ${compile_opts} + -fno-sycl-instrument-device-code + -I${UR_SANITIZER_INCLUDE_DIR}) + + set(asan_pvc_compile_opts_obj -fsycl -c + ${sanitizer_generic_compile_opts} + ${sycl_pvc_target_opt} + -D__LIBDEVICE_PVC__) + + set(asan_cpu_compile_opts_obj -fsycl -c + ${sanitizer_generic_compile_opts} + ${sycl_cpu_target_opt} + -D__LIBDEVICE_CPU__) + + set(asan_dg2_compile_opts_obj -fsycl -c + ${sanitizer_generic_compile_opts} + ${sycl_dg2_target_opt} + -D__LIBDEVICE_DG2__) + + set(asan_pvc_compile_opts_bc ${bc_device_compile_opts} + ${sanitizer_generic_compile_opts} + -D__LIBDEVICE_PVC__) + + set(asan_cpu_compile_opts_bc ${bc_device_compile_opts} + ${sanitizer_generic_compile_opts} + -D__LIBDEVICE_CPU__) + + set(asan_dg2_compile_opts_bc ${bc_device_compile_opts} + ${sanitizer_generic_compile_opts} + -D__LIBDEVICE_DG2__) + + set(asan_pvc_compile_opts_obj-new-offload -fsycl -c --offload-new-driver + -foffload-lto=thin + ${sanitizer_generic_compile_opts} + ${sycl_pvc_target_opt} + -D__LIBDEVICE_PVC__) + + set(asan_cpu_compile_opts_obj-new-offload -fsycl -c --offload-new-driver + -foffload-lto=thin + ${sanitizer_generic_compile_opts} + ${sycl_cpu_target_opt} + -D__LIBDEVICE_CPU__) + + set(asan_dg2_compile_opts_obj-new-offload -fsycl -c --offload-new-driver + -foffload-lto=thin + ${sanitizer_generic_compile_opts} + ${sycl_dg2_target_opt} + -D__LIBDEVICE_DG2__) endif() if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS) @@ -285,10 +346,21 @@ if(MSVC) DEPENDENCIES ${cmath_obj_deps}) else() if(UR_SANITIZER_INCLUDE_DIR) - add_devicelibs(libsycl-sanitizer + add_devicelibs(libsycl-asan SRC sanitizer_utils.cpp DEPENDENCIES ${sanitizer_obj_deps} EXTRA_OPTS -fno-sycl-instrument-device-code -I${UR_SANITIZER_INCLUDE_DIR}) + set(asan_filetypes obj obj-new-offload bc) + set(asan_devicetypes pvc cpu dg2) + foreach(asan_ft IN LISTS asan_filetypes) + foreach(asan_device IN LISTS asan_devicetypes) + compile_lib_ext(libsycl-asan-${asan_device} + SRC sanitizer_utils.cpp + FILETYPE ${asan_ft} + DEPENDENCIES ${sanitizer_obj_deps} + OPTS ${asan_${asan_device}_compile_opts_${asan_ft}}) + endforeach() + endforeach() endif() endif() diff --git a/libdevice/fallback-complex-fp64.cpp b/libdevice/fallback-complex-fp64.cpp index 11803a1b72f83..28a5be8ab4a48 100644 --- a/libdevice/fallback-complex-fp64.cpp +++ b/libdevice/fallback-complex-fp64.cpp @@ -152,11 +152,10 @@ double __complex__ __devicelib_cexp(double __complex__ z) { } else if (__spirv_IsNan(z_real)) { if (z_imag == 0.0) return z; - else /* z_imag != 0.0 */ - return CMPLX(NAN, NAN); - } else if (__spirv_IsFinite(z_real)) { - if (__spirv_IsNan(z_imag) || __spirv_IsInf(z_imag)) - return CMPLX(NAN, NAN); + return CMPLX(NAN, NAN); + } else if (__spirv_IsFinite(z_real) && + (__spirv_IsNan(z_imag) || __spirv_IsInf(z_imag))) { + return CMPLX(NAN, NAN); } double __e = __spirv_ocl_exp(z_real); double ret_real = __e * __spirv_ocl_cos(z_imag); diff --git a/libdevice/fallback-complex.cpp b/libdevice/fallback-complex.cpp index e3f58b9eeb019..9f94195a3a407 100644 --- a/libdevice/fallback-complex.cpp +++ b/libdevice/fallback-complex.cpp @@ -141,27 +141,24 @@ DEVICE_EXTERN_C_INLINE float __complex__ __devicelib_cexpf(float __complex__ z) { float z_imag = __devicelib_cimagf(z); float z_real = __devicelib_crealf(z); + if (z_imag == 0) { + return CMPLXF(__spirv_ocl_exp(z_real), __spirv_ocl_copysign(0.f, z_imag)); + } + if (__spirv_IsInf(z_real)) { - if (z_real < 0.0f) { + if (z_real < 0.f) { if (!__spirv_IsFinite(z_imag)) z_imag = 1.0f; - } else if (z_imag == 0.0f || !__spirv_IsFinite(z_imag)) { + } else if (__spirv_IsNan(z_imag)) { + return z; + } else if (z_imag == 0.f || !__spirv_IsFinite(z_imag)) { if (__spirv_IsInf(z_imag)) - z_imag = NAN; - return CMPLXF(z_real, z_imag); + return CMPLXF(z_real, NAN); } - } else if (__spirv_IsNan(z_real) && (z_imag == 0.0f)) { - return z; } - float __e = __spirv_ocl_exp(z_real); - float ret_real = __e * __spirv_ocl_cos(z_imag); - float ret_imag = __e * __spirv_ocl_sin(z_imag); - if (__spirv_IsNan(ret_real)) - ret_real = 0.f; - if (__spirv_IsNan(ret_imag)) - ret_imag = 0.f; - return CMPLXF(ret_real, ret_imag); + float e = __spirv_ocl_exp(z_real); + return CMPLXF(e * __spirv_ocl_cos(z_imag), e * __spirv_ocl_sin(z_imag)); } DEVICE_EXTERN_C_INLINE diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index 097cd97a4f706..e71bdeea8b501 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -301,6 +301,13 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { inline uptr MemToShadow(uptr addr, uint32_t as) { uptr shadow_ptr = 0; +#if defined(__LIBDEVICE_PVC__) + shadow_ptr = MemToShadow_PVC(addr, as); +#elif defined(__LIBDEVICE_CPU__) + shadow_ptr = MemToShadow_CPU(addr); +#elif defined(__LIBDEVICE_DG2__) + shadow_ptr = MemToShadow_DG2(addr, as); +#else auto launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; if (launch_info->DeviceTy == DeviceType::CPU) { shadow_ptr = MemToShadow_CPU(addr); @@ -314,6 +321,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { __asan_report_unknown_device(); return 0; } +#endif ASAN_DEBUG( if (shadow_ptr) { diff --git a/sycl-jit/CMakeLists.txt b/sycl-jit/CMakeLists.txt index 874856a63d363..b790455ceeed0 100644 --- a/sycl-jit/CMakeLists.txt +++ b/sycl-jit/CMakeLists.txt @@ -9,19 +9,23 @@ set(SYCL_JIT_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # directories, similar to how clang/CMakeLists.txt does it. set(LLVM_SPIRV_INCLUDE_DIRS "${LLVM_MAIN_SRC_DIR}/../llvm-spirv/include") -# Set library-wide warning options. -set(SYCL_JIT_WARNING_FLAGS -Wall -Wextra) +if (NOT WIN32 AND NOT CYGWIN) + # Set library-wide warning options. + set(SYCL_JIT_WARNING_FLAGS -Wall -Wextra) -option(SYCL_JIT_ENABLE_WERROR "Treat all warnings as errors in SYCL kernel JIT library" ON) -if(SYCL_JIT_ENABLE_WERROR) - list(APPEND SYCL_JIT_WARNING_FLAGS -Werror) -endif(SYCL_JIT_ENABLE_WERROR) + option(SYCL_JIT_ENABLE_WERROR "Treat all warnings as errors in SYCL kernel JIT library" ON) + if(SYCL_JIT_ENABLE_WERROR) + list(APPEND SYCL_JIT_WARNING_FLAGS -Werror) + endif(SYCL_JIT_ENABLE_WERROR) +endif() -if(WIN32) - message(WARNING "Kernel JIT not yet supported on Windows") -else(WIN32) - add_subdirectory(common) - add_subdirectory(jit-compiler) - add_subdirectory(passes) + +add_subdirectory(common) +add_subdirectory(jit-compiler) +add_subdirectory(passes) + +# Loadable plugins for opt aren't supported on Windows, +# so we can't execute the tests. +if (NOT WIN32 AND NOT CYGWIN) add_subdirectory(test) -endif(WIN32) +endif() diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 09af2de6853ae..6dc5154486c6f 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -40,6 +40,10 @@ add_llvm_library(sycl-jit clangSerialization ) +if(WIN32) + target_link_libraries(sycl-jit PRIVATE Shlwapi) +endif() + target_compile_options(sycl-jit PRIVATE ${SYCL_JIT_WARNING_FLAGS}) # Mark LLVM and SPIR-V headers as system headers to ignore warnigns in them. diff --git a/sycl-jit/jit-compiler/include/KernelFusion.h b/sycl-jit/jit-compiler/include/KernelFusion.h index f149e05692627..d3575f33189aa 100644 --- a/sycl-jit/jit-compiler/include/KernelFusion.h +++ b/sycl-jit/jit-compiler/include/KernelFusion.h @@ -9,6 +9,12 @@ #ifndef SYCL_FUSION_JIT_COMPILER_KERNELFUSION_H #define SYCL_FUSION_JIT_COMPILER_KERNELFUSION_H +#ifdef _WIN32 +#define KF_EXPORT_SYMBOL __declspec(dllexport) +#else +#define KF_EXPORT_SYMBOL +#endif + #include "Kernel.h" #include "Options.h" #include "Parameter.h" @@ -55,25 +61,31 @@ extern "C" { #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type-c-linkage" #endif // __clang__ -JITResult fuseKernels(View KernelInformation, - const char *FusedKernelName, - View Identities, - BarrierFlags BarriersFlags, - View Internalization, - View JITConstants); -JITResult materializeSpecConstants(const char *KernelName, - jit_compiler::SYCLKernelBinaryInfo &BinInfo, - View SpecConstBlob); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4190) +#endif // _MSC_VER + +KF_EXPORT_SYMBOL JITResult +fuseKernels(View KernelInformation, const char *FusedKernelName, + View Identities, BarrierFlags BarriersFlags, + View Internalization, + View JITConstants); + +KF_EXPORT_SYMBOL JITResult materializeSpecConstants( + const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo, + View SpecConstBlob); -JITResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs); +KF_EXPORT_SYMBOL JITResult compileSYCL(InMemoryFile SourceFile, + View IncludeFiles, + View UserArgs); /// Clear all previously set options. -void resetJITConfiguration(); +KF_EXPORT_SYMBOL void resetJITConfiguration(); /// Add an option to the configuration. -void addToJITConfiguration(OptionStorage &&Opt); +KF_EXPORT_SYMBOL void addToJITConfiguration(OptionStorage &&Opt); } // end of extern "C" diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index 81037438061ae..86317c23e78de 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -71,10 +71,9 @@ static bool isTargetFormatSupported(BinaryFormat TargetFormat) { } } -extern "C" JITResult -materializeSpecConstants(const char *KernelName, - jit_compiler::SYCLKernelBinaryInfo &BinInfo, - View SpecConstBlob) { +extern "C" KF_EXPORT_SYMBOL JITResult materializeSpecConstants( + const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo, + View SpecConstBlob) { auto &JITCtx = JITContext::getInstance(); TargetInfo TargetInfo = ConfigHelper::get(); @@ -115,12 +114,11 @@ materializeSpecConstants(const char *KernelName, return JITResult{MaterializerKernelInfo}; } -extern "C" JITResult fuseKernels(View KernelInformation, - const char *FusedKernelName, - View Identities, - BarrierFlags BarriersFlags, - View Internalization, - View Constants) { +extern "C" KF_EXPORT_SYMBOL JITResult +fuseKernels(View KernelInformation, const char *FusedKernelName, + View Identities, BarrierFlags BarriersFlags, + View Internalization, + View Constants) { std::vector KernelsToFuse; llvm::transform(KernelInformation, std::back_inserter(KernelsToFuse), @@ -236,9 +234,9 @@ extern "C" JITResult fuseKernels(View KernelInformation, return JITResult{FusedKernelInfo}; } -extern "C" JITResult compileSYCL(InMemoryFile SourceFile, - View IncludeFiles, - View UserArgs) { +extern "C" KF_EXPORT_SYMBOL JITResult +compileSYCL(InMemoryFile SourceFile, View IncludeFiles, + View UserArgs) { auto ModuleOrErr = compileDeviceCode(SourceFile, IncludeFiles, UserArgs); if (!ModuleOrErr) { return errorToFusionResult(ModuleOrErr.takeError(), @@ -261,8 +259,10 @@ extern "C" JITResult compileSYCL(InMemoryFile SourceFile, return JITResult{Kernel}; } -extern "C" void resetJITConfiguration() { ConfigHelper::reset(); } +extern "C" KF_EXPORT_SYMBOL void resetJITConfiguration() { + ConfigHelper::reset(); +} -extern "C" void addToJITConfiguration(OptionStorage &&Opt) { +extern "C" KF_EXPORT_SYMBOL void addToJITConfiguration(OptionStorage &&Opt) { ConfigHelper::getConfig().set(std::move(Opt)); } diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 6054cc5927eae..f694c8cd57136 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -20,6 +20,49 @@ static char X; // Dummy symbol, used as an anchor for `dlinfo` below. #endif +#ifdef _WIN32 +#include // For std::filesystem::path ( C++17 only ) +#include // For PathRemoveFileSpec +#include // For GetModuleFileName, HMODULE, DWORD, MAX_PATH + +// cribbed from sycl/source/detail/os_util.cpp +using OSModuleHandle = intptr_t; +static constexpr OSModuleHandle ExeModuleHandle = -1; +static OSModuleHandle getOSModuleHandle(const void *VirtAddr) { + HMODULE PhModule; + DWORD Flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT; + auto LpModuleAddr = reinterpret_cast(VirtAddr); + if (!GetModuleHandleExA(Flag, LpModuleAddr, &PhModule)) { + // Expect the caller to check for zero and take + // necessary action + return 0; + } + if (PhModule == GetModuleHandleA(nullptr)) + return ExeModuleHandle; + return reinterpret_cast(PhModule); +} + +// cribbed from sycl/source/detail/os_util.cpp +/// Returns an absolute path where the object was found. +std::wstring getCurrentDSODir() { + wchar_t Path[MAX_PATH]; + auto Handle = getOSModuleHandle(reinterpret_cast(&getCurrentDSODir)); + DWORD Ret = GetModuleFileName( + reinterpret_cast(ExeModuleHandle == Handle ? 0 : Handle), Path, + MAX_PATH); + assert(Ret < MAX_PATH && "Path is longer than MAX_PATH?"); + assert(Ret > 0 && "GetModuleFileName failed"); + (void)Ret; + + BOOL RetCode = PathRemoveFileSpec(Path); + assert(RetCode && "PathRemoveFileSpec failed"); + (void)RetCode; + + return Path; +} +#endif // _WIN32 + static constexpr auto InvalidDPCPPRoot = ""; static const std::string &getDPCPPRoot() { @@ -42,6 +85,10 @@ static const std::string &getDPCPPRoot() { } #endif // _GNU_SOURCE +#ifdef _WIN32 + DPCPPRoot = std::filesystem::path(getCurrentDSODir()).parent_path().string(); +#endif // _WIN32 + // TODO: Implemenent other means of determining the DPCPP root, e.g. // evaluating the `CMPLR_ROOT` env. diff --git a/sycl-jit/passes/CMakeLists.txt b/sycl-jit/passes/CMakeLists.txt index 29e83d225d81b..b6cb30bd809f3 100644 --- a/sycl-jit/passes/CMakeLists.txt +++ b/sycl-jit/passes/CMakeLists.txt @@ -1,49 +1,54 @@ -# Module library for usage as library/pass-plugin with LLVM opt. -add_llvm_library(SYCLKernelJIT MODULE - SYCLFusionPasses.cpp - kernel-fusion/Builtins.cpp - kernel-fusion/SYCLKernelFusion.cpp - kernel-fusion/SYCLSpecConstMaterializer.cpp - kernel-info/SYCLKernelInfo.cpp - internalization/Internalization.cpp - syclcp/SYCLCP.cpp - cleanup/Cleanup.cpp - debug/PassDebug.cpp - target/TargetFusionInfo.cpp - - DEPENDS - intrinsics_gen -) +# See llvm/examples/Bye/CmakeLists.txt as to why this kind of loadable plugin libraries +# isn't supported on Windows. +if (NOT WIN32 AND NOT CYGWIN) + # Module library for usage as library/pass-plugin with LLVM opt. + add_llvm_library(SYCLKernelJIT MODULE + SYCLFusionPasses.cpp + kernel-fusion/Builtins.cpp + kernel-fusion/SYCLKernelFusion.cpp + kernel-fusion/SYCLSpecConstMaterializer.cpp + kernel-info/SYCLKernelInfo.cpp + internalization/Internalization.cpp + syclcp/SYCLCP.cpp + cleanup/Cleanup.cpp + debug/PassDebug.cpp + target/TargetFusionInfo.cpp + + DEPENDS + intrinsics_gen + ) + + target_compile_options(SYCLKernelJIT PRIVATE ${SYCL_JIT_WARNING_FLAGS}) + + # Mark LLVM headers as system headers to ignore warnigns in them. This + # classification remains intact even if the same path is added as a normal + # include path in GCC and Clang. + target_include_directories(SYCLKernelJIT + SYSTEM PRIVATE + ${LLVM_MAIN_INCLUDE_DIR} + ) + target_include_directories(SYCLKernelJIT + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + PRIVATE + ${SYCL_JIT_BASE_DIR}/common/include + ) + + target_link_libraries(SYCLKernelJIT + PRIVATE + sycl-jit-common + ) + + add_dependencies(SYCLKernelJIT sycl-headers) + + if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_PTX) + endif() + + if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_AMDGCN) + endif() -target_compile_options(SYCLKernelJIT PRIVATE ${SYCL_JIT_WARNING_FLAGS}) - -# Mark LLVM headers as system headers to ignore warnigns in them. This -# classification remains intact even if the same path is added as a normal -# include path in GCC and Clang. -target_include_directories(SYCLKernelJIT - SYSTEM PRIVATE - ${LLVM_MAIN_INCLUDE_DIR} -) -target_include_directories(SYCLKernelJIT - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE - ${SYCL_JIT_BASE_DIR}/common/include -) - -target_link_libraries(SYCLKernelJIT - PRIVATE - sycl-jit-common -) - -add_dependencies(SYCLKernelJIT sycl-headers) - -if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_PTX) -endif() - -if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) - target_compile_definitions(SYCLKernelJIT PRIVATE JIT_SUPPORT_AMDGCN) endif() # Static library for linking with the jit_compiler diff --git a/sycl-jit/passes/target/TargetFusionInfo.cpp b/sycl-jit/passes/target/TargetFusionInfo.cpp index af6589a1609ab..eacd339595432 100644 --- a/sycl-jit/passes/target/TargetFusionInfo.cpp +++ b/sycl-jit/passes/target/TargetFusionInfo.cpp @@ -356,9 +356,12 @@ class SPIRVTargetFusionInfo : public TargetFusionInfoImpl { Name = Name.drop_front(Name.find(SPIRVBuiltinPrefix) + SPIRVBuiltinPrefix.size()); // Check that Name does not start with any name in UnsafeBuiltIns - const auto *Iter = - std::upper_bound(UnsafeBuiltIns.begin(), UnsafeBuiltIns.end(), Name); - return Iter == UnsafeBuiltIns.begin() || !Name.starts_with(*(Iter - 1)); + for (const StringRef &Unsafe : UnsafeBuiltIns) { + if (Name.starts_with(Unsafe)) { + return false; + } + } + return true; } unsigned getIndexSpaceBuiltinBitwidth() const override { return 64; } diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 385c70a04679f..a29bfc6310e39 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -30,11 +30,6 @@ endif() # Option to enable JIT, this in turn makes kernel fusion and spec constant # materialization possible. option(SYCL_ENABLE_EXTENSION_JIT "Enable extension to JIT kernels" ON) -if(SYCL_ENABLE_EXTENSION_JIT AND WIN32) - message(WARNING "Extension to JIT kernels not yet supported on Windows") - set(SYCL_ENABLE_EXTENSION_JIT OFF CACHE - BOOL "Extension to JIT kernels not yet supported on Windows" FORCE) -endif() if (NOT XPTI_INCLUDES) set(XPTI_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../xpti/include) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 15985fb0cc0b0..a4c494620fc92 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 3edf99755ce2af3b53102a7d8438e0fe969efac3 -# Merge: 5955bad3 0b968661 -# Author: Ross Brunton -# Date: Wed Nov 6 11:07:29 2024 +0000 -# Merge pull request #2082 from RossBrunton/ross/multiadapt -# [CI] Add "loader" support to conformance testing -set(UNIFIED_RUNTIME_TAG 3edf99755ce2af3b53102a7d8438e0fe969efac3) +# commit 2eae687a4cf24ba02ee8e9ebb9552c1d392972ee +# Merge: 1ba7f39a 2a081891 +# Author: Callum Fare +# Date: Mon Nov 11 13:35:24 2024 +0000 +# Merge pull request #2112 from martygrant/martin/context-cts-spec-gap +# Improvements to align CTS and Spec for Context +set(UNIFIED_RUNTIME_TAG 2eae687a4cf24ba02ee8e9ebb9552c1d392972ee) diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 2f83d42a3c57c..23d32fd9ff7e2 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -282,6 +282,29 @@ requirements for these new accessors to correctly trigger allocations before updating. This is similar to how individual graph commands are enqueued when accessors are used in a graph node. +### Dynamic Command-Group + +To implement the `dynamic_command_group` class for updating the command-groups (CG) +associated with nodes, the CG member of the node implementation class changes +from a `std::unique_ptr` to a `std::shared_ptr` so that multiple nodes and the +`dynamic_command_group_impl` object can share the same CG object. This avoids +the overhead of having to allocate and free copies of the CG when a new active +CG is selected. + +The `dynamic_command_group_impl` class contains a list of weak pointers to the +nodes which have been created with it, so that when a new active CG is selected +it can propagate the change to those nodes. The `dynamic_parameter_impl` class +also contains a list of weak pointers, but to the `dynamic_command_group_impl` +instances of any dynamic command-groups where they are used. This allows +updating the dynamic parameter to propagate to dynamic command-group nodes. + +The `sycl::detail::CGExecKernel` class has been added to, so that if the +object was created from an element in the dynamic command-group list, the class +stores a vector of weak pointers to the other alternative command-groups created +from the same dynamic command-group object. This allows the SYCL runtime to +access the list of alternative kernels when calling the UR API to append a +kernel command to a command-buffer. + ## Optimizations ### Interactions with Profiling diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc index ad8d6a7f50194..933a6aabd2bd4 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc @@ -716,6 +716,34 @@ optimize such partial barriers. _{endnote}_] |==== +==== Command Graph + +The functions in this section are only available if the +link:./sycl_ext_oneapi_graph.asciidoc[ + sycl_ext_oneapi_graph] extension is supported. + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +void execute_graph(sycl::queue q, command_graph &g); + +void execute_graph(sycl::handler &h, command_graph &g); + +} +---- +!==== +_Constraints_: Device and context associated with queue need to be identical +to device and context provided at command graph creation. + +_Effects_: Submits an executable command graph to the `sycl::queue` or `sycl::handler`. + +|==== == Issues diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 56f09c04d3055..5dff0396f07fb 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1975,7 +1975,9 @@ Removing this restriction is something we may look at for future revisions of The command submission functions defined in link:../experimental/sycl_ext_oneapi_enqueue_functions.asciidoc[sycl_ext_oneapi_enqueue_functions] -can be used to add nodes to a graph when creating a graph from queue recording. +can be used adding nodes to a graph when creating a graph from queue recording. +New methods are also defined that enable submitting an executable graph, +e.g. directly to a queue without returning an event. == Examples and Usage Guide diff --git a/sycl/include/sycl/access/access.hpp b/sycl/include/sycl/access/access.hpp index ad95479e9d885..e5c8670e7fda8 100644 --- a/sycl/include/sycl/access/access.hpp +++ b/sycl/include/sycl/access/access.hpp @@ -60,12 +60,7 @@ enum class address_space : int { generic_space = 6, // TODO generic_space address space is not supported yet }; -enum class decorated : int { - no = 0, - yes = 1, - legacy __SYCL2020_DEPRECATED("sycl::access::decorated::legacy " - "is deprecated since SYCL 2020") = 2 -}; +enum class decorated : int { no = 0, yes = 1, legacy = 2 }; } // namespace access using access::target; diff --git a/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp b/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp index c5d57e6b496fa..b25f24bdc829c 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory_properties.hpp @@ -289,22 +289,34 @@ namespace ext::oneapi::experimental { template <__ESIMD_NS::cache_hint Hint> struct property_value<__ESIMD_NS::cache_hint_L1_key, - std::integral_constant<__ESIMD_NS::cache_hint, Hint>> { - using key_t = __ESIMD_NS::cache_hint_L1_key; + std::integral_constant<__ESIMD_NS::cache_hint, Hint>> + : detail::property_base< + property_value<__ESIMD_NS::cache_hint_L1_key, + std::integral_constant<__ESIMD_NS::cache_hint, Hint>>, + oneapi::experimental::detail::PropKind::ESIMDL1CacheHint, + __ESIMD_NS::cache_hint_L1_key> { static constexpr __ESIMD_NS::cache_level level = __ESIMD_NS::cache_level::L1; static constexpr __ESIMD_NS::cache_hint hint = Hint; }; template <__ESIMD_NS::cache_hint Hint> struct property_value<__ESIMD_NS::cache_hint_L2_key, - std::integral_constant<__ESIMD_NS::cache_hint, Hint>> { - using key_t = __ESIMD_NS::cache_hint_L2_key; + std::integral_constant<__ESIMD_NS::cache_hint, Hint>> + : detail::property_base< + property_value<__ESIMD_NS::cache_hint_L2_key, + std::integral_constant<__ESIMD_NS::cache_hint, Hint>>, + oneapi::experimental::detail::PropKind::ESIMDL2CacheHint, + __ESIMD_NS::cache_hint_L2_key> { static constexpr __ESIMD_NS::cache_level level = __ESIMD_NS::cache_level::L2; static constexpr __ESIMD_NS::cache_hint hint = Hint; }; template <__ESIMD_NS::cache_hint Hint> struct property_value<__ESIMD_NS::cache_hint_L3_key, - std::integral_constant<__ESIMD_NS::cache_hint, Hint>> { - using key_t = __ESIMD_NS::cache_hint_L3_key; + std::integral_constant<__ESIMD_NS::cache_hint, Hint>> + : detail::property_base< + property_value<__ESIMD_NS::cache_hint_L3_key, + std::integral_constant<__ESIMD_NS::cache_hint, Hint>>, + oneapi::experimental::detail::PropKind::ESIMDL3CacheHint, + __ESIMD_NS::cache_hint_L3_key> { static constexpr __ESIMD_NS::cache_level level = __ESIMD_NS::cache_level::L3; static constexpr __ESIMD_NS::cache_hint hint = Hint; }; diff --git a/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp b/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp index e65a39c072110..a88839b313504 100644 --- a/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp +++ b/sycl/include/sycl/ext/intel/experimental/kernel_execution_properties.hpp @@ -26,8 +26,9 @@ inline constexpr cache_config_enum large_slm = inline constexpr cache_config_enum large_data = cache_config_enum::large_data; -struct cache_config : oneapi::experimental::detail::run_time_property_key< - oneapi::experimental::detail::PropKind::CacheConfig> { +struct cache_config + : oneapi::experimental::detail::run_time_property_key< + cache_config, oneapi::experimental::detail::PropKind::CacheConfig> { cache_config(cache_config_enum v) : value(v) {} cache_config_enum value; }; diff --git a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp index 31487beffd810..f035a431ca2a1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/cluster_group_prop.hpp @@ -19,6 +19,7 @@ namespace cuda { template struct cluster_size : ::sycl::ext::oneapi::experimental::detail::run_time_property_key< + cluster_size, ::sycl::ext::oneapi::experimental::detail::ClusterLaunch> { cluster_size(const range &size) : size(size) {} sycl::range get_cluster_size() { return size; } diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index b3c758aaa891d..7ecf5ce4c8b14 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -383,6 +384,17 @@ inline void partial_barrier(queue Q, const std::vector &Events, submit(Q, [&](handler &CGH) { partial_barrier(CGH, Events); }, CodeLoc); } +inline void execute_graph(queue Q, command_graph &G, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()) { + Q.ext_oneapi_graph(G, CodeLoc); +} + +inline void execute_graph(handler &CGH, + command_graph &G) { + CGH.ext_oneapi_graph(G); +} + } // namespace ext::oneapi::experimental } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp index d18cf3ebc4b3d..2bc3ef1d921ab 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp @@ -96,6 +96,7 @@ class node_impl; class graph_impl; class exec_graph_impl; class dynamic_parameter_impl; +class dynamic_command_group_impl; } // namespace detail enum class node_type { @@ -216,6 +217,23 @@ class depends_on_all_leaves : public ::sycl::detail::DataLessProperty< } // namespace node } // namespace property +class __SYCL_EXPORT dynamic_command_group { +public: + dynamic_command_group( + const command_graph &Graph, + const std::vector> &CGFList); + + size_t get_active_cgf() const; + void set_active_cgf(size_t Index); + +private: + template + friend const decltype(Obj::impl) & + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + std::shared_ptr impl; +}; + namespace detail { // Templateless modifiable command-graph base class. class __SYCL_EXPORT modifiable_command_graph { @@ -337,6 +355,12 @@ class __SYCL_EXPORT modifiable_command_graph { modifiable_command_graph(const std::shared_ptr &Impl) : impl(Impl) {} + /// Template-less implementation of add() for dynamic command-group nodes. + /// @param DynCGF Dynamic Command-group function object to add. + /// @param Dep List of predecessor nodes. + /// @return Node added to the graph. + node addImpl(dynamic_command_group &DynCGF, const std::vector &Dep); + /// Template-less implementation of add() for CGF nodes. /// @param CGF Command-group function to add. /// @param Dep List of predecessor nodes. diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 70055c6680b74..9f91607456dd6 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -60,13 +60,15 @@ struct device_has_key std::integral_constant...>; }; -struct nd_range_kernel_key { +struct nd_range_kernel_key + : detail::compile_time_property_key { template using value_t = property_value>; }; -struct single_task_kernel_key { +struct single_task_kernel_key + : detail::compile_time_property_key { using value_t = property_value; }; @@ -87,15 +89,18 @@ struct max_linear_work_group_size_key template struct property_value, - std::integral_constant...> { + std::integral_constant...> + : detail::property_base< + property_value, + std::integral_constant...>, + detail::PropKind::WorkGroupSize, work_group_size_key> { static_assert( sizeof...(Dims) + 1 <= 3, "work_group_size property currently only supports up to three values."); static_assert(detail::AllNonZero::value, "work_group_size property must only contain non-zero values."); - using key_t = work_group_size_key; - constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } @@ -104,7 +109,12 @@ struct property_value, template struct property_value, - std::integral_constant...> { + std::integral_constant...> + : detail::property_base< + property_value, + std::integral_constant...>, + detail::PropKind::WorkGroupSizeHint, work_group_size_hint_key> { static_assert(sizeof...(Dims) + 1 <= 3, "work_group_size_hint property currently " "only supports up to three values."); @@ -112,8 +122,6 @@ struct property_value::value, "work_group_size_hint property must only contain non-zero values."); - using key_t = work_group_size_hint_key; - constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } @@ -121,41 +129,57 @@ struct property_value struct property_value> { + std::integral_constant> + : detail::property_base< + property_value>, + detail::PropKind::SubGroupSize, sub_group_size_key> { static_assert(Size != 0, "sub_group_size_key property must contain a non-zero value."); - using key_t = sub_group_size_key; using value_t = std::integral_constant; static constexpr uint32_t value = Size; }; template struct property_value...> { - using key_t = device_has_key; + std::integral_constant...> + : detail::property_base< + property_value...>, + detail::PropKind::DeviceHas, device_has_key> { static constexpr std::array value{Aspects...}; }; template -struct property_value> { +struct property_value> + : detail::property_base>, + detail::PropKind::NDRangeKernel, + nd_range_kernel_key> { static_assert( Dims >= 1 && Dims <= 3, "nd_range_kernel_key property must use dimension of 1, 2 or 3."); - using key_t = nd_range_kernel_key; using value_t = int; static constexpr int dimensions = Dims; }; -template <> struct property_value { - using key_t = single_task_kernel_key; -}; +template <> +struct property_value + : detail::property_base, + detail::PropKind::SingleTaskKernel, + single_task_kernel_key> {}; template struct property_value, - std::integral_constant...> { + std::integral_constant...> + : detail::property_base< + property_value, + std::integral_constant...>, + detail::PropKind::MaxWorkGroupSize, max_work_group_size_key> { static_assert(sizeof...(Dims) + 1 <= 3, "max_work_group_size property currently " "only supports up to three values."); @@ -163,16 +187,16 @@ struct property_value::value, "max_work_group_size property must only contain non-zero values."); - using key_t = max_work_group_size_key; - constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } }; -template <> struct property_value { - using key_t = max_linear_work_group_size_key; -}; +template <> +struct property_value + : detail::property_base, + detail::PropKind::MaxLinearWorkGroupSize, + max_linear_work_group_size_key> {}; template inline constexpr work_group_size_key::value_t work_group_size; @@ -235,8 +259,13 @@ template , - std::integral_constant> { - using key_t = work_group_progress_key; + std::integral_constant> + : detail::property_base< + property_value< + work_group_progress_key, + std::integral_constant, + std::integral_constant>, + detail::PropKind::WorkGroupProgress, work_group_progress_key> { static constexpr forward_progress_guarantee guarantee = Guarantee; static constexpr execution_scope coordinationScope = CoordinationScope; }; @@ -246,8 +275,13 @@ template , - std::integral_constant> { - using key_t = work_group_progress_key; + std::integral_constant> + : detail::property_base< + property_value< + sub_group_progress_key, + std::integral_constant, + std::integral_constant>, + detail::PropKind::SubGroupProgress, sub_group_progress_key> { static constexpr forward_progress_guarantee guarantee = Guarantee; static constexpr execution_scope coordinationScope = CoordinationScope; }; @@ -257,8 +291,13 @@ template , - std::integral_constant> { - using key_t = work_group_progress_key; + std::integral_constant> + : detail::property_base< + property_value< + work_item_progress_key, + std::integral_constant, + std::integral_constant>, + detail::PropKind::WorkItemProgress, work_item_progress_key> { static constexpr forward_progress_guarantee guarantee = Guarantee; static constexpr execution_scope coordinationScope = CoordinationScope; }; diff --git a/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp b/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp index 0716720a3ab36..ef7a5e7f6b704 100644 --- a/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/latency_control/properties.hpp @@ -58,8 +58,15 @@ struct property_value< intel::experimental::latency_constraint_key, std::integral_constant, std::integral_constant, - std::integral_constant> { - using key_t = intel::experimental::latency_constraint_key; + std::integral_constant> + : detail::property_base< + property_value, + std::integral_constant< + intel::experimental::latency_control_type, Type>, + std::integral_constant>, + oneapi::experimental::detail::PropKind::LatencyConstraint, + intel::experimental::latency_constraint_key> { static constexpr int target = Target; static constexpr intel::experimental::latency_control_type type = Type; static constexpr int cycle = Cycle; diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 1c93e00dbe880..b7db9715cad4d 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -200,6 +200,10 @@ template class properties { static_assert(NumContainedProps == sizeof...(PropertyValueTs), "One or more property argument is not a property in the " "property list."); + // We're in process of refactoring properties infrastructure, make sure that + // any newly added properties use `detail::property_base`! + static_assert( + (std::is_base_of_v && ...)); } template diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 0b3e0c610c30f..41a59337440b8 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -94,7 +94,7 @@ enum PropKind : uint32_t { namespace sycl::ext::oneapi::experimental { // (2.) -struct foo : detail::run_time_property_key { +struct foo : detail::run_time_property_key { foo(int v) : value(v) {} int value; }; @@ -215,10 +215,35 @@ enum PropKind : uint32_t { PropKindSize = 79, }; +template struct PropertyToKind { + static constexpr PropKind Kind = PropertyT::Kind; +}; + +struct property_tag {}; +template +struct property_base : property_tag { + using key_t = property_key_t; +#if !defined(_MSC_VER) + // Temporary, to ensure new code matches previous behavior and to catch any + // silly copy-paste mistakes. MSVC can't compile it, but linux-only is enough + // for this temporary check. + static_assert([]() constexpr { + if constexpr (std::is_same_v) + // key_t is incomplete at this point for runtime properties. + return true; + else + return Kind == PropertyToKind::Kind; + }()); +#endif +}; + struct property_key_base_tag {}; struct compile_time_property_key_base_tag : property_key_base_tag {}; -template struct run_time_property_key : property_key_base_tag { +template +struct run_time_property_key : property_key_base_tag, + property_base { protected: static constexpr PropKind Kind = Kind_; @@ -235,12 +260,6 @@ struct compile_time_property_key : compile_time_property_key_base_tag { friend struct PropertyToKind; }; -// This trait must be specialized for all properties and must have a unique -// constexpr PropKind member named Kind. -template struct PropertyToKind { - static constexpr PropKind Kind = PropertyT::Kind; -}; - // Get unique ID for property. template struct PropertyID { static constexpr int value = diff --git a/sycl/include/sycl/ext/oneapi/properties/property_value.hpp b/sycl/include/sycl/ext/oneapi/properties/property_value.hpp index dc7d13145677d..629ae794a0b1c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property_value.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property_value.hpp @@ -41,9 +41,11 @@ struct PropertyValueBase : public detail::SingleNontypePropertyValueBase { } // namespace detail template -struct property_value : public detail::PropertyValueBase { - using key_t = PropertyT; -}; +struct property_value + : public detail::PropertyValueBase, + public detail::property_base, + detail::PropertyToKind::Kind, + PropertyT> {}; template constexpr std::enable_if_t::value, diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8abb4323ab3e1..4e8f62d53c36d 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1175,7 +1175,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(Wrapper)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif } else #endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ && @@ -1198,7 +1197,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif #else (void)KernelFunc; @@ -1249,7 +1247,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(true); #endif } @@ -1272,7 +1269,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NumWorkItems)); processLaunchProperties(Props); setType(detail::CGType::Kernel); - setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); #endif @@ -1298,7 +1294,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NDRange)); processLaunchProperties(Props); setType(detail::CGType::Kernel); - setNDRangeUsed(true); extractArgsAndReqs(); MKernelName = getKernelName(); #endif @@ -1339,7 +1334,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true); StoreLambda(std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif // __SYCL_DEVICE_ONLY__ } @@ -1971,7 +1965,6 @@ class __SYCL_EXPORT handler { StoreLambda( std::move(KernelFunc)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); #endif } @@ -2069,7 +2062,6 @@ class __SYCL_EXPORT handler { detail::checkValueRange(NumWorkItems, WorkItemOffset); setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); #endif @@ -2148,7 +2140,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NumWorkItems)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2189,7 +2180,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CGType::Kernel); - setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2229,7 +2219,6 @@ class __SYCL_EXPORT handler { setNDRangeDescriptor(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CGType::Kernel); - setNDRangeUsed(true); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -3357,6 +3346,7 @@ class __SYCL_EXPORT handler { size_t Size, bool Block = false); friend class ext::oneapi::experimental::detail::graph_impl; friend class ext::oneapi::experimental::detail::dynamic_parameter_impl; + friend class ext::oneapi::experimental::detail::dynamic_command_group_impl; bool DisableRangeRounding(); @@ -3626,8 +3616,10 @@ class __SYCL_EXPORT handler { } #endif +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Set that an ND Range was used during a call to parallel_for void setNDRangeUsed(bool Value); +#endif inline void internalProfilingTagImpl() { throwIfActionIsCreated(); diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 1237bc0651b40..73c336d3d131e 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -901,7 +901,8 @@ struct build_source_bundle_props; // PropertyT syclex::include_files ///////////////////////// struct include_files - : detail::run_time_property_key { + : detail::run_time_property_key { include_files(); include_files(const std::string &name, const std::string &content) { record.emplace_back(std::make_pair(name, content)); @@ -922,7 +923,8 @@ struct is_property_key_of { + : detail::run_time_property_key { std::vector opts; build_options(const std::string &optsArg) : opts{optsArg} {} build_options(const std::vector &optsArg) : opts(optsArg) {} @@ -936,7 +938,8 @@ struct is_property_key_of ///////////////////////// // PropertyT syclex::save_log ///////////////////////// -struct save_log : detail::run_time_property_key { +struct save_log + : detail::run_time_property_key { std::string *log; save_log(std::string *logArg) : log(logArg) {} }; @@ -950,7 +953,8 @@ struct is_property_key_of // PropertyT syclex::registered_kernel_names ///////////////////////// struct registered_kernel_names - : detail::run_time_property_key { + : detail::run_time_property_key { std::vector kernel_names; registered_kernel_names() {} registered_kernel_names(const std::string &knArg) : kernel_names{knArg} {} diff --git a/sycl/include/sycl/multi_ptr.hpp b/sycl/include/sycl/multi_ptr.hpp index e84e1094d2ecb..b1946fb68afc4 100644 --- a/sycl/include/sycl/multi_ptr.hpp +++ b/sycl/include/sycl/multi_ptr.hpp @@ -747,11 +747,8 @@ inline constexpr auto decoration_space = Space; } // namespace detail // Legacy specialization of multi_ptr. -// TODO: Add deprecation warning here when possible. template -class __SYCL2020_DEPRECATED( - "decorated::legacy multi_ptr specialization is deprecated since SYCL 2020.") - multi_ptr { +class multi_ptr { static constexpr auto DecorationSpace = detail::decoration_space; public: @@ -1101,11 +1098,8 @@ class __SYCL2020_DEPRECATED( }; // Legacy specialization of multi_ptr for void. -// TODO: Add deprecation warning here when possible. template -class __SYCL2020_DEPRECATED( - "decorated::legacy multi_ptr specialization is deprecated since SYCL 2020.") - multi_ptr { +class multi_ptr { static constexpr auto DecorationSpace = detail::decoration_space; public: @@ -1264,11 +1258,8 @@ class __SYCL2020_DEPRECATED( }; // Legacy specialization of multi_ptr for const void. -// TODO: Add deprecation warning here when possible. template -class __SYCL2020_DEPRECATED( - "decorated::legacy multi_ptr specialization is deprecated since SYCL 2020.") - multi_ptr { +class multi_ptr { static constexpr auto DecorationSpace = detail::decoration_space; public: diff --git a/sycl/include/sycl/pointers.hpp b/sycl/include/sycl/pointers.hpp index c74a8e104ab82..cd082b9f79904 100644 --- a/sycl/include/sycl/pointers.hpp +++ b/sycl/include/sycl/pointers.hpp @@ -47,6 +47,11 @@ using private_ptr = // The interface exposes non-decorated pointer while keeping the // address space information internally. +template +using raw_generic_ptr = + multi_ptr; + template using raw_global_ptr = multi_ptr +using decorated_generic_ptr = + multi_ptr; + template using decorated_global_ptr = multi_ptr> MStreams; std::vector> MAuxiliaryResources; + /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list + /// of command-groups that a kernel command can be updated to. + std::vector> MAlternativeKernels; ur_kernel_cache_config_t MKernelCacheConfig; bool MKernelIsCooperative = false; bool MKernelUsesClusterLaunch = false; @@ -277,7 +280,7 @@ class CGExecKernel : public CG { MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), MKernelName(std::move(KernelName)), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), - MKernelCacheConfig(std::move(KernelCacheConfig)), + MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), MKernelUsesClusterLaunch(MKernelUsesClusterLaunch) { assert(getType() == CGType::Kernel && "Wrong type of exec kernel CG."); diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index e725fc4ce0c82..861ec2a883601 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -352,11 +352,76 @@ void graph_impl::removeRoot(const std::shared_ptr &Root) { MRoots.erase(Root); } -std::shared_ptr -graph_impl::add(const std::vector> &Dep) { - // Copy deps so we can modify them - auto Deps = Dep; +std::set> graph_impl::getCGEdges( + const std::shared_ptr &CommandGroup) const { + const auto &Requirements = CommandGroup->getRequirements(); + if (!MAllowBuffers && Requirements.size()) { + throw sycl::exception(make_error_code(errc::invalid), + "Cannot use buffers in a graph without passing the " + "assume_buffer_outlives_graph property on " + "Graph construction."); + } + + if (CommandGroup->getType() == sycl::detail::CGType::Kernel) { + auto CGKernel = + static_cast(CommandGroup.get()); + if (CGKernel->hasStreams()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Using sycl streams in a graph node is unsupported."); + } + } + + // Add any nodes specified by event dependencies into the dependency list + std::set> UniqueDeps; + for (auto &Dep : CommandGroup->getEvents()) { + if (auto NodeImpl = MEventsMap.find(Dep); NodeImpl == MEventsMap.end()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Event dependency from handler::depends_on does " + "not correspond to a node within the graph"); + } else { + UniqueDeps.insert(NodeImpl->second); + } + } + + // A unique set of dependencies obtained by checking requirements and events + for (auto &Req : Requirements) { + // Look through the graph for nodes which share this requirement + for (auto &Node : MNodeStorage) { + if (Node->hasRequirementDependency(Req)) { + bool ShouldAddDep = true; + // If any of this node's successors have this requirement then we skip + // adding the current node as a dependency. + for (auto &Succ : Node->MSuccessors) { + if (Succ.lock()->hasRequirementDependency(Req)) { + ShouldAddDep = false; + break; + } + } + if (ShouldAddDep) { + UniqueDeps.insert(Node); + } + } + } + } + + return UniqueDeps; +} + +void graph_impl::markCGMemObjs( + const std::shared_ptr &CommandGroup) { + const auto &Requirements = CommandGroup->getRequirements(); + for (auto &Req : Requirements) { + auto MemObj = static_cast(Req->MSYCLMemObj); + bool WasInserted = MMemObjs.insert(MemObj).second; + if (WasInserted) { + MemObj->markBeingUsedInGraph(); + } + } +} +std::shared_ptr +graph_impl::add(std::vector> &Deps) { const std::shared_ptr &NodeImpl = std::make_shared(); MNodeStorage.push_back(NodeImpl); @@ -370,7 +435,7 @@ graph_impl::add(const std::vector> &Dep) { std::shared_ptr graph_impl::add(std::function CGF, const std::vector &Args, - const std::vector> &Dep) { + std::vector> &Deps) { (void)Args; sycl::handler Handler{shared_from_this()}; @@ -401,8 +466,8 @@ graph_impl::add(std::function CGF, Handler.getType()); auto NodeImpl = - this->add(NodeType, std::move(Handler.impl->MGraphNodeCG), Dep); - NodeImpl->MNDRangeUsed = Handler.impl->MNDRangeUsed; + this->add(NodeType, std::move(Handler.impl->MGraphNodeCG), Deps); + // Add an event associated with this explicit node for mixed usage addEventForNode(std::make_shared(), NodeImpl); @@ -444,67 +509,15 @@ graph_impl::add(const std::vector Events) { std::shared_ptr graph_impl::add(node_type NodeType, - std::unique_ptr CommandGroup, - const std::vector> &Dep) { - // Copy deps so we can modify them - auto Deps = Dep; + std::shared_ptr CommandGroup, + std::vector> &Deps) { // A unique set of dependencies obtained by checking requirements and events - std::set> UniqueDeps; - const auto &Requirements = CommandGroup->getRequirements(); - if (!MAllowBuffers && Requirements.size()) { - throw sycl::exception(make_error_code(errc::invalid), - "Cannot use buffers in a graph without passing the " - "assume_buffer_outlives_graph property on " - "Graph construction."); - } + std::set> UniqueDeps = getCGEdges(CommandGroup); - if (CommandGroup->getType() == sycl::detail::CGType::Kernel) { - auto CGKernel = - static_cast(CommandGroup.get()); - if (CGKernel->hasStreams()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Using sycl streams in a graph node is unsupported."); - } - } + // Track and mark the memory objects being used by the graph. + markCGMemObjs(CommandGroup); - for (auto &Req : Requirements) { - // Track and mark the memory objects being used by the graph. - auto MemObj = static_cast(Req->MSYCLMemObj); - bool WasInserted = MMemObjs.insert(MemObj).second; - if (WasInserted) { - MemObj->markBeingUsedInGraph(); - } - // Look through the graph for nodes which share this requirement - for (auto &Node : MNodeStorage) { - if (Node->hasRequirementDependency(Req)) { - bool ShouldAddDep = true; - // If any of this node's successors have this requirement then we skip - // adding the current node as a dependency. - for (auto &Succ : Node->MSuccessors) { - if (Succ.lock()->hasRequirementDependency(Req)) { - ShouldAddDep = false; - break; - } - } - if (ShouldAddDep) { - UniqueDeps.insert(Node); - } - } - } - } - - // Add any nodes specified by event dependencies into the dependency list - for (auto &Dep : CommandGroup->getEvents()) { - if (auto NodeImpl = MEventsMap.find(Dep); NodeImpl != MEventsMap.end()) { - UniqueDeps.insert(NodeImpl->second); - } else { - throw sycl::exception(sycl::make_error_code(errc::invalid), - "Event dependency from handler::depends_on does " - "not correspond to a node within the graph"); - } - } // Add any deps determined from requirements and events into the dependency // list Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end()); @@ -518,6 +531,41 @@ graph_impl::add(node_type NodeType, return NodeImpl; } +std::shared_ptr +graph_impl::add(std::shared_ptr &DynCGImpl, + std::vector> &Deps) { + // Set of Dependent nodes based on CG event and accessor dependencies. + std::set> DynCGDeps = + getCGEdges(DynCGImpl->MKernels[0]); + for (unsigned i = 1; i < DynCGImpl->getNumCGs(); i++) { + auto &CG = DynCGImpl->MKernels[i]; + auto CGEdges = getCGEdges(CG); + if (CGEdges != DynCGDeps) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Command-groups in dynamic command-group don't have" + "equivalent dependencies to other graph nodes."); + } + } + + // Track and mark the memory objects being used by the graph. + for (auto &CG : DynCGImpl->MKernels) { + markCGMemObjs(CG); + } + + // Get active dynamic command-group CG and use to create a node object + const auto &ActiveKernel = DynCGImpl->getActiveKernel(); + std::shared_ptr NodeImpl = + add(node_type::kernel, ActiveKernel, Deps); + + // Add an event associated with this explicit node for mixed usage + addEventForNode(std::make_shared(), NodeImpl); + + // Track the dynamic command-group used inside the node object + DynCGImpl->MNodes.push_back(NodeImpl); + + return NodeImpl; +} + bool graph_impl::clearQueues() { bool AnyQueuesCleared = false; for (auto &Queue : MRecordingQueues) { @@ -1515,7 +1563,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { } } - UpdateDesc.hNewKernel = nullptr; + UpdateDesc.hNewKernel = UrKernel; UpdateDesc.numNewMemObjArgs = MemobjDescs.size(); UpdateDesc.pNewMemObjArgList = MemobjDescs.data(); UpdateDesc.numNewPointerArgs = PtrDescs.size(); @@ -1568,6 +1616,27 @@ modifiable_command_graph::modifiable_command_graph( : impl(std::make_shared( SyclQueue.get_context(), SyclQueue.get_device(), PropList)) {} +node modifiable_command_graph::addImpl(dynamic_command_group &DynCGF, + const std::vector &Deps) { + impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); + auto DynCGFImpl = sycl::detail::getSyclObjImpl(DynCGF); + + if (DynCGFImpl->MGraph != impl) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Graph does not match the graph associated with " + "dynamic command-group."); + } + + std::vector> DepImpls; + for (auto &D : Deps) { + DepImpls.push_back(sycl::detail::getSyclObjImpl(D)); + } + + graph_impl::WriteLock Lock(impl->MMutex); + std::shared_ptr NodeImpl = impl->add(DynCGFImpl, DepImpls); + return sycl::detail::createSyclObjFromImpl(NodeImpl); +} + node modifiable_command_graph::addImpl(const std::vector &Deps) { impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); std::vector> DepImpls; @@ -1775,6 +1844,190 @@ void dynamic_parameter_base::updateAccessor( impl->updateAccessor(Acc); } +void dynamic_parameter_impl::updateValue(const raw_kernel_arg *NewRawValue, + size_t Size) { + // Number of bytes is taken from member of raw_kernel_arg object rather + // than using the size parameter which represents sizeof(raw_kernel_arg). + std::ignore = Size; + size_t RawArgSize = NewRawValue->MArgSize; + const void *RawArgData = NewRawValue->MArgData; + + updateValue(RawArgData, RawArgSize); +} + +void dynamic_parameter_impl::updateValue(const void *NewValue, size_t Size) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + if (NodeShared) { + dynamic_parameter_impl::updateCGArgValue(NodeShared->MCommandGroup, + ArgIndex, NewValue, Size); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MKernels[DynCGInfo.CGIndex]; + dynamic_parameter_impl::updateCGArgValue(CG, DynCGInfo.ArgIndex, NewValue, + Size); + } + } + + std::memcpy(MValueStorage.data(), NewValue, Size); +} + +void dynamic_parameter_impl::updateAccessor( + const sycl::detail::AccessorBaseHost *Acc) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + // Should we fail here if the node isn't alive anymore? + if (NodeShared) { + dynamic_parameter_impl::updateCGAccessor(NodeShared->MCommandGroup, + ArgIndex, Acc); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MKernels[DynCGInfo.CGIndex]; + dynamic_parameter_impl::updateCGAccessor(CG, DynCGInfo.ArgIndex, Acc); + } + } + + std::memcpy(MValueStorage.data(), Acc, + sizeof(sycl::detail::AccessorBaseHost)); +} + +void dynamic_parameter_impl::updateCGArgValue( + std::shared_ptr CG, int ArgIndex, const void *NewValue, + size_t Size) { + auto &Args = static_cast(CG.get())->MArgs; + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MSize == static_cast(Size)); + // MPtr may be a pointer into arg storage so we memcpy the contents of + // NewValue rather than assign it directly + std::memcpy(Arg.MPtr, NewValue, Size); + break; + } +} + +void dynamic_parameter_impl::updateCGAccessor( + std::shared_ptr CG, int ArgIndex, + const sycl::detail::AccessorBaseHost *Acc) { + auto &Args = static_cast(CG.get())->MArgs; + + auto NewAccImpl = sycl::detail::getSyclObjImpl(*Acc); + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_accessor); + + // Find old accessor in accessor storage and replace with new one + if (static_cast(NewAccImpl->MSYCLMemObj) + ->needsWriteBack()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Accessors to buffers which have write_back enabled " + "are not allowed to be used in command graphs."); + } + + // All accessors passed to this function will be placeholders, so we must + // perform steps similar to what happens when handler::require() is + // called here. + sycl::detail::Requirement *NewReq = NewAccImpl.get(); + if (NewReq->MAccessMode != sycl::access_mode::read) { + auto SYCLMemObj = + static_cast(NewReq->MSYCLMemObj); + SYCLMemObj->handleWriteAccessorCreation(); + } + + for (auto &Acc : CG->getAccStorage()) { + if (auto OldAcc = static_cast(Arg.MPtr); + Acc.get() == OldAcc) { + Acc = NewAccImpl; + } + } + + for (auto &Req : CG->getRequirements()) { + if (auto OldReq = static_cast(Arg.MPtr); + Req == OldReq) { + Req = NewReq; + } + } + Arg.MPtr = NewAccImpl.get(); + break; + } +} + +dynamic_command_group_impl::dynamic_command_group_impl( + const command_graph &Graph) + : MGraph{sycl::detail::getSyclObjImpl(Graph)}, MActiveCGF(0) {} + +void dynamic_command_group_impl::finalizeCGFList( + const std::vector> &CGFList) { + for (size_t CGFIndex = 0; CGFIndex < CGFList.size(); CGFIndex++) { + const auto &CGF = CGFList[CGFIndex]; + // Handler defined inside the loop so it doesn't appear to the runtime + // as a single command-group with multiple commands inside. + sycl::handler Handler{MGraph}; + CGF(Handler); + + if (Handler.getType() != sycl::detail::CGType::Kernel) { + throw sycl::exception( + make_error_code(errc::invalid), + "The only type of command-groups that can be used in " + "dynamic command-groups is kernels."); + } + + Handler.finalize(); + + // Take unique_ptr object from handler and convert to + // shared_ptr to store + sycl::detail::CG *RawCGPtr = Handler.impl->MGraphNodeCG.release(); + auto RawCGExecPtr = static_cast(RawCGPtr); + auto CGExecSP = std::shared_ptr(RawCGExecPtr); + MKernels.push_back(CGExecSP); + + // Track dynamic_parameter usage in command-list + auto &DynamicParams = Handler.impl->MDynamicParameters; + for (auto &[DynamicParam, ArgIndex] : DynamicParams) { + DynamicParam->registerDynCG(shared_from_this(), CGFIndex, ArgIndex); + } + } + + // For each CGExecKernel store the list of alternative kernels, not + // including itself. + using CGExecKernelSP = std::shared_ptr; + using CGExecKernelWP = std::weak_ptr; + for (auto KernelCG : MKernels) { + std::vector Alternatives; + std::copy_if( + MKernels.begin(), MKernels.end(), std::back_inserter(Alternatives), + [&KernelCG](const CGExecKernelSP &K) { return K != KernelCG; }); + + KernelCG->MAlternativeKernels = std::move(Alternatives); + } +} + +void dynamic_command_group_impl::setActiveIndex(size_t Index) { + if (Index >= getNumCGs()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Index is out of range."); + } + MActiveCGF = Index; + + // Update nodes using the dynamic command-group to use the new active CG + for (auto &Node : MNodes) { + if (auto NodeSP = Node.lock()) { + NodeSP->MCommandGroup = getActiveKernel(); + } + } +} } // namespace detail node_type node::get_type() const { return impl->MNodeType; } @@ -1813,6 +2066,25 @@ template <> __SYCL_EXPORT void node::update_range<2>(range<2> Range) { template <> __SYCL_EXPORT void node::update_range<3>(range<3> Range) { impl->updateRange(Range); } + +dynamic_command_group::dynamic_command_group( + const command_graph &Graph, + const std::vector> &CGFList) + : impl(std::make_shared(Graph)) { + if (CGFList.empty()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Dynamic command-group cannot be created with an " + "empty CGF list."); + } + impl->finalizeCGFList(CGFList); +} + +size_t dynamic_command_group::get_active_cgf() const { + return impl->getActiveIndex(); +} +void dynamic_command_group::set_active_cgf(size_t Index) { + return impl->setActiveIndex(Index); +} } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 4ee34830f39a2..6144e3f51b9da 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -95,7 +95,7 @@ class node_impl : public std::enable_shared_from_this { /// User facing type of the node. node_type MNodeType = node_type::empty; /// Command group object which stores all args etc needed to enqueue the node - std::unique_ptr MCommandGroup; + std::shared_ptr MCommandGroup; /// Stores the executable graph impl associated with this node if it is a /// subgraph node. std::shared_ptr MSubGraphImpl; @@ -108,9 +108,6 @@ class node_impl : public std::enable_shared_from_this { /// cannot be used to find out the partion of a node outside of this process. int MPartitionNum = -1; - /// Track whether an ND-Range was used for kernel nodes - bool MNDRangeUsed = false; - /// Add successor to the node. /// @param Node Node to add as a successor. void registerSuccessor(const std::shared_ptr &Node) { @@ -143,10 +140,9 @@ class node_impl : public std::enable_shared_from_this { /// @param NodeType Type of the command-group. /// @param CommandGroup The CG which stores the command information for this /// node. - node_impl(node_type NodeType, - std::unique_ptr &&CommandGroup) + node_impl(node_type NodeType, std::shared_ptr CommandGroup) : MCGType(CommandGroup->getType()), MNodeType(NodeType), - MCommandGroup(std::move(CommandGroup)) { + MCommandGroup(CommandGroup) { if (NodeType == node_type::subgraph) { MSubGraphImpl = static_cast(MCommandGroup.get()) @@ -405,75 +401,6 @@ class node_impl : public std::enable_shared_from_this { return (ReqSrc->MDims > 1) || (ReqDst->MDims > 1); } - /// Update the value of an accessor inside this node. Accessors must be - /// handled specifically compared to other argument values. - /// @param ArgIndex The index of the accessor arg to be updated - /// @param Acc Pointer to the new accessor value - void updateAccessor(int ArgIndex, const sycl::detail::AccessorBaseHost *Acc) { - auto &Args = - static_cast(MCommandGroup.get())->MArgs; - auto NewAccImpl = sycl::detail::getSyclObjImpl(*Acc); - for (auto &Arg : Args) { - if (Arg.MIndex != ArgIndex) { - continue; - } - assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_accessor); - - // Find old accessor in accessor storage and replace with new one - if (static_cast(NewAccImpl->MSYCLMemObj) - ->needsWriteBack()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Accessors to buffers which have write_back enabled " - "are not allowed to be used in command graphs."); - } - - // All accessors passed to this function will be placeholders, so we must - // perform steps similar to what happens when handler::require() is - // called here. - sycl::detail::Requirement *NewReq = NewAccImpl.get(); - if (NewReq->MAccessMode != sycl::access_mode::read) { - auto SYCLMemObj = - static_cast(NewReq->MSYCLMemObj); - SYCLMemObj->handleWriteAccessorCreation(); - } - - for (auto &Acc : MCommandGroup->getAccStorage()) { - if (auto OldAcc = - static_cast(Arg.MPtr); - Acc.get() == OldAcc) { - Acc = NewAccImpl; - } - } - - for (auto &Req : MCommandGroup->getRequirements()) { - if (auto OldReq = - static_cast(Arg.MPtr); - Req == OldReq) { - Req = NewReq; - } - } - Arg.MPtr = NewAccImpl.get(); - break; - } - } - - void updateArgValue(int ArgIndex, const void *NewValue, size_t Size) { - - auto &Args = - static_cast(MCommandGroup.get())->MArgs; - for (auto &Arg : Args) { - if (Arg.MIndex != ArgIndex) { - continue; - } - assert(Arg.MSize == static_cast(Size)); - // MPtr may be a pointer into arg storage so we memcpy the contents of - // NewValue rather than assign it directly - std::memcpy(Arg.MPtr, NewValue, Size); - break; - } - } - template void updateNDRange(nd_range ExecutionRange) { if (MCGType != sycl::detail::CGType::Kernel) { @@ -481,11 +408,6 @@ class node_impl : public std::enable_shared_from_this { sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); } - if (!MNDRangeUsed) { - throw sycl::exception(sycl::errc::invalid, - "Cannot update node which was created with a " - "sycl::range with a sycl::nd_range"); - } auto &NDRDesc = static_cast(MCommandGroup.get()) @@ -495,7 +417,7 @@ class node_impl : public std::enable_shared_from_this { throw sycl::exception(sycl::errc::invalid, "Cannot update execution range of a node with an " "execution range of different dimensions than what " - "the node was originall created with."); + "the node was originally created with."); } NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; @@ -507,11 +429,6 @@ class node_impl : public std::enable_shared_from_this { sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); } - if (MNDRangeUsed) { - throw sycl::exception(sycl::errc::invalid, - "Cannot update node which was created with a " - "sycl::nd_range with a sycl::range"); - } auto &NDRDesc = static_cast(MCommandGroup.get()) @@ -521,7 +438,7 @@ class node_impl : public std::enable_shared_from_this { throw sycl::exception(sycl::errc::invalid, "Cannot update execution range of a node with an " "execution range of different dimensions than what " - "the node was originall created with."); + "the node was originally created with."); } NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; @@ -535,6 +452,7 @@ class node_impl : public std::enable_shared_from_this { ExecCG->MArgs = OtherExecCG->MArgs; ExecCG->MNDRDesc = OtherExecCG->MNDRDesc; + ExecCG->MKernelName = OtherExecCG->MKernelName; ExecCG->getAccStorage() = OtherExecCG->getAccStorage(); ExecCG->getRequirements() = OtherExecCG->getRequirements(); @@ -888,30 +806,40 @@ class graph_impl : public std::enable_shared_from_this { /// @param Root Node to remove from list of root nodes. void removeRoot(const std::shared_ptr &Root); + /// Verifies the CG is valid to add to the graph and returns set of + /// dependent nodes if so. + /// @param CommandGroup The command group to verify and retrieve edges for. + /// @return Set of dependent nodes in the graph. + std::set> + getCGEdges(const std::shared_ptr &CommandGroup) const; + + /// Identifies the sycl buffers used in the command-group and marks them + /// as used in the graph. + /// @param CommandGroup The command-group to check for buffer usage in. + void markCGMemObjs(const std::shared_ptr &CommandGroup); + /// Create a kernel node in the graph. /// @param NodeType User facing type of the node. /// @param CommandGroup The CG which stores all information for this node. - /// @param Dep Dependencies of the created node. + /// @param Deps Dependencies of the created node. /// @return Created node in the graph. - std::shared_ptr - add(node_type NodeType, std::unique_ptr CommandGroup, - const std::vector> &Dep = {}); + std::shared_ptr add(node_type NodeType, + std::shared_ptr CommandGroup, + std::vector> &Deps); /// Create a CGF node in the graph. /// @param CGF Command-group function to create node with. /// @param Args Node arguments. - /// @param Dep Dependencies of the created node. + /// @param Deps Dependencies of the created node. /// @return Created node in the graph. - std::shared_ptr - add(std::function CGF, - const std::vector &Args, - const std::vector> &Dep = {}); + std::shared_ptr add(std::function CGF, + const std::vector &Args, + std::vector> &Deps); /// Create an empty node in the graph. - /// @param Dep List of predecessor nodes. + /// @param Deps List of predecessor nodes. /// @return Created node in the graph. - std::shared_ptr - add(const std::vector> &Dep = {}); + std::shared_ptr add(std::vector> &Deps); /// Create an empty node in the graph. /// @param Events List of events associated to this node. @@ -919,6 +847,14 @@ class graph_impl : public std::enable_shared_from_this { std::shared_ptr add(const std::vector Events); + /// Create a dynamic command-group node in the graph. + /// @param DynCGImpl Dynamic command-group used to create node. + /// @param Deps List of predecessor nodes. + /// @return Created node in the graph. + std::shared_ptr + add(std::shared_ptr &DynCGImpl, + std::vector> &Deps); + /// Add a queue to the set of queues which are currently recording to this /// graph. /// @param RecordingQueue Queue to add to set. @@ -1236,7 +1172,7 @@ class graph_impl : public std::enable_shared_from_this { /// @param Node The node to add deps for /// @param Deps List of dependent nodes void addDepsToNode(std::shared_ptr Node, - const std::vector> &Deps) { + std::vector> &Deps) { if (!Deps.empty()) { for (auto &N : Deps) { N->registerSuccessor(Node); @@ -1520,65 +1456,110 @@ class dynamic_parameter_impl { MNodes.emplace_back(NodeImpl, ArgIndex); } + /// Struct detailing an instance of the usage of the dynamic parameter in a + /// dynamic CG. + struct DynamicCGInfo { + /// Dynamic command-group that uses this dynamic parameter. + std::weak_ptr DynCG; + /// Index of the CG in the Dynamic CG that uses this dynamic parameter. + size_t CGIndex; + /// The arg index in the kernel the dynamic parameter is used. + int ArgIndex; + }; + + /// Registers a dynamic command-group with this dynamic parameter. + /// @param DynCG The dynamic command-group to register. + /// @param CGIndex Index of the CG in DynCG using this dynamic parameter. + /// @param ArgIndex The arg index in the kernel the dynamic parameter is used. + void registerDynCG(std::shared_ptr DynCG, + size_t CGIndex, int ArgIndex) { + MDynCGs.emplace_back(DynamicCGInfo{DynCG, CGIndex, ArgIndex}); + } + /// Get a pointer to the internal value of this dynamic parameter void *getValue() { return MValueStorage.data(); } /// Update sycl_ext_oneapi_raw_kernel_arg parameter /// @param NewRawValue Pointer to a raw_kernel_arg object. /// @param Size Parameter is ignored. - void updateValue(const raw_kernel_arg *NewRawValue, size_t Size) { - // Number of bytes is taken from member of raw_kernel_arg object rather - // than using the size parameter which represents sizeof(raw_kernel_arg). - std::ignore = Size; - size_t RawArgSize = NewRawValue->MArgSize; - const void *RawArgData = NewRawValue->MArgData; - - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - if (NodeShared) { - NodeShared->updateArgValue(ArgIndex, RawArgData, RawArgSize); - } - } - std::memcpy(MValueStorage.data(), RawArgData, RawArgSize); - } + void updateValue(const raw_kernel_arg *NewRawValue, size_t Size); /// Update the internal value of this dynamic parameter as well as the value - /// of this parameter in all registered nodes. + /// of this parameter in all registered nodes and dynamic CGs. /// @param NewValue Pointer to the new value /// @param Size Size of the data pointer to by NewValue - void updateValue(const void *NewValue, size_t Size) { - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - if (NodeShared) { - NodeShared->updateArgValue(ArgIndex, NewValue, Size); - } - } - std::memcpy(MValueStorage.data(), NewValue, Size); - } + void updateValue(const void *NewValue, size_t Size); /// Update the internal value of this dynamic parameter as well as the value - /// of this parameter in all registered nodes. Should only be called for - /// accessor dynamic_parameters. + /// of this parameter in all registered nodes and dynamic CGs. Should only be + /// called for accessor dynamic_parameters. /// @param Acc The new accessor value - void updateAccessor(const sycl::detail::AccessorBaseHost *Acc) { - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - // Should we fail here if the node isn't alive anymore? - if (NodeShared) { - NodeShared->updateAccessor(ArgIndex, Acc); - } - } - std::memcpy(MValueStorage.data(), Acc, - sizeof(sycl::detail::AccessorBaseHost)); - } + void updateAccessor(const sycl::detail::AccessorBaseHost *Acc); + + /// Static helper function for updating command-group value arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param NewValue Pointer to the new value. + /// @param Size Size of the data pointer to by NewValue + static void updateCGArgValue(std::shared_ptr CG, + int ArgIndex, const void *NewValue, size_t Size); + + /// Static helper function for updating command-group accessor arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param Acc The new accessor value + static void updateCGAccessor(std::shared_ptr CG, + int ArgIndex, + const sycl::detail::AccessorBaseHost *Acc); // Weak ptrs to node_impls which will be updated std::vector, int>> MNodes; + // Dynamic command-groups which will be updated + std::vector MDynCGs; std::shared_ptr MGraph; std::vector MValueStorage; }; +class dynamic_command_group_impl + : public std::enable_shared_from_this { +public: + dynamic_command_group_impl( + const command_graph &Graph); + + /// Returns the index of the active command-group + size_t getActiveIndex() const { return MActiveCGF; } + + /// Returns the number of CGs in the dynamic command-group. + size_t getNumCGs() const { return MKernels.size(); } + + /// Set the index of the active command-group. + /// @param Index The new index. + void setActiveIndex(size_t Index); + + /// Instantiates a command-group object for each CGF in the list. + /// @param CGFList List of CGFs to finalize with a handler into CG objects. + void + finalizeCGFList(const std::vector> &CGFList); + + /// Retrieve CG at the currently active index + /// @param Shared pointer to the active CG object. + std::shared_ptr getActiveKernel() const { + return MKernels[MActiveCGF]; + } + + /// Graph this dynamic command-group is associated with. + std::shared_ptr MGraph; + + /// Index of active command-group + std::atomic MActiveCGF; + + /// List of kernel command-groups for dynamic command-group nodes + std::vector> MKernels; + + /// List of nodes using this dynamic command-group. + std::vector> MNodes; +}; } // namespace detail } // namespace experimental } // namespace oneapi diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 2c2a4963bed98..e452eca0c8a6d 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -152,10 +152,6 @@ class handler_impl { ext::oneapi::experimental::detail::dynamic_parameter_impl *, int>> MDynamicParameters; - // Track whether an NDRange was used when submitting a kernel (as opposed to a - // range), needed for graph update - bool MNDRangeUsed = false; - /// The storage for the arguments passed. /// We need to store a copy of values that are passed explicitly through /// set_arg, require and so on, because we need them to be alive after diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index c0c22954822b7..19f1915943f05 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,12 @@ static inline void printPerformanceWarning(const std::string &Message) { jit_compiler::jit_compiler() { auto checkJITLibrary = [this]() -> bool { +#ifdef _WIN32 + static const std::string dir = sycl::detail::OSUtil::getCurrentDSODir(); + static const std::string JITLibraryName = dir + "\\" + "sycl-jit.dll"; +#else static const std::string JITLibraryName = "libsycl-jit.so"; +#endif void *LibraryPtr = sycl::detail::ur::loadOsLibrary(JITLibraryName); if (LibraryPtr == nullptr) { @@ -625,6 +631,7 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants( QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, const std::vector &SpecConstBlob) { +#ifndef _WIN32 if (!BinImage) { throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "No suitable IR available for materializing"); @@ -716,6 +723,13 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants( } return NewKernel; +#else // _WIN32 + (void)Queue; + (void)BinImage; + (void)KernelName; + (void)SpecConstBlob; + return nullptr; +#endif // _WIN32 } std::unique_ptr diff --git a/sycl/source/detail/os_util.cpp b/sycl/source/detail/os_util.cpp index 860ad71f9f7ea..c8ee3b8f33c11 100644 --- a/sycl/source/detail/os_util.cpp +++ b/sycl/source/detail/os_util.cpp @@ -148,8 +148,8 @@ std::string OSUtil::getDirName(const char *Path) { #elif defined(__SYCL_RT_OS_WINDOWS) /// Returns an absolute path where the object was found. -// ur_win_proxy_loader.dll uses this same logic. If it is changed -// significantly, it might be wise to change it there too. +// ur_win_proxy_loader.dll and sycl-jit.dll use this same logic. If it is +// changed significantly, it might be wise to change it there too. std::string OSUtil::getCurrentDSODir() { char Path[MAX_PATH]; Path[0] = '\0'; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 78ee43a320989..091504a983ff3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2527,23 +2527,16 @@ static ur_result_t SetKernelParamsAndLaunch( return Error; } -ur_result_t enqueueImpCommandBufferKernel( - context Ctx, DeviceImplPtr DeviceImpl, - ur_exp_command_buffer_handle_t CommandBuffer, - const CGExecKernel &CommandGroup, - std::vector &SyncPoints, - ur_exp_command_buffer_sync_point_t *OutSyncPoint, - ur_exp_command_buffer_command_handle_t *OutCommand, - const std::function &getMemAllocationFunc) { - auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx); - const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); +namespace { +std::tuple, + const KernelArgMask *> +getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, + DeviceImplPtr DeviceImpl, + std::vector &UrKernelsToRelease, + std::vector &UrProgramsToRelease) { + ur_kernel_handle_t UrKernel = nullptr; - ur_program_handle_t UrProgram = nullptr; - std::shared_ptr SyclKernelImpl = nullptr; std::shared_ptr DeviceImageImpl = nullptr; - - auto Kernel = CommandGroup.MSyclKernel; - auto KernelBundleImplPtr = CommandGroup.MKernelBundle; const KernelArgMask *EliminatedArgMask = nullptr; // Use kernel_bundle if available unless it is interop. @@ -2551,27 +2544,74 @@ ur_result_t enqueueImpCommandBufferKernel( // in interop kernel bundles (if any) do not have kernel_id // and can therefore not be looked up, but since they are self-contained // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { + if (auto KernelBundleImplPtr = CommandGroup.MKernelBundle; + KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { auto KernelName = CommandGroup.MKernelName; kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); + kernel SyclKernel = KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); + + auto SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); UrKernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); - UrProgram = DeviceImageImpl->get_ur_program_ref(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); - } else if (Kernel != nullptr) { + } else if (auto Kernel = CommandGroup.MSyclKernel; Kernel != nullptr) { UrKernel = Kernel->getHandleRef(); - UrProgram = Kernel->getProgramRef(); EliminatedArgMask = Kernel->getKernelArgMask(); } else { + ur_program_handle_t UrProgram = nullptr; std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName); + UrKernelsToRelease.push_back(UrKernel); + UrProgramsToRelease.push_back(UrProgram); + } + return std::make_tuple(UrKernel, DeviceImageImpl, EliminatedArgMask); +} +} // anonymous namespace + +ur_result_t enqueueImpCommandBufferKernel( + context Ctx, DeviceImplPtr DeviceImpl, + ur_exp_command_buffer_handle_t CommandBuffer, + const CGExecKernel &CommandGroup, + std::vector &SyncPoints, + ur_exp_command_buffer_sync_point_t *OutSyncPoint, + ur_exp_command_buffer_command_handle_t *OutCommand, + const std::function &getMemAllocationFunc) { + // List of ur objects to be released after UR call. We don't do anything + // with the ur_program_handle_t objects, but need to update their reference + // count. + std::vector UrKernelsToRelease; + std::vector UrProgramsToRelease; + + ur_kernel_handle_t UrKernel = nullptr; + std::shared_ptr DeviceImageImpl = nullptr; + const KernelArgMask *EliminatedArgMask = nullptr; + + auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx); + std::tie(UrKernel, DeviceImageImpl, EliminatedArgMask) = + getCGKernelInfo(CommandGroup, ContextImpl, DeviceImpl, UrKernelsToRelease, + UrProgramsToRelease); + + // Build up the list of UR kernel handles that the UR command could be + // updated to use. + std::vector AltUrKernels; + const std::vector> + &AlternativeKernels = CommandGroup.MAlternativeKernels; + for (const auto &AltCGKernelWP : AlternativeKernels) { + auto AltCGKernel = AltCGKernelWP.lock(); + assert(AltCGKernel != nullptr); + + ur_kernel_handle_t AltUrKernel = nullptr; + std::tie(AltUrKernel, std::ignore, std::ignore) = + getCGKernelInfo(*AltCGKernel.get(), ContextImpl, DeviceImpl, + UrKernelsToRelease, UrProgramsToRelease); + AltUrKernels.push_back(AltUrKernel); } + const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl, &Ctx, &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { @@ -2622,14 +2662,17 @@ ur_result_t enqueueImpCommandBufferKernel( ur_result_t Res = Adapter->call_nocheck( CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], - &NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(), - SyncPoints.size() ? SyncPoints.data() : nullptr, 0, nullptr, - OutSyncPoint, nullptr, + &NDRDesc.GlobalSize[0], LocalSize, AltUrKernels.size(), + AltUrKernels.size() ? AltUrKernels.data() : nullptr, + SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, 0, + nullptr, OutSyncPoint, nullptr, CommandBufferDesc.isUpdatable ? OutCommand : nullptr); - if (!SyclKernelImpl && !Kernel) { - Adapter->call(UrKernel); - Adapter->call(UrProgram); + for (auto &Kernel : UrKernelsToRelease) { + Adapter->call(Kernel); + } + for (auto &Program : UrProgramsToRelease) { + Adapter->call(Program); } if (Res != UR_RESULT_SUCCESS) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index ac1d8ca44c5dc..efbbb52acab73 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -583,16 +583,16 @@ ur_kernel_handle_t Scheduler::completeSpecConstMaterialization( [[maybe_unused]] const RTDeviceBinaryImage *BinImage, [[maybe_unused]] const std::string &KernelName, [[maybe_unused]] std::vector &SpecConstBlob) { -#if SYCL_EXT_JIT_ENABLE +#if SYCL_EXT_JIT_ENABLE && !_WIN32 return detail::jit_compiler::get_instance().materializeSpecConstants( Queue, BinImage, KernelName, SpecConstBlob); -#else // SYCL_EXT_JIT_ENABLE +#else // SYCL_EXT_JIT_ENABLE && !_WIN32 if (detail::SYCLConfig::get() > 0) { std::cerr << "WARNING: Materialization of spec constants not supported by " "this build\n"; } return nullptr; -#endif // SYCL_EXT_JIT_ENABLE +#endif // SYCL_EXT_JIT_ENABLE && !_WIN32 } EventImplPtr Scheduler::addCommandGraphUpdate( diff --git a/sycl/source/detail/windows_os_utils.hpp b/sycl/source/detail/windows_os_utils.hpp index 690fbba46371c..f9141f2a4c5e1 100644 --- a/sycl/source/detail/windows_os_utils.hpp +++ b/sycl/source/detail/windows_os_utils.hpp @@ -10,6 +10,8 @@ #include +// ur_proxy_loader.dll and sycl-jit.dll use this same logic. If it changed +// significantly, then it'd be wise to update those versions as well. using OSModuleHandle = intptr_t; constexpr OSModuleHandle ExeModuleHandle = -1; inline OSModuleHandle getOSModuleHandle(const void *VirtAddr) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 61ea7ee7be0a0..a7ac73f9e4c34 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -555,11 +555,12 @@ event handler::finalize() { // Find the last node added to the graph from this queue, so our new // node can set it as a predecessor. auto DependentNode = GraphImpl->getLastInorderNode(MQueue); - - NodeImpl = DependentNode - ? GraphImpl->add(NodeType, std::move(CommandGroup), - {DependentNode}) - : GraphImpl->add(NodeType, std::move(CommandGroup)); + std::vector> + Deps; + if (DependentNode) { + Deps.push_back(DependentNode); + } + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), Deps); // If we are recording an in-order queue remember the new node, so it // can be used as a dependency for any more nodes recorded from this @@ -567,12 +568,13 @@ event handler::finalize() { GraphImpl->setLastInorderNode(MQueue, NodeImpl); } else { auto LastBarrierRecordedFromQueue = GraphImpl->getBarrierDep(MQueue); + std::vector> + Deps; + if (LastBarrierRecordedFromQueue) { - NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), - {LastBarrierRecordedFromQueue}); - } else { - NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); + Deps.push_back(LastBarrierRecordedFromQueue); } + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), Deps); if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) { GraphImpl->setBarrierDep(MQueue, NodeImpl); @@ -582,8 +584,6 @@ event handler::finalize() { // Associate an event with this new node and return the event. GraphImpl->addEventForNode(EventImpl, NodeImpl); - NodeImpl->MNDRangeUsed = impl->MNDRangeUsed; - return detail::createSyclObjFromImpl(EventImpl); } @@ -2008,7 +2008,9 @@ std::tuple, bool> handler::getMaxWorkGroups_v2() { return {std::array{0, 0, 0}, false}; } -void handler::setNDRangeUsed(bool Value) { impl->MNDRangeUsed = Value; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::setNDRangeUsed(bool Value) { (void)Value; } +#endif void handler::registerDynamicParameter( ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, diff --git a/sycl/test-e2e/Compression/compression_separate_compile.cpp b/sycl/test-e2e/Compression/compression_separate_compile.cpp index 9e47bbebdc875..dab17e3506b4e 100644 --- a/sycl/test-e2e/Compression/compression_separate_compile.cpp +++ b/sycl/test-e2e/Compression/compression_separate_compile.cpp @@ -10,6 +10,13 @@ ////////////////////// Link device images // RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v +// Make sure the clang-offload-wrapper is called with the --offload-compress +// option. +// RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -### &> %t_driver_opts.txt +// RUN: FileCheck -input-file=%t_driver_opts.txt %s --check-prefix=CHECK-DRIVER-OPTS + +// CHECK-DRIVER-OPTS: clang-offload-wrapper{{.*}} "-offload-compress" + ////////////////////// Compile the host program // RUN: %clangxx -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp index 8a4460f9b8643..c87f2fe480096 100644 --- a/sycl/test-e2e/Compression/no_zstd_warning.cpp +++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp @@ -1,4 +1,4 @@ // using --offload-compress without zstd should throw an error. // REQUIRES: !zstd -// RUN: not %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s +// RUN: not %{build} %O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s // CHECK: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index 8450d6eae2573..9cd3bdf1c12f3 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -7,11 +7,11 @@ // As we are doing a separate device compilation here, we need to explicitly // add the device lib instrumentation (itt_compiler_wrapper) // RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict -// >> ---- unbundle compiler wrapper and sanitizer device objects +// >> ---- unbundle compiler wrapper and asan device objects // RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle -// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-sanitizer%obj_ext -output=%t_sanitizer.bc -unbundle %} +// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %} // >> ---- link device code -// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_sanitizer.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %} +// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %} // >> ---- translate to SPIR-V // RUN: llvm-spirv -o %t.spv %t_app.bc // RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -fno-sycl-dead-args-optimization -Xclang -verify-ignore-unexpected=note,warning diff --git a/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp b/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp index 2d9ea347179ce..e46c6286e2e7a 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp @@ -1,4 +1,6 @@ -// RUN: %{build} -std=c++23 -o %t.out +// DEFINE: %{cpp23} = %if cl_options %{/std:c++23%} %else %{-std=c++23%} + +// RUN: %{build} %{cpp23} -o %t.out // RUN: %{run} %t.out // // UNSUPPORTED: opencl && gpu diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp index 791bda2cf1e61..f539f67aacab6 100644 --- a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-double-edge-cases.cpp @@ -3,6 +3,7 @@ // // REQUIRES: aspect-fp64 // UNSUPPORTED: hip || cuda +// UNSUPPORTED-INTENDED: This test is intended for backends with SPIR-V support. // // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp index 649c960750156..992cd6147b535 100644 --- a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp @@ -5,6 +5,7 @@ #include #include +#include bool check(bool cond, const std::string &cond_str, int line, unsigned testcase) { @@ -198,12 +199,18 @@ template bool test() { sycl::buffer> data(testcases, sycl::range{N}); sycl::buffer> results(sycl::range{N}); + sycl::buffer> exp_conj(sycl::range{N}); + sycl::buffer> conj_exp(sycl::range{N}); q.submit([&](sycl::handler &cgh) { sycl::accessor acc_data(data, cgh, sycl::read_only); - sycl::accessor acc(results, cgh, sycl::write_only); + sycl::accessor acc_results(results, cgh, sycl::write_only); + sycl::accessor acc_exp_conj(exp_conj, cgh, sycl::write_only); + sycl::accessor acc_conj_exp(conj_exp, cgh, sycl::write_only); cgh.parallel_for(sycl::range{N}, [=](sycl::item<1> it) { - acc[it] = std::exp(acc_data[it]); + acc_results[it] = std::exp(acc_data[it]); + acc_exp_conj[it] = std::exp(std::conj(acc_data[it])); + acc_conj_exp[it] = std::conj(std::exp(acc_data[it])); }); }).wait_and_throw(); @@ -218,9 +225,18 @@ template bool test() { // Based on https://en.cppreference.com/w/cpp/numeric/complex/exp // z below refers to the argument passed to std::exp(complex) - sycl::host_accessor acc(results); + sycl::host_accessor acc_results(results); + sycl::host_accessor acc_exp_conj(exp_conj); + sycl::host_accessor acc_conj_exp(conj_exp); for (unsigned i = 0; i < N; ++i) { - std::complex r = acc[i]; + // std::exp(std::conj(z)) == std::conj(std::exp(z)) + // NAN is not equal to NAN in floating-point arithmetic, therefore compare + // only results without NAN + if (!std::isnan(acc_exp_conj[i].real()) && + !std::isnan(acc_exp_conj[i].imag())) + CHECK(acc_exp_conj[i] == acc_conj_exp[i], passed, i); + + std::complex r = acc_results[i]; // If z is (+/-0, +0), the result is (1, +0) if (testcases[i].real() == 0 && testcases[i].imag() == 0 && !std::signbit(testcases[i].imag())) { @@ -246,6 +262,33 @@ template bool test() { CHECK(r.imag() == 0, passed, i); CHECK(std::signbit(testcases[i].imag()) == std::signbit(r.imag()), passed, i); + // If z is (-inf, y) (for any finite y), the result is +0cis(y) where + // cis(y) is cos(y) + isin(y) + } else if (std::isinf(testcases[i].real()) && + std::signbit(testcases[i].real()) && + std::isfinite(testcases[i].imag())) { + CHECK(r.real() == 0, passed, i) + CHECK(std::signbit(r.real()) == + std::signbit(std::cos(testcases[i].imag())), + passed, i) + CHECK(r.imag() == 0, passed, i) + CHECK(std::signbit(r.imag()) == + std::signbit(std::sin(testcases[i].imag())), + passed, i) + // If z is (+inf, y) (for any finite nonzero y), the result is +∞cis(y) + // where cis(y) is cos(y) + isin(y) + } else if (std::isinf(testcases[i].real()) && + !std::signbit(testcases[i].real()) && + std::isfinite(testcases[i].imag()) && + testcases[i].imag() != 0) { + CHECK(std::isinf(r.real()), passed, i) + CHECK(std::signbit(r.real()) == + std::signbit(std::cos(testcases[i].imag())), + passed, i) + CHECK(std::isinf(r.imag()), passed, i) + CHECK(std::signbit(r.imag()) == + std::signbit(std::sin(testcases[i].imag())), + passed, i) // If z is (-inf, +inf), the result is (+/-0, +/-0) (signs are // unspecified) } else if (std::isinf(testcases[i].real()) && testcases[i].real() < 0 && @@ -290,6 +333,13 @@ template bool test() { } else if (std::isfinite(testcases[i].imag()) && std::abs(testcases[i].imag()) <= 1) { CHECK(!std::signbit(r.real()), passed, i); +#ifdef _WIN32 + // This check fails on win, temporary skipping: + // CMPLRLLVM-61834 + // TODO: Delete this macro block when fixed + if (std::is_same_v) + continue; +#endif CHECK(std::signbit(r.imag()) == std::signbit(testcases[i].imag()), passed, i); // Those tests were taken from oneDPL, not sure what is the corner case diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp new file mode 100644 index 0000000000000..e1c956a45e382 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-float-edge-cases.cpp @@ -0,0 +1,12 @@ +// This test checks edge cases handling for std::exp(std::complex) used +// in SYCL kernels. +// +// UNSUPPORTED: hip || cuda +// UNSUPPORTED-INTENDED: This test is intended for backends with SPIR-V support. +// +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "exp-std-complex-edge-cases.hpp" + +int main() { return test(); } diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp index ef3b440790c6b..6c6fe20337dab 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp @@ -43,7 +43,7 @@ int main() { auto GraphExec = Graph.finalize(); - InOrderQueue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + exp_ext::execute_graph(InOrderQueue, GraphExec); InOrderQueue.wait_and_throw(); free(PtrA, InOrderQueue); diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp index 8eccce2ea8ef9..623d6fc817879 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit.cpp @@ -60,7 +60,7 @@ int main() { auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + exp_ext::execute_graph(Queue, GraphExec); Queue.wait_and_throw(); } diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp index f66731d745bd2..4b8294be7e989 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions_submit_with_event.cpp @@ -52,8 +52,9 @@ int main() { auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); - Queue.wait_and_throw(); + exp_ext::submit_with_event(Queue, [&](handler &CGH) { + exp_ext::execute_graph(CGH, GraphExec); + }).wait(); free(PtrA, Queue); free(PtrB, Queue); diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp new file mode 100644 index 0000000000000..a5e5a1ea78b87 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp @@ -0,0 +1,61 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using dynamic command-group objects with buffer accessors + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + buffer Buf{sycl::range<1>(Size)}; + Buf.set_write_back(false); + auto Acc = Buf.get_access(); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.require(Acc); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.require(Acc); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + std::vector HostData(Size, 0); + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(Acc, HostData.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp new file mode 100644 index 0000000000000..7b477edacff98 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp @@ -0,0 +1,79 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests adding a dynamic command-group node to a graph using buffer +// accessors for the node edges. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + int *Ptr = (int *)sycl::malloc_device(Size, Queue); + buffer Buf{sycl::range<1>(Size)}; + Buf.set_write_back(false); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto RootNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(Size, [=](item<1> Item) { Acc[Item.get_id()] = 1; }); + }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { Acc[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto LeafNode = Graph.add([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for( + Size, [=](item<1> Item) { Ptr[Item.get_id()] = Acc[Item.get_id()]; }); + }); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostData(Size, 0); + Queue.copy(Ptr, HostData.data(), Size).wait(); + + int Ref = PatternA + 1; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternB + 1; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp new file mode 100644 index 0000000000000..a420d7deb58de --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp @@ -0,0 +1,88 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests adding a dynamic command-group node to a graph using buffer +// accessors for the node edges, but where different command-groups +// use different buffers that create identical edges. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + int *Ptr = (int *)sycl::malloc_device(Size, Queue); + buffer BufA{sycl::range<1>(Size)}; + buffer BufB{sycl::range<1>(Size)}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + int InitA = 4; + int InitB = -4; + auto RootNode = Graph.add([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(Size, [=](item<1> Item) { + AccA[Item.get_id()] = InitA; + AccB[Item.get_id()] = InitB; + }); + }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { AccA[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(Size, + [=](item<1> Item) { AccB[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto LeafNode = Graph.add([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(Size, [=](item<1> Item) { + Ptr[Item.get_id()] = AccA[Item.get_id()] + AccB[Item.get_id()]; + }); + }); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostData(Size, 0); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == (InitA + InitB + PatternA)); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = InitA + InitB + PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp new file mode 100644 index 0000000000000..0eaa714463670 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp @@ -0,0 +1,81 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out %S/../Inputs/Kernels/update_with_indices_accessor.spv 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// REQUIRES: level_zero +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating an accessor argument to a graph node created from SPIR-V +// using dynamic command-groups. + +#include "../graph_common.hpp" + +int main(int, char **argv) { + queue Queue{}; + sycl::kernel_bundle KernelBundle = loadKernelsFromFile(Queue, argv[1]); + const auto getKernel = + [](sycl::kernel_bundle &bundle, + const std::string &name) { + return bundle.ext_oneapi_get_kernel(name); + }; + + kernel kernel = getKernel( + KernelBundle, "_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_"); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + buffer BufA{sycl::range<1>(Size)}; + buffer BufB{sycl::range<1>(Size)}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + auto AccA = BufA.get_access(); + auto AccB = BufB.get_access(); + + auto CGFA = [&](handler &CGH) { + CGH.require(AccA); + CGH.set_arg(0, AccA); + CGH.single_task(kernel); + }; + + auto CGFB = [&](handler &CGH) { + CGH.require(AccB); + CGH.set_arg(0, AccB); + CGH.single_task(kernel); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Queue, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostDataA(Size, 0); + std::vector HostDataB(Size, 0); + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); + Queue.copy(BufB.get_access(), HostDataB.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(BufA.get_access(), HostDataA.data()).wait(); + Queue.copy(BufB.get_access(), HostDataB.data()).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + assert(HostDataB[i] == i); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp new file mode 100644 index 0000000000000..795dc074d882e --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp @@ -0,0 +1,148 @@ +// RUN: %{build} -o %t.out +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating a dynamic command-group with command-groups containing a +// different number of arguments. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + // 3 kernel arguments: Ptr, PatternA, PatternB + int PatternA = 42; + int PatternB = 0xA; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for( + Size, [=](item<1> Item) { Ptr[Item.get_id()] = PatternA + PatternB; }); + }; + + // 2 kernel arguments: Ptr, MyPatternStruct + struct PatternStruct { + int PatternA; + int PatternB; + }; + PatternStruct MyPatternStruct{PatternA + 1, PatternB + 1}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, [=](item<1> Item) { + Ptr[Item.get_id()] = MyPatternStruct.PatternA + MyPatternStruct.PatternB; + }); + }; + + // 1 kernel argument: Ptr + auto CGFC = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = 42 - 0xA; }); + }; + + // 4 kernel argument: Ptr + int PatternC = -12; + auto CGFD = [&](handler &CGH) { + CGH.parallel_for(Size, [=](item<1> Item) { + Ptr[Item.get_id()] = PatternA + PatternB + PatternC; + }); + }; + + // CHECK: <--- urKernelSetArgPointer( + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1:[0-9a-fA-Fx]+]] + // CHECL-SAME: .argIndex = 0 + + // CHECK: <--- urKernelSetArgValue + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1]] + // CHECK-SAME: .argIndex = 1 + + // CHECK: <--- urKernelSetArgValue + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1]] + // CHECK-SAME: .argIndex = 2 + + // CHECK: <--- urCommandBufferAppendKernelLaunchExp + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE1]] + // CHECK-SAME: .numKernelAlternatives = 3 + // CHECK-SAME: .phKernelAlternatives = {[[KERNEL_HANDLE2:[0-9a-fA-Fx]+]], [[KERNEL_HANDLE3:[0-9a-fA-Fx]+]], [[KERNEL_HANDLE4:[0-9a-fA-Fx]+]]} + auto DynamicCG = + exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC, CGFD}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // Verify CGFA works with 3 arguments + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = PatternA + PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + // Verify CGFB works with 2 arguments + // CHECK: <--- urCommandBufferUpdateKernelLaunchExp + // CHECK-SAME: .hNewKernel = [[KERNEL_HANDLE2]] + // CHECK-SAME: .numNewMemObjArgs = 0 + // CHECK-SAME: .numNewPointerArgs = 1 + // CHECK-SAME: .numNewValueArgs = 1 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + // CHECK-SAME: .argIndex = 0 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 1 + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = (PatternA + 1) + (PatternB + 1); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + // Verify CGFC works with 1 argument + // CHECK: <--- urCommandBufferUpdateKernelLaunchExp + // CHECK-SAME: .hNewKernel = [[KERNEL_HANDLE3]] + // CHECK-SAME: .numNewMemObjArgs = 0 + // CHECK-SAME: .numNewPointerArgs = 1 + // CHECK-SAME: .numNewValueArgs = 0 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + // CHECK-SAME: .argIndex = 0 + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternA - PatternB; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + // Verify CGFD works with 4 arguments + // CHECK: <--- urCommandBufferUpdateKernelLaunchExp + // CHECK-SAME: .hNewKernel = [[KERNEL_HANDLE4]] + // CHECK-SAME: .numNewMemObjArgs = 0 + // CHECK-SAME: .numNewPointerArgs = 1 + // CHECK-SAME: .numNewValueArgs = 3 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + // CHECK-SAME: .argIndex = 0 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 1 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 2 + // CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + // CHECK-SAME: .argIndex = 3 + DynamicCG.set_active_cgf(3); + ExecGraph.update(DynamicCGNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + Ref = PatternA + PatternB + PatternC; + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp new file mode 100644 index 0000000000000..0964f6e0c354e --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests adding a dynamic command-group node to a graph using graph limited +// events for dependencies. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + std::vector HostData(Size); + + Graph.begin_recording(Queue); + int PatternA = 42; + auto EventA = Queue.fill(PtrA, PatternA, Size); + int PatternB = 0xA; + auto EventB = Queue.fill(PtrB, PatternB, Size); + Graph.end_recording(Queue); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(Size, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] * PtrB[I]; + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on({EventA, EventB}); + CGH.parallel_for(Size, [=](item<1> Item) { + auto I = Item.get_id(); + PtrC[I] = PtrA[I] + PtrB[I]; + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA * PatternB); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrC, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA + PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp new file mode 100644 index 0000000000000..237e9173f253e --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp @@ -0,0 +1,72 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating a dynamic command-group node where the dynamic command-groups +// have different ranges/nd-ranges + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + auto RootNode = + Graph.add([&](handler &cgh) { cgh.memset(Ptr, 0, Size * sizeof(int)); }); + + int PatternA = 42; + size_t ItemsA = Size / 2; + sycl::range<1> RangeA{ItemsA}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(RangeA, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + size_t ItemsB = Size / 4; + sycl::nd_range<1> RangeB{sycl::range{ItemsB}, sycl::range{16}}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for( + RangeB, [=](nd_item<1> Item) { Ptr[Item.get_global_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < ItemsA) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < ItemsB) { + assert(HostData[i] == PatternB); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp new file mode 100644 index 0000000000000..261ac6ecf5c3b --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp @@ -0,0 +1,80 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating a dynamic command-group node where the dynamic command-groups +// have different range/nd-range dimensions + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 64; + int *Ptr = malloc_device(N, Queue); + std::vector HostData(N); + + auto RootNode = + Graph.add([&](handler &cgh) { cgh.memset(Ptr, 0, N * sizeof(int)); }); + + int PatternA = 42; + sycl::range<1> RangeA{N}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(RangeA, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + sycl::nd_range<3> RangeB{sycl::range{4, 4, 4}, sycl::range{2, 2, 2}}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(RangeB, [=](nd_item<3> Item) { + Ptr[Item.get_global_linear_id()] = PatternB; + }); + }; + + int PatternC = 7; + sycl::range<2> RangeC{8, 8}; + auto CGFC = [&](handler &CGH) { + CGH.parallel_for( + RangeC, [=](item<2> Item) { Ptr[Item.get_linear_id()] = PatternC; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternB); + } + + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PatternC); + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp new file mode 100644 index 0000000000000..04697077bec36 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp @@ -0,0 +1,57 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests how the nd-range of a node is overwritten by the active command-group + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + std::vector HostData(Size); + int *Ptr = malloc_device(Size, Queue); + Queue.memset(Ptr, 0, Size * sizeof(int)).wait(); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + size_t NewRange = Size / 2; + sycl::range<1> UpdateRange(NewRange); + DynamicCGNode.update_range(UpdateRange); + + DynamicCG.set_active_cgf(1); + + // Check that the UpdateRange from active CGF 0 is preserved + DynamicCG.set_active_cgf(0); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + if (i < NewRange) { + assert(HostData[i] == PatternA); + } else { + assert(HostData[i] == 0); + } + } + + sycl::free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp new file mode 100644 index 0000000000000..7049b5bdde305 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp @@ -0,0 +1,71 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating kernel code using dynamic command-groups that have different +// parameters in each command-group. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.wait(); + + int PatternA = 0xA; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { PtrA[Item.get_id()] = PatternA; }); + }; + + int PatternB = 42; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { PtrB[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == 0); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == PatternA); + assert(HostDataB[i] == PatternB); + } + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp new file mode 100644 index 0000000000000..1f98200791b6c --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp @@ -0,0 +1,75 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using the same dynamic command-group in more than one graph node. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + std::vector HostData(Size); + int *Ptr = malloc_device(Size, Queue); + + auto RootNode = + Graph.add([&](handler &CGH) { CGH.memset(Ptr, 0, Size * sizeof(int)); }); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] += PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] += PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto Node1 = + Graph.add(DynamicCG, exp_ext::property::node::depends_on(RootNode)); + + auto Node2 = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(Size, [=](item<1> Item) { Ptr[Item.get_id()] *= 2; }); + }, + exp_ext::property::node::depends_on(Node1)); + + auto Node3 = Graph.add(DynamicCG, exp_ext::property::node::depends_on(Node2)); + + // This ND-Range affects Node 1 as well, as the range is tied to the node. + sycl::range<1> Node3Range(Size / 2); + Node3.update_range(Node3Range); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + int Ref = (i < Node3Range.get(0)) ? (PatternA * 3) : 0; + assert(HostData[i] == Ref); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(Node1); + ExecGraph.update(Node3); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + int Ref = (PatternB * 3); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == Ref); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp new file mode 100644 index 0000000000000..8c0c705960ef6 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp @@ -0,0 +1,46 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests updating a dynamic command-group node after it has been added to +// a graph but before the graph has been finalized + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + DynamicCG.set_active_cgf(1); + auto ExecGraph = Graph.finalize(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp new file mode 100644 index 0000000000000..100701f7b62aa --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp @@ -0,0 +1,56 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests updating usm kernel code using dynamic command-groups + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA); + } + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp new file mode 100644 index 0000000000000..53b34d1add289 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_all_dyn_params.cpp @@ -0,0 +1,120 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using a dynamic command-group object with dynamic parameters inside it + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); + + exp_ext::dynamic_parameter DynParam1(Graph, PtrA); + exp_ext::dynamic_parameter DynParam2(Graph, PtrC); + + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParam1); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParam1); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFC = [&](handler &CGH) { + CGH.set_arg(0, DynParam2); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] = i; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto KernelNode = Graph.add( + [&](handler &CGH) { + CGH.set_arg(0, DynParam2); + // TODO: Use the free function kernel extension instead of regular + // kernels when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] += i; + } + }); + }, + exp_ext::property::node::depends_on(DynamicCGNode)); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (A ? i : 0)); + assert(HostDataB[i] == (B ? i : 0)); + assert(HostDataC[i] == (C ? (2 * i) : i)); + } + }; + ExecuteGraphAndVerifyResults(true, false, false); + + DynParam1.update(PtrB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_cgf(2); + // Should be ignored as DynParam1 not used in active node + DynParam1.update(PtrA); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, false, true); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp new file mode 100644 index 0000000000000..00482185ebc27 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp @@ -0,0 +1,136 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using a dynamic command-group object with dynamic parameters of +// different types + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); + + int ScalarValue = 17; + exp_ext::dynamic_parameter DynParamScalar(Graph, ScalarValue); + exp_ext::dynamic_parameter DynParamPtr(Graph, PtrA); + + // Kernel has 2 dynamic parameters, one of scalar type & one of ptr type + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParamPtr); + CGH.set_arg(1, DynParamScalar); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = ScalarValue; + } + }); + }; + + // Kernel has a single argument, a dynamic parameter of ptr type + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParamPtr); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = ScalarValue; + } + }); + }; + + // Kernel has a two arguments, an immutable ptr type argument and a + // dynamic parameter of scalar type. + auto CGFC = [&](handler &CGH) { + CGH.set_arg(1, DynParamScalar); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] = ScalarValue; + } + }); + }; + + // Kernel has a single argument, of immutable pointer type + auto CGFD = [&](handler &CGH) { + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = ScalarValue; + } + }); + }; + + auto DynamicCG = + exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC, CGFD}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](int A, int B, int C) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == A); + assert(HostDataB[i] == B); + assert(HostDataC[i] == C); + } + }; + // CGFA using PtrA and ScalarValue in its dynamic parameters + ExecuteGraphAndVerifyResults(ScalarValue, 0, 0); + + // CGFA using PtrB and UpdatedScalarValue in its dynamic parameters + DynParamPtr.update(PtrB); + int UpdatedScalarValue = 42; + DynParamScalar.update(UpdatedScalarValue); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(0, UpdatedScalarValue, 0); + + // CGFB using PtrB in its dynamic parameter and immutable ScalarValue + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(0, ScalarValue, false); + + // CGFC using immutable PtrC and UpdatedScalarValue in its dynamic parameter + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(0, 0, UpdatedScalarValue); + + // CGFD using immutable PtrA and immutable ScalarValue for arguments + DynamicCG.set_active_cgf(3); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(ScalarValue, 0, 0); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp new file mode 100644 index 0000000000000..3213fc4eec2fe --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_some_dyn_params.cpp @@ -0,0 +1,106 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests using a dynamic command-group object where some but not all the +// command-groups use dynamic parameters. + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + int *PtrC = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + std::vector HostDataC(Size); + + exp_ext::dynamic_parameter DynParam(Graph, PtrA); + + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParam); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParam); + // TODO: Use the free function kernel extension instead of regular kernels + // when available. + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrA[i] = i; + } + }); + }; + + auto CGFC = [&](handler &CGH) { + CGH.single_task([=]() { + for (size_t i = 0; i < Size; i++) { + PtrC[i] = i; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB, CGFC}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool C) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.memset(PtrC, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.copy(PtrC, HostDataC.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (A ? i : 0)); + assert(HostDataB[i] == (B ? i : 0)); + assert(HostDataC[i] == (C ? i : 0)); + } + }; + // CGFA with DynParam using PtrA + ExecuteGraphAndVerifyResults(true, false, false); + + // CGFA with DynParam using PtrB + DynParam.update(PtrB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + // CGFB with DynParam using PtrB + DynamicCG.set_active_cgf(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + // CGFC unconditionally using PtrC + DynamicCG.set_active_cgf(2); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, false, true); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + sycl::free(PtrC, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp b/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp new file mode 100644 index 0000000000000..43db9d172e618 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/update_ndrange_to_range.cpp @@ -0,0 +1,54 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// Tests updating a graph node from sycl::nd_range to sycl::range + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + + Queue.memset(PtrA, 0, Size * sizeof(int)).wait(); + + nd_range<1> NDRange{range{Size}, range{32}}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + PtrA[GlobalID] += GlobalID; + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // first half of PtrA should be filled with values + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + } + + // Update NDRange to target first half only + size_t UpdateSize = Size / 2; + KernelNode.update_range(range<1>{UpdateSize}); + ExecGraph.update(KernelNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (i >= UpdateSize ? i : i * 2)); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/update_nullptr.cpp b/sycl/test-e2e/Graph/Update/update_nullptr.cpp new file mode 100644 index 0000000000000..060386c6659a3 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/update_nullptr.cpp @@ -0,0 +1,67 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests updating a graph node using a USM pointer set to nullptr + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + const size_t N = 1024; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrOut = malloc_device(N, Queue); + int *PtrIn = malloc_device(N, Queue); + + Queue.memset(PtrOut, 0, N * sizeof(int)).wait(); + int PtrPattern = 42; + Queue.fill(PtrIn, PtrPattern, N).wait(); + + exp_ext::dynamic_parameter InputParam(Graph, PtrIn); + int DefaultPattern = 10; + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.set_arg(0, InputParam); + cgh.set_arg(1, PtrOut); + cgh.single_task([=]() { + for (size_t i = 0; i < N; i++) { + if (PtrIn) { + PtrOut[i] = PtrIn[i]; + } else { + PtrOut[i] = DefaultPattern; + } + } + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // PtrA should be filled with values + Queue.ext_oneapi_graph(ExecGraph).wait(); + + std::vector HostData(N); + Queue.copy(PtrOut, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == PtrPattern); + } + + // Swap Input to nullptr + int *NullPtr = nullptr; + InputParam.update(NullPtr); + ExecGraph.update(KernelNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrOut, HostData.data(), N).wait(); + for (size_t i = 0; i < N; i++) { + assert(HostData[i] == DefaultPattern); + } + + free(PtrIn, Queue); + free(PtrOut, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp b/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp new file mode 100644 index 0000000000000..94052c8379b58 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/update_range_to_ndrange.cpp @@ -0,0 +1,55 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +// Tests updating a graph node from using a sycl::range to a sycl::nd_range + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + + Queue.memset(PtrA, 0, Size * sizeof(int)).wait(); + + range<1> Range{Size}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(Range, [=](item<1> Item) { + size_t GlobalID = Item.get_id(); + PtrA[GlobalID] += GlobalID; + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + // first half of PtrA should be filled with values + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == i); + } + + // Update NDRange to target first half only + size_t UpdateSize = Size / 2; + nd_range<1> NDRange{range{UpdateSize}, range{32}}; + KernelNode.update_nd_range(NDRange); + ExecGraph.update(KernelNode); + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (i >= UpdateSize ? i : i * 2)); + } + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp new file mode 100644 index 0000000000000..0e8b87c0725f2 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_cgf.cpp @@ -0,0 +1,76 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// XFAIL: level_zero +// XFAIL-TRACKER: OFNAAO-307 + +// Tests interaction of whole graph update and dynamic command-groups + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()}; + exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()}; + + int *Ptr = malloc_device(Size, Queue); + std::vector HostData(Size); + + int PatternA = 42; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternA; }); + }; + + int PatternB = 0xA; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(Size, + [=](item<1> Item) { Ptr[Item.get_id()] = PatternB; }); + }; + + auto DynamicCGA = exp_ext::dynamic_command_group(GraphA, {CGFA, CGFB}); + auto DynamicCGNodeA = GraphA.add(DynamicCGA); + + auto DynamicCGB = exp_ext::dynamic_command_group(GraphB, {CGFA, CGFB}); + auto DynamicCGNodeB = GraphB.add(DynamicCGB); + DynamicCGB.set_active_cgf(1); // Check if doesn't affect GraphA + + auto ExecGraph = GraphA.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternA); + } + + // Graph B has CGF B as active, while Graph A has CGF A as active. + // Different command-groups should error due to being different + // kernels. + std::error_code ErrorCode = make_error_code(sycl::errc::success); + try { + ExecGraph.update(GraphB); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + assert(ErrorCode == sycl::errc::invalid); + + // Both ExecGraph and Graph B have CGFB as active, so + // whole graph update should be valid as graphs match. + DynamicCGA.set_active_cgf(1); + ExecGraph.update(DynamicCGNodeA); + ExecGraph.update(GraphB); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + Queue.copy(Ptr, HostData.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostData[i] == PatternB); + } + + sycl::free(Ptr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index c9a4922e7fd46..b894685a8bd87 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -5,8 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// REQUIRES: aspect-usm_shared_allocations - // Tests that whole graph update works when using dynamic parameters. #include "../graph_common.hpp" diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index cc45096b8564c..1588d55dfd2e2 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -9,10 +9,6 @@ // REQUIRES: (opencl || level_zero) // UNSUPPORTED: accelerator -// UNSUPPORTED: windows -// UNSUPPORTED-TRACKER: CMPLRLLVM-63166 -// in CMakeLists). - // RUN: %{build} -o %t.out // RUN: %{run} %t.out // RUN: %{l0_leak_check} %{run} %t.out @@ -95,7 +91,7 @@ void test_1(sycl::queue &Queue, sycl::kernel &Kernel, int seed) { sycl::free(usmPtr, Queue); } -void test_build_and_run() { +int test_build_and_run() { namespace syclex = sycl::ext::oneapi::experimental; using source_kb = sycl::kernel_bundle; using exe_kb = sycl::kernel_bundle; @@ -110,7 +106,7 @@ void test_build_and_run() { "kernel bundle extension: " << q.get_device().get_info() << std::endl; - return; + return -1; } // Create from source. @@ -141,12 +137,14 @@ void test_build_and_run() { // Test the kernels. test_1(q, k, 37 + 5); // ff_cp seeds 37. AddEm will add 5 more. + + return 0; } int main() { #ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER - test_build_and_run(); + return test_build_and_run(); #else static_assert(false, "Kernel Compiler feature test macro undefined"); #endif diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index ac6dc118ce8b2..0105ea076839d 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -109,7 +109,7 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { template class ewops_ab {}; template + size_t VF, typename Tv = T> void test_ewops_ab() { if constexpr (Use == use::a) std::cout << "Test A "; @@ -122,41 +122,43 @@ void test_ewops_ab() { verify_op_ab>( - T(5.0), T(2.0), 7.0, [](auto l, auto r) { return l + r; }); + Tv(5.0), Tv(2.0), 7.0, [](auto l, auto r) { return l + r; }); verify_op_ab>( - T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l - r; }); + Tv(5.0), Tv(2.0), 3.0, [](auto l, auto r) { return l - r; }); verify_op_ab>( - T(5.0), T(2.0), 10.0, [](auto l, auto r) { return l * r; }); + Tv(5.0), Tv(2.0), 10.0, [](auto l, auto r) { return l * r; }); verify_op_ab>( - T(5.0), T(2.0), 2.5, [](auto l, auto r) { return l / r; }); + Tv(5.0), Tv(2.0), 2.5, [](auto l, auto r) { return l / r; }); verify_op_ab>( - T(5.0), T(5.0), 5.0, [](auto l, auto r) { return l == r ? l : T(1.0); }); + Tv(5.0), Tv(5.0), 5.0, + [](auto l, auto r) { return l == r ? l : Tv(1.0); }); verify_op_ab>( - T(5.0), T(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); + Tv(5.0), Tv(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); verify_op_ab>( - T(5.0), T(5.0), 1.0, [](auto l, auto r) { return l != r ? l : T(1.0); }); + Tv(5.0), Tv(5.0), 1.0, + [](auto l, auto r) { return l != r ? l : Tv(1.0); }); verify_op_ab>( - T(5.0), T(2.0), 3.0, - [](auto l, auto r) { return l > r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 3.0, + [](auto l, auto r) { return l > r ? Tv(3.0) : Tv(2.0); }); verify_op_ab>( - T(5.0), T(2.0), 2.0, - [](auto l, auto r) { return l < r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 2.0, + [](auto l, auto r) { return l < r ? Tv(3.0) : Tv(2.0); }); verify_op_ab>( - T(5.0), T(2.0), 3.0, - [](auto l, auto r) { return l >= r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 3.0, + [](auto l, auto r) { return l >= r ? Tv(3.0) : Tv(2.0); }); verify_op_ab>( - T(5.0), T(2.0), 2.0, - [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); + Tv(5.0), Tv(2.0), 2.0, + [](auto l, auto r) { return l <= r ? Tv(3.0) : Tv(2.0); }); } // Avoid same kernel name for different types and numbers of columns diff --git a/sycl/test-e2e/Regression/pf-wg-atomic64.cpp b/sycl/test-e2e/Regression/pf-wg-atomic64.cpp index 2cf4dd1ee39ec..31b0c7188059d 100644 --- a/sycl/test-e2e/Regression/pf-wg-atomic64.cpp +++ b/sycl/test-e2e/Regression/pf-wg-atomic64.cpp @@ -1,4 +1,6 @@ -// DISABLED: aspect-atomic64 +// UNSUPPORTED: aspect-atomic64 +// UNSUPPORTED-INTENDED: The test is intended for devices without atomic64 +// support. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp b/sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp new file mode 100644 index 0000000000000..67dec551543f1 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/extending_virtual_memory_range.cpp @@ -0,0 +1,97 @@ +// This test checks whether memory accesses to contiguous virtual memory ranges +// are performed correctly + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +#include "helpers.hpp" + +struct VirtualAddressRange { + VirtualAddressRange(uintptr_t Ptr, size_t Size) : MPtr{Ptr}, MSize{Size} {} + + uintptr_t MPtr; + size_t MSize; +}; + +struct PhysicalMemoryMapping { + PhysicalMemoryMapping(syclext::physical_mem &&PhysicalMem, void *MappingPtr) + : MPhysicalMem(std::move(PhysicalMem)), MMappingPtr(MappingPtr) {} + syclext::physical_mem MPhysicalMem; + void *MMappingPtr; +}; + +int main() { + int Failed = 0; + sycl::queue Q; + sycl::context Context = Q.get_context(); + sycl::device Device = Q.get_device(); + + constexpr size_t NumberOfVirtualMemoryRanges = 5; + constexpr size_t ElementsInRange = 100; + constexpr int ValueSetInKernel = 999; + + size_t BytesRequiredPerRange = ElementsInRange * sizeof(int); + + size_t UsedGranularity = GetLCMGranularity(Device, Context); + + size_t AlignedByteSizePerRange = + GetAlignedByteSize(BytesRequiredPerRange, UsedGranularity); + + std::vector VirtualMemoryRanges; + std::vector PhysicalMemoryMappings; + + for (size_t Index = 0; Index < NumberOfVirtualMemoryRanges; ++Index) { + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(AlignedByteSizePerRange, Context); + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSizePerRange}; + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSizePerRange, + syclext::address_access_mode::read_write); + + VirtualMemoryRanges.emplace_back(VirtualMemoryPtr, AlignedByteSizePerRange); + PhysicalMemoryMappings.emplace_back(std::move(PhysicalMem), MappedPtr); + } + + std::vector ResultHostData(ElementsInRange); + + for (size_t Index = 0; Index < NumberOfVirtualMemoryRanges; ++Index) { + int *DataRangePtr = + reinterpret_cast(PhysicalMemoryMappings[Index].MMappingPtr); + + Q.parallel_for(ElementsInRange, [=](sycl::id<1> Idx) { + DataRangePtr[Idx] = ValueSetInKernel; + }).wait_and_throw(); + + { + sycl::buffer ResultBuffer(ResultHostData); + + Q.submit([&](sycl::handler &Handle) { + sycl::accessor A(ResultBuffer, Handle, sycl::write_only); + Handle.parallel_for(ElementsInRange, [=](sycl::id<1> Idx) { + A[Idx] = DataRangePtr[Idx]; + }); + }); + } + + for (size_t i = 0; i < ElementsInRange; i++) { + if (ResultHostData[i] != ValueSetInKernel) { + std::cout << "Comparison failed with virtual range " << Index + 1 + << " at index " << i << ": " << ResultHostData[i] + << " != " << ValueSetInKernel << std::endl; + ++Failed; + } + } + } + + for (auto PhysMemMap : PhysicalMemoryMappings) { + syclext::unmap(PhysMemMap.MMappingPtr, PhysMemMap.MPhysicalMem.size(), + Context); + } + for (auto VirtualMemRange : VirtualMemoryRanges) { + syclext::free_virtual_mem(VirtualMemRange.MPtr, VirtualMemRange.MSize, + Context); + } + + return Failed; +} diff --git a/sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp b/sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp new file mode 100644 index 0000000000000..9d9b7a0628951 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/virtual_mem_operations.cpp @@ -0,0 +1,126 @@ +// This test checks whether certain operations in virtual memory extension work +// as expectd. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "helpers.hpp" + +int main() { + + constexpr size_t NumberOfIterations = 3; + std::array NumberOfElementsPerIteration{10, 100, + 1000}; + + sycl::queue Q; + sycl::context Context = Q.get_context(); + sycl::device Device = Q.get_device(); + + // A check should be performed that we can successfully perform and + // immediately release a valid reservation. + for (const size_t RequiredNumElements : NumberOfElementsPerIteration) { + size_t BytesRequired = RequiredNumElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + // A check should be performed that we can successfully map and immediately + // unmap a virtual memory range. + for (const size_t RequiredNumElements : NumberOfElementsPerIteration) { + size_t BytesRequired = RequiredNumElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + syclext::unmap(MappedPtr, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + { + // Check should be performed that methods get_context(), get_device() and + // size() return correct values (i.e. ones which were passed to physical_mem + // constructor). + size_t BytesRequired = NumberOfElementsPerIteration[2] * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + + assert(PhysicalMem.get_device() == Device && + "device passed to physical_mem must be the same as returned from " + "get_device()"); + + assert(PhysicalMem.get_context() == Context && + "context passed to physical_mem must be the same as returned from " + "get_context()"); + + assert(PhysicalMem.size() == AlignedByteSize && + "size in bytes passed to physical_mem must be the same as returned " + "from size()"); + } + + { + // Check to see if value returned from a valid call to map() is the same as + // reinterpret_cast(ptr). + size_t BytesRequired = NumberOfElementsPerIteration[2] * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + + assert(MappedPtr == reinterpret_cast(VirtualMemoryPtr) && + "value returned from a valid call to map() must be equal " + "reinterpret_cast(ptr)"); + + syclext::unmap(MappedPtr, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + // Check to see if can change access mode of a virtual memory range and + // immediately see it changed. + for (const size_t RequiredNumElements : NumberOfElementsPerIteration) { + size_t BytesRequired = RequiredNumElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(Device, Context); + size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity); + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, Context); + syclext::physical_mem PhysicalMem{Device, Context, AlignedByteSize}; + void *MappedPtr = PhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + + syclext::address_access_mode CurrentAccessMode = + syclext::get_access_mode(MappedPtr, AlignedByteSize, Context); + + assert(CurrentAccessMode == syclext::address_access_mode::read_write && + "access mode must be address_access_mode::read_write before change " + "with " + "set_access_mode()"); + + syclext::set_access_mode(MappedPtr, AlignedByteSize, + syclext::address_access_mode::read, Context); + + CurrentAccessMode = + syclext::get_access_mode(MappedPtr, AlignedByteSize, Context); + + assert(CurrentAccessMode == syclext::address_access_mode::read && + "access mode must be address_access_mode::read after change with " + "set_access_mode()"); + + syclext::unmap(MappedPtr, AlignedByteSize, Context); + syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context); + } + + return 0; +} diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index be2332d4a4c8a..144e2204361ef 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -96,6 +96,7 @@ add_lit_testsuite(check-sycl-dumps "Running ABI dump tests only" EXCLUDE_FROM_CHECK_ALL ) -if(SYCL_ENABLE_EXTENSION_JIT) +if(NOT WIN32 AND SYCL_ENABLE_EXTENSION_JIT) + # lit-based testing of JIT passes isn't supported on Windows. add_dependencies(check-sycl check-sycl-jit) -endif(SYCL_ENABLE_EXTENSION_JIT) +endif() diff --git a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp index 071481d0d1be0..fc494d500ce90 100644 --- a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp +++ b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp @@ -16,9 +16,8 @@ // member is not crossing ABI boundary. All current exclusions are listed below. // CHECK: 0 | struct sycl::ext::oneapi::experimental::build_options -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) -// CHECK-NEXT: 0 | class std::vector > opts +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key +// CHECK: 0 | class std::vector > opts // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl // CHECK-NEXT: 0 | class std::allocator > (base) (empty) @@ -26,9 +25,8 @@ // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::allocator > >::_Vector_impl_data \(base\)|pointer _M_start)}} // CHECK: 0 | struct sycl::ext::oneapi::experimental::include_files -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) -// CHECK-NEXT: 0 | class std::vector, class std::basic_string > > record +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key +// CHECK: 0 | class std::vector, class std::basic_string > > record // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl _M_impl // CHECK-NEXT: 0 | class std::allocator, class std::basic_string > > (base) (empty) @@ -36,9 +34,8 @@ // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl_data \(base\)|pointer _M_start)}} // CHECK: 0 | struct sycl::ext::oneapi::experimental::registered_kernel_names -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) -// CHECK-NEXT: 0 | class std::vector > kernel_names +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key +// CHECK: 0 | class std::vector > kernel_names // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl // CHECK-NEXT: 0 | class std::allocator > (base) (empty) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 11d85801727c7..621765568d50c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3023,6 +3023,9 @@ _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmRKNS3_16image_desc _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmRKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmmmjRKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental20pitched_alloc_deviceEPmmmjRKNS0_6deviceERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental21dynamic_command_group14set_active_cgfEm +_ZN4sycl3_V13ext6oneapi12experimental21dynamic_command_groupC1ERKNS3_13command_graphILNS3_11graph_stateE0EEERKSt6vectorISt8functionIFvRNS0_7handlerEEESaISF_EE +_ZN4sycl3_V13ext6oneapi12experimental21dynamic_command_groupC2ERKNS3_13command_graphILNS3_11graph_stateE0EEERKSt6vectorISt8functionIFvRNS0_7handlerEEESaISF_EE _ZN4sycl3_V13ext6oneapi12experimental21get_composite_devicesEv _ZN4sycl3_V13ext6oneapi12experimental22get_image_channel_typeENS3_16image_mem_handleERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental22get_image_channel_typeENS3_16image_mem_handleERKNS0_6deviceERKNS0_7contextE @@ -3081,6 +3084,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph15begin_re _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph15begin_recordingERNS0_5queueERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph24addGraphLeafDependenciesENS3_4nodeE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplERKSt6vectorINS3_4nodeESaIS7_EE +_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplERNS3_21dynamic_command_groupERKSt6vectorINS3_4nodeESaIS9_EE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplESt8functionIFvRNS0_7handlerEEERKSt6vectorINS3_4nodeESaISC_EE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph9make_edgeERNS3_4nodeES7_ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_5queueERKNS0_13property_listE @@ -3597,6 +3601,7 @@ _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem10get_deviceEv _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem11get_contextEv _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem3mapEmmNS3_19address_access_modeEm _ZNK4sycl3_V13ext6oneapi12experimental12physical_mem4sizeEv +_ZNK4sycl3_V13ext6oneapi12experimental21dynamic_command_group14get_active_cgfEv _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2b6dbdb2fe52b..b0b7fc3f0112d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -326,6 +326,15 @@ ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z ??0dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@V?$command_graph@$0A@@23456@_KPEBX@Z +?get_active_cgf@dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ +??4dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z +?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@AEAVdynamic_command_group@34567@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z +??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV?$command_graph@$0A@@12345@AEBV?$vector@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V?$allocator@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@@2@@std@@@Z +??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z +??0dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z +?set_active_cgf@dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAX_K@Z +??1dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ +??4dynamic_command_group@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??0event@_V1@sycl@@AEAA@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ??0event@_V1@sycl@@QEAA@$$QEAV012@@Z ??0event@_V1@sycl@@QEAA@AEBV012@@Z diff --git a/sycl/test/check_device_code/extensions/address_cast.cpp b/sycl/test/check_device_code/extensions/address_cast.cpp index 36251cecbbc3b..c265a5e2fe74b 100644 --- a/sycl/test/check_device_code/extensions/address_cast.cpp +++ b/sycl/test/check_device_code/extensions/address_cast.cpp @@ -11,12 +11,6 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental; -// FIXME: should be removed when https://github.com/intel/llvm/pull/15389 is merged in. -template -using decorated_generic_ptr = - multi_ptr; - namespace static_as_cast { // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { diff --git a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp index ae42a97ce9264..3602d7d01ae95 100644 --- a/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp +++ b/sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp @@ -54,7 +54,7 @@ // tests to match the required format and in that case you should just update // (i.e. reduce) the number and the list below. // -// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 478 +// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 477 // // List of improperly UNSUPPORTED tests. // Remove the CHECK once the test has been properly UNSUPPORTED. @@ -113,7 +113,6 @@ // CHECK-NEXT: DeviceLib/cmath-aot.cpp // CHECK-NEXT: DeviceLib/cmath_fp64_test.cpp // CHECK-NEXT: DeviceLib/complex-fpga.cpp -// CHECK-NEXT: DeviceLib/exp/exp-std-complex-double-edge-cases.cpp // CHECK-NEXT: DeviceLib/imf_bfloat16_integeral_convesions.cpp // CHECK-NEXT: DeviceLib/imf_bfloat16_integeral_convesions.cpp // CHECK-NEXT: DeviceLib/imf_double2bfloat16.cpp diff --git a/sycl/test/extensions/annotated_usm/fake_properties.hpp b/sycl/test/extensions/annotated_usm/fake_properties.hpp index 656ea1a1ad75e..0462b96e8e24c 100644 --- a/sycl/test/extensions/annotated_usm/fake_properties.hpp +++ b/sycl/test/extensions/annotated_usm/fake_properties.hpp @@ -49,51 +49,53 @@ struct is_property_key_of : std::true_type {}; // Runtime properties enum foo_enum : unsigned { a, b, c }; -struct foo : detail::run_time_property_key { +struct foo : detail::run_time_property_key { constexpr foo(foo_enum v) : value(v) {} foo_enum value; }; -struct foz : detail::run_time_property_key { +struct foz : detail::run_time_property_key { float value1; bool value2; foz(float value1, bool value2) : value1(value1), value2(value2) {} }; -struct rt_prop1 : detail::run_time_property_key {}; -struct rt_prop2 : detail::run_time_property_key {}; -struct rt_prop3 : detail::run_time_property_key {}; -struct rt_prop4 : detail::run_time_property_key {}; -struct rt_prop5 : detail::run_time_property_key {}; -struct rt_prop6 : detail::run_time_property_key {}; -struct rt_prop7 : detail::run_time_property_key {}; -struct rt_prop8 : detail::run_time_property_key {}; -struct rt_prop9 : detail::run_time_property_key {}; -struct rt_prop10 : detail::run_time_property_key {}; -struct rt_prop11 : detail::run_time_property_key {}; -struct rt_prop12 : detail::run_time_property_key {}; -struct rt_prop13 : detail::run_time_property_key {}; -struct rt_prop14 : detail::run_time_property_key {}; -struct rt_prop15 : detail::run_time_property_key {}; -struct rt_prop16 : detail::run_time_property_key {}; -struct rt_prop17 : detail::run_time_property_key {}; -struct rt_prop18 : detail::run_time_property_key {}; -struct rt_prop19 : detail::run_time_property_key {}; -struct rt_prop20 : detail::run_time_property_key {}; -struct rt_prop21 : detail::run_time_property_key {}; -struct rt_prop22 : detail::run_time_property_key {}; -struct rt_prop23 : detail::run_time_property_key {}; -struct rt_prop24 : detail::run_time_property_key {}; -struct rt_prop25 : detail::run_time_property_key {}; -struct rt_prop26 : detail::run_time_property_key {}; -struct rt_prop27 : detail::run_time_property_key {}; -struct rt_prop28 : detail::run_time_property_key {}; -struct rt_prop29 : detail::run_time_property_key {}; -struct rt_prop30 : detail::run_time_property_key {}; -struct rt_prop31 : detail::run_time_property_key {}; -struct rt_prop32 : detail::run_time_property_key {}; -struct rt_prop33 : detail::run_time_property_key {}; +// clang-format off +struct rt_prop1 : detail::run_time_property_key {}; +struct rt_prop2 : detail::run_time_property_key {}; +struct rt_prop3 : detail::run_time_property_key {}; +struct rt_prop4 : detail::run_time_property_key {}; +struct rt_prop5 : detail::run_time_property_key {}; +struct rt_prop6 : detail::run_time_property_key {}; +struct rt_prop7 : detail::run_time_property_key {}; +struct rt_prop8 : detail::run_time_property_key {}; +struct rt_prop9 : detail::run_time_property_key {}; +struct rt_prop10 : detail::run_time_property_key {}; +struct rt_prop11 : detail::run_time_property_key {}; +struct rt_prop12 : detail::run_time_property_key {}; +struct rt_prop13 : detail::run_time_property_key {}; +struct rt_prop14 : detail::run_time_property_key {}; +struct rt_prop15 : detail::run_time_property_key {}; +struct rt_prop16 : detail::run_time_property_key {}; +struct rt_prop17 : detail::run_time_property_key {}; +struct rt_prop18 : detail::run_time_property_key {}; +struct rt_prop19 : detail::run_time_property_key {}; +struct rt_prop20 : detail::run_time_property_key {}; +struct rt_prop21 : detail::run_time_property_key {}; +struct rt_prop22 : detail::run_time_property_key {}; +struct rt_prop23 : detail::run_time_property_key {}; +struct rt_prop24 : detail::run_time_property_key {}; +struct rt_prop25 : detail::run_time_property_key {}; +struct rt_prop26 : detail::run_time_property_key {}; +struct rt_prop27 : detail::run_time_property_key {}; +struct rt_prop28 : detail::run_time_property_key {}; +struct rt_prop29 : detail::run_time_property_key {}; +struct rt_prop30 : detail::run_time_property_key {}; +struct rt_prop31 : detail::run_time_property_key {}; +struct rt_prop32 : detail::run_time_property_key {}; +struct rt_prop33 : detail::run_time_property_key {}; +// clang-format on using foo_key = foo; using foz_key = foz; diff --git a/sycl/test/extensions/properties/mock_compile_time_properties.hpp b/sycl/test/extensions/properties/mock_compile_time_properties.hpp index f9743e27e3896..ea8d98ffa5e58 100644 --- a/sycl/test/extensions/properties/mock_compile_time_properties.hpp +++ b/sycl/test/extensions/properties/mock_compile_time_properties.hpp @@ -34,7 +34,7 @@ struct boo_key : detail::compile_time_property_key { template using value_t = property_value; }; -struct foo : detail::run_time_property_key { +struct foo : detail::run_time_property_key { constexpr foo(int v = 0) : value(v) {} int value; }; @@ -44,7 +44,7 @@ inline bool operator==(const foo &lhs, const foo &rhs) { } inline bool operator!=(const foo &lhs, const foo &rhs) { return !(lhs == rhs); } -struct foz : detail::run_time_property_key { +struct foz : detail::run_time_property_key { constexpr foz(float v1, bool v2) : value1(v1), value2(v2) {} // Define copy constructor to make foz non-trivially copyable constexpr foz(const foz &f) { @@ -60,7 +60,7 @@ inline bool operator==(const foz &lhs, const foz &rhs) { } inline bool operator!=(const foz &lhs, const foz &rhs) { return !(lhs == rhs); } -struct fir : detail::run_time_property_key { +struct fir : detail::run_time_property_key { // Intentionally not constexpr to test for properties that cannot be constexpr fir(float v1, bool v2) : value1(v1), value2(v2) {} // Define copy constructor to make foz non-trivially copyable diff --git a/sycl/test/multi_ptr/aliases.cpp b/sycl/test/multi_ptr/aliases.cpp new file mode 100644 index 0000000000000..1911602e74386 --- /dev/null +++ b/sycl/test/multi_ptr/aliases.cpp @@ -0,0 +1,87 @@ +//==--------------- aliases.cpp - SYCL multi_ptr aliases test --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s -Xclang -verify-ignore-unexpected=note,warning + +// expected-no-diagnostics + +#include + +#include + +template +void test_address_space_aliases() { + static_assert(std::is_same_v< + sycl::generic_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::global_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::local_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::private_ptr, + sycl::multi_ptr>); +} + +template void test_aliases() { + // Template specialization aliases for different pointer address spaces + test_address_space_aliases(); + test_address_space_aliases(); + test_address_space_aliases(); + + // Template specialization aliases for different pointer address spaces. + // The interface exposes non-decorated pointer while keeping the + // address space information internally. + static_assert(std::is_same_v< + sycl::raw_generic_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::raw_global_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::raw_local_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::raw_private_ptr, + sycl::multi_ptr>); + + // Template specialization aliases for different pointer address spaces. + // The interface exposes decorated pointer. + static_assert(std::is_same_v< + sycl::decorated_generic_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::decorated_global_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::decorated_local_ptr, + sycl::multi_ptr>); + static_assert(std::is_same_v< + sycl::decorated_private_ptr, + sycl::multi_ptr>); +} + +// Test "minimal set of types" in the CTS. As we are just testing aliases are +// present in this test, this should work for any type. + +template void test_aliases(); +template void test_aliases(); diff --git a/sycl/test/syclcompat/launch/kernel_properties.cpp b/sycl/test/syclcompat/launch/kernel_properties.cpp index 78920c62c5347..6beefce73d14b 100644 --- a/sycl/test/syclcompat/launch/kernel_properties.cpp +++ b/sycl/test/syclcompat/launch/kernel_properties.cpp @@ -23,7 +23,7 @@ // We need hardware which can support at least 2 sub-group sizes, since that // hardware (presumably) supports the `intel_reqd_sub_group_size` attribute. // REQUIRES: sg-32 && sg-16 -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %if cl_options %{/clang:-S /clang:-emit-llvm%} %else %{-S -emit-llvm%} %s -o - | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -Xclang -fsycl-is-device %if cl_options %{/clang:-S /clang:-emit-llvm%} %else %{-S -emit-llvm%} %s -o - | FileCheck %s #include #include #include diff --git a/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp b/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp index 98222d6cc374f..5c2750e86b705 100644 --- a/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp +++ b/sycl/test/syclcompat/launch/launch_policy_lmem_neg.cpp @@ -22,7 +22,7 @@ * templates as tests in launch_policy_neg.cpp **************************************************************************/ -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out 2>&1 | FileCheck -vv %s +// RUN: not %clangxx -fsycl -fsyntax-only %s 2>&1 | FileCheck -vv %s #include #include diff --git a/sycl/test/syclcompat/launch/launch_policy_neg.cpp b/sycl/test/syclcompat/launch/launch_policy_neg.cpp index 558864084ff62..cee796471f23f 100644 --- a/sycl/test/syclcompat/launch/launch_policy_neg.cpp +++ b/sycl/test/syclcompat/launch/launch_policy_neg.cpp @@ -20,18 +20,18 @@ * Negative tests for new launch_policy. **************************************************************************/ -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK1 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK1 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK2 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK2 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK3 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK3 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK4 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK4 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK5 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK5 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK6 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK6 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK7 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK7 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK8 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK8 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK9 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK9 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK10 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK10 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK11 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK11 -// RUN: not %clangxx -fsycl -fsycl-targets=%sycl_triple %s -DCHECK12 -o %t.out 2>&1 | FileCheck -vv %s --check-prefixes=CHECK12 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK1 2>&1 | FileCheck -vv %s --check-prefixes=CHECK1 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK2 2>&1 | FileCheck -vv %s --check-prefixes=CHECK2 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK3 2>&1 | FileCheck -vv %s --check-prefixes=CHECK3 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK4 2>&1 | FileCheck -vv %s --check-prefixes=CHECK4 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK5 2>&1 | FileCheck -vv %s --check-prefixes=CHECK5 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK6 2>&1 | FileCheck -vv %s --check-prefixes=CHECK6 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK7 2>&1 | FileCheck -vv %s --check-prefixes=CHECK7 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK8 2>&1 | FileCheck -vv %s --check-prefixes=CHECK8 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK9 2>&1 | FileCheck -vv %s --check-prefixes=CHECK9 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK10 2>&1 | FileCheck -vv %s --check-prefixes=CHECK10 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK11 2>&1 | FileCheck -vv %s --check-prefixes=CHECK11 +// RUN: not %clangxx -fsycl -fsyntax-only %s -DCHECK12 2>&1 | FileCheck -vv %s --check-prefixes=CHECK12 #include #include diff --git a/sycl/test/warnings/sycl_2020_deprecations.cpp b/sycl/test/warnings/sycl_2020_deprecations.cpp index c3f7d8a3450d3..4614707606830 100644 --- a/sycl/test/warnings/sycl_2020_deprecations.cpp +++ b/sycl/test/warnings/sycl_2020_deprecations.cpp @@ -287,8 +287,6 @@ int main() { [=](sycl::nd_item<1> Idx) { int PrivateVal = 0; - // expected-warning@+6{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} - // expected-warning@+8{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} // expected-warning@+8{{'get_pointer' is deprecated: accessor::get_pointer() is deprecated, please use get_multi_ptr()}} // expected-warning@+7{{'get_pointer' is deprecated: accessor::get_pointer() is deprecated, please use get_multi_ptr()}} // expected-warning@+4{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} @@ -298,8 +296,6 @@ int main() { sycl::make_ptr( GlobalAcc.get_pointer()); - // expected-warning@+5{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} - // expected-warning@+7{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} // expected-warning@+7{{'get_pointer' is deprecated: local_accessor::get_pointer() is deprecated, please use get_multi_ptr()}} // expected-warning@+4{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} sycl::multi_ptr( LocalAcc.get_pointer()); - // expected-warning@+4{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} - // expected-warning@+5{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} - // expected-warning@+6{{'legacy' is deprecated: sycl::access::decorated::legacy is deprecated since SYCL 2020}} + // expected-warning@+4{{'make_ptr' is deprecated: make_ptr is deprecated since SYCL 2020. Please use address_space_cast instead.}} sycl::multi_ptr LegacyPrivateMptr = diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 29d7b22d56391..8831426784de2 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -25,13 +25,7 @@ include(AddSYCLUnitTest) add_custom_target(check-sycl-unittests) -# TODO UR tests require real hardware and must be moved to sycl/test-e2e. -option(SYCL_UR_TESTS "Enable UR-specific unit tests" OFF) - -if (SYCL_UR_TESTS) - add_subdirectory(ur) -endif() - +add_subdirectory(ur) add_subdirectory(allowlist) add_subdirectory(config) add_subdirectory(misc) diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 1542b5f34d7dc..90d95975a0245 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -700,3 +700,166 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { }), sycl::exception); } + +// Error when a dynamic command-group is used with a graph belonging to a +// different graph. +TEST_F(CommandGraphTest, DynamicCommandGroupWrongGraph) { + experimental::command_graph Graph1{Queue.get_context(), Queue.get_device()}; + experimental::command_graph Graph2{Queue.get_context(), Queue.get_device()}; + auto CGF = [&](sycl::handler &CGH) { + CGH.single_task>([]() {}); + }; + + experimental::dynamic_command_group DynCG(Graph2, {CGF}); + ASSERT_THROW(Graph1.add(DynCG), sycl::exception); +} + +// Error when a non-kernel command-group is included in a dynamic command-group +TEST_F(CommandGraphTest, DynamicCommandGroupNotKernel) { + int *Ptr = malloc_device(1, Queue); + auto CGF = [&](sycl::handler &CGH) { CGH.memset(Ptr, 1, 0); }; + + experimental::command_graph Graph{Queue}; + ASSERT_THROW(experimental::dynamic_command_group DynCG(Graph, {CGF}), + sycl::exception); + sycl::free(Ptr, Queue); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using graph limited events to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchEventEdges) { + size_t N = 32; + int *PtrA = malloc_device(N, Queue); + int *PtrB = malloc_device(N, Queue); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); + }); + + auto EventB = Queue.submit([&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + CGH.depends_on(EventA); + CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.depends_on(EventB); + CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); +} + +// Test that an exception is thrown when a graph isn't created with buffer +// property, but buffers are used. +TEST_F(CommandGraphTest, DynamicCommandGroupBufferThrows) { + size_t N = 32; + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + auto CGFA = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); +} + +// Test and exception is thrown when using a host-accessor to a buffer +// used in a non active CGF node in the graph. +TEST_F(CommandGraphTest, DynamicCommandGroupBufferHostAccThrows) { + size_t N = 32; + std::vector HostData(N, 0); + buffer Buf{HostData}; + Buf.set_write_back(false); + + int *Ptr = malloc_device(N, Queue); + + { + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_NO_THROW(Graph.add(DynCG)); + + ASSERT_THROW({ host_accessor HostAcc{Buf}; }, sycl::exception); + } + + sycl::free(Ptr, Queue); +} + +// Error if edges are not the same for all command-groups in dynamic command +// group, test using accessors to create edges +TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) { + size_t N = 32; + std::vector HostData(N, 0); + buffer BufA{HostData}; + buffer BufB{HostData}; + BufA.set_write_back(false); + BufB.set_write_back(false); + + experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + + Queue.submit([&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); + }); + + Queue.submit([&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); + }); + + Graph.end_recording(); + + auto CGFA = [&](handler &CGH) { + auto AccA = BufA.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); + }; + + auto CGFB = [&](handler &CGH) { + auto AccB = BufB.get_access(CGH); + CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); + }; + + experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); + ASSERT_THROW(Graph.add(DynCG), sycl::exception); +} diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index 94296507996dd..b943b9c43dd98 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -136,7 +136,6 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { TEST_F(CommandGraphTest, UpdateRangeErrors) { // Test that the correct errors are throw when trying to update node ranges - nd_range<1> NDRange{range{128}, range{32}}; range<1> Range{128}; auto NodeNDRange = Graph.add([&](sycl::handler &cgh) { @@ -145,11 +144,12 @@ TEST_F(CommandGraphTest, UpdateRangeErrors) { // OK EXPECT_NO_THROW(NodeNDRange.update_nd_range(NDRange)); - // Can't update an nd_range node with a range - EXPECT_ANY_THROW(NodeNDRange.update_range(Range)); + // OK to update an nd_range node with a range of the same dimension + EXPECT_NO_THROW(NodeNDRange.update_range(Range)); // Can't update with a different number of dimensions EXPECT_ANY_THROW(NodeNDRange.update_nd_range( nd_range<2>{range<2>{128, 128}, range<2>{32, 32}})); + EXPECT_ANY_THROW(NodeNDRange.update_range(range<3>{32, 32, 1})); auto NodeRange = Graph.add([&](sycl::handler &cgh) { cgh.parallel_for>(range<1>{128}, [](item<1>) {}); @@ -157,10 +157,12 @@ TEST_F(CommandGraphTest, UpdateRangeErrors) { // OK EXPECT_NO_THROW(NodeRange.update_range(Range)); - // Can't update a range node with an nd_range - EXPECT_ANY_THROW(NodeRange.update_nd_range(NDRange)); + // OK to update a range node with an nd_range of the same dimension + EXPECT_NO_THROW(NodeRange.update_nd_range(NDRange)); // Can't update with a different number of dimensions EXPECT_ANY_THROW(NodeRange.update_range(range<2>{128, 128})); + EXPECT_ANY_THROW(NodeRange.update_nd_range( + nd_range<3>{range<3>{8, 8, 8}, range<3>{8, 8, 8}})); } class WholeGraphUpdateTest : public CommandGraphTest {