From 73ca5fa81ecd4eeee48318ac5d67c7e49d9f4e3a Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 11 Nov 2024 14:22:24 -0800 Subject: [PATCH 1/9] Update root group query Signed-off-by: Michael Aziz --- sycl/include/sycl/kernel.hpp | 26 +++++++ sycl/source/detail/kernel_impl.cpp | 32 -------- sycl/source/detail/kernel_impl.hpp | 117 +++++++++++++++++++++++++---- sycl/source/kernel.cpp | 23 +++++- 4 files changed, 150 insertions(+), 48 deletions(-) diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index dac7f619d745e..1864b61f0879a 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -170,6 +170,32 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { typename detail::is_kernel_queue_specific_info_desc::return_type ext_oneapi_get_info(queue Queue) const; + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue and values. + /// max_num_work_groups is the only valid descriptor for this function. + /// + /// \param Queue is a valid SYCL queue. + /// \param WorkGroupSize is the work-group size the number of work-groups is + /// requested for. + /// \return depends on information being queried. + template + typename detail::is_kernel_queue_specific_info_desc::return_type + ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize, + size_t DynamicLocalMemorySize) const; + + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue and values. + /// max_num_work_groups is the only valid descriptor for this function. + /// + /// \param Queue is a valid SYCL queue. + /// \param WorkGroupSize is the work-group size the number of work-groups is + /// requested for. + /// \return depends on information being queried. + template + typename detail::is_kernel_queue_specific_info_desc::return_type + ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize, + size_t DynamicLocalMemorySize) const; + /// Query queue/launch-specific information from a kernel using the /// info::kernel_queue_specific descriptor for a specific Queue and values. /// max_num_work_groups is the only valid descriptor for this function. diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 986e78aa21530..f89ef979f7c9e 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -106,38 +106,6 @@ void kernel_impl::checkIfValidForNumArgsInfoQuery() const { "interoperability function or to query a device built-in kernel"); } -bool kernel_impl::exceedsOccupancyResourceLimits( - const device &Device, const range<3> &WorkGroupSize, - size_t DynamicLocalMemorySize) const { - // Respect occupancy limits for WorkGroupSize and DynamicLocalMemorySize. - // Generally, exceeding hardware resource limits will yield in an error when - // the kernel is launched. - const size_t MaxWorkGroupSize = - get_info(Device); - const size_t MaxLocalMemorySizeInBytes = - Device.get_info(); - - if (WorkGroupSize.size() > MaxWorkGroupSize) - return true; - - if (DynamicLocalMemorySize > MaxLocalMemorySizeInBytes) - return true; - - // It will be impossible to launch a kernel for Cuda when the hardware limit - // for the 32-bit registers page file size is exceeded. - if (Device.get_backend() == backend::ext_oneapi_cuda) { - const uint32_t RegsPerWorkItem = - get_info(Device); - const uint32_t MaxRegsPerWorkGroup = - Device.get_info(); - if ((MaxWorkGroupSize * RegsPerWorkItem) > MaxRegsPerWorkGroup) - return true; - } - - return false; -} - template <> typename info::platform::version::return_type kernel_impl::get_backend_info() const { diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 1b71eb3e659ad..71d982aeca7c3 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -122,6 +122,32 @@ class kernel_impl { template typename Param::return_type ext_oneapi_get_info(queue Queue) const; + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue and values. + /// max_num_work_groups is the only valid descriptor for this function. + /// + /// \param Queue is a valid SYCL queue. + /// \param WorkGroupSize is the work-group size the number of work-groups is + /// requested for. + /// \return depends on information being queried. + template + typename Param::return_type + ext_oneapi_get_info(queue Queue, const range<1> &MaxWorkGroupSize, + size_t DynamicLocalMemorySize) const; + + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue and values. + /// max_num_work_groups is the only valid descriptor for this function. + /// + /// \param Queue is a valid SYCL queue. + /// \param WorkGroupSize is the work-group size the number of work-groups is + /// requested for. + /// \return depends on information being queried. + template + typename Param::return_type + ext_oneapi_get_info(queue Queue, const range<2> &MaxWorkGroupSize, + size_t DynamicLocalMemorySize) const; + /// Query queue/launch-specific information from a kernel using the /// info::kernel_queue_specific descriptor for a specific Queue and values. /// max_num_work_groups is the only valid descriptor for this function. @@ -192,11 +218,49 @@ class kernel_impl { /// Check if the occupancy limits are exceeded for the given kernel launch /// configuration. + template bool exceedsOccupancyResourceLimits(const device &Device, - const range<3> &WorkGroupSize, + const range &WorkGroupSize, size_t DynamicLocalMemorySize) const; + template + size_t queryMaxNumWorkGroups(queue Queue, + const range &WorkGroupSize, + size_t DynamicLocalMemorySize) const; }; +template +bool kernel_impl::exceedsOccupancyResourceLimits( + const device &Device, const range &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + // Respect occupancy limits for WorkGroupSize and DynamicLocalMemorySize. + // Generally, exceeding hardware resource limits will yield in an error when + // the kernel is launched. + const size_t MaxWorkGroupSize = + get_info(Device); + const size_t MaxLocalMemorySizeInBytes = + Device.get_info(); + + if (WorkGroupSize.size() > MaxWorkGroupSize) + return true; + + if (DynamicLocalMemorySize > MaxLocalMemorySizeInBytes) + return true; + + // It will be impossible to launch a kernel for Cuda when the hardware limit + // for the 32-bit registers page file size is exceeded. + if (Device.get_backend() == backend::ext_oneapi_cuda) { + const uint32_t RegsPerWorkItem = + get_info(Device); + const uint32_t MaxRegsPerWorkGroup = + Device.get_info(); + if ((MaxWorkGroupSize * RegsPerWorkItem) > MaxRegsPerWorkGroup) + return true; + } + + return false; +} + template inline typename Param::return_type kernel_impl::get_info() const { static_assert(is_kernel_info_desc::value, @@ -243,13 +307,11 @@ kernel_impl::get_info(const device &Device, namespace syclex = ext::oneapi::experimental; -template <> -inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: - return_type - kernel_impl::ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_groups>( - queue Queue, const range<3> &WorkGroupSize, - size_t DynamicLocalMemorySize) const { +template +size_t +kernel_impl::queryMaxNumWorkGroups(queue Queue, + const range &WorkGroupSize, + size_t DynamicLocalMemorySize) const { if (WorkGroupSize.size() == 0) throw exception(sycl::make_error_code(errc::invalid), "The launch work-group size cannot be zero."); @@ -258,10 +320,17 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: const auto &Handle = getHandleRef(); auto Device = Queue.get_device(); + size_t WG[Dimensions]; + WG[0] = WorkGroupSize[0]; + if constexpr (Dimensions >= 2) + WG[1] = WorkGroupSize[1]; + if constexpr (Dimensions == 3) + WG[2] = WorkGroupSize[2]; + uint32_t GroupCount{0}; if (auto Result = Adapter->call_nocheck< UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>( - Handle, WorkGroupSize.size(), DynamicLocalMemorySize, &GroupCount); + Handle, Dimensions, WG, DynamicLocalMemorySize, &GroupCount); Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { // The feature is supported. Check for other errors and throw if any. Adapter->checkUrResult(Result); @@ -277,15 +346,33 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: } template <> -inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync:: +inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: return_type kernel_impl::ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_group_sync>( + syclex::info::kernel_queue_specific::max_num_work_groups>( + queue Queue, const range<1> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize); +} + +template <> +inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: + return_type + kernel_impl::ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>( + queue Queue, const range<2> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize); +} + +template <> +inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: + return_type + kernel_impl::ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>( queue Queue, const range<3> &WorkGroupSize, size_t DynamicLocalMemorySize) const { - return ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_groups>( - Queue, WorkGroupSize, DynamicLocalMemorySize); + return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize); } template <> @@ -299,7 +386,7 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync:: get_info(Device); const sycl::range<3> WorkGroupSize{MaxWorkGroupSize, 1, 1}; return ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_group_sync>( + syclex::info::kernel_queue_specific::max_num_work_groups>( Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0); } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index 634f22b09bafb..ceb036098552e 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -111,6 +111,22 @@ kernel::ext_oneapi_get_info(queue Queue) const { return impl->ext_oneapi_get_info(Queue); } +template +typename detail::is_kernel_queue_specific_info_desc::return_type +kernel::ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + return impl->ext_oneapi_get_info(Queue, WorkGroupSize, + DynamicLocalMemorySize); +} + +template +typename detail::is_kernel_queue_specific_info_desc::return_type +kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + return impl->ext_oneapi_get_info(Queue, WorkGroupSize, + DynamicLocalMemorySize); +} + template typename detail::is_kernel_queue_specific_info_desc::return_type kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize, @@ -127,12 +143,17 @@ template __SYCL_EXPORT typename ext::oneapi::experimental::info:: #define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT) \ template __SYCL_EXPORT ReturnT \ + kernel::ext_oneapi_get_info( \ + queue, const range<1> &, size_t) const; \ + template __SYCL_EXPORT ReturnT \ + kernel::ext_oneapi_get_info( \ + queue, const range<2> &, size_t) const; \ + template __SYCL_EXPORT ReturnT \ kernel::ext_oneapi_get_info( \ queue, const range<3> &, size_t) const; // Not including "ext_oneapi_kernel_queue_specific_traits.def" because not all // kernel_queue_specific queries require the above-defined get_info interface. // clang-format off -__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t) // clang-format on #undef __SYCL_PARAM_TRAITS_SPEC From aed4951c3185129621c659aebf125545750e7b33 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Thu, 21 Nov 2024 07:37:42 -0800 Subject: [PATCH 2/9] Update UR tag Signed-off-by: Michael Aziz --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..5f8caf7f43c6d 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/0x12CC/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 060f402ee7c35..8af26fb7c8b83 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Mon Oct 28 16:29:45 2024 +0100 # Merge pull request #2242 from nrspruit/sysman_env_disable # [L0] Enable Sysman Thru Env by default and have zesInit be optional -set(UNIFIED_RUNTIME_TAG dbd168cbed2d2590b47904728cd5762f1c2f4c6b) +set(UNIFIED_RUNTIME_TAG f99adf104ef5ce94d66b6ed4f9dccc450f1f638c) From dd5f9187b235ea995ea8e510e3e88b2329dfd904 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 22 Nov 2024 13:19:34 -0800 Subject: [PATCH 3/9] Fix test failures Signed-off-by: Michael Aziz --- .../ext_oneapi_kernel_queue_specific_traits.def | 3 --- sycl/source/detail/kernel_impl.hpp | 15 --------------- sycl/source/kernel.cpp | 6 ------ sycl/test-e2e/GroupAlgorithm/root_group.cpp | 11 ++++++----- sycl/test/abi/sycl_symbols_linux.dump | 4 ++-- sycl/test/abi/sycl_symbols_windows.dump | 2 -- 6 files changed, 8 insertions(+), 33 deletions(-) diff --git a/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def index 0ec11af0bb6b1..f1c92ccd1bea1 100644 --- a/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def +++ b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def @@ -1,4 +1 @@ -// TODO: Revisit 'max_num_work_group_sync' and align it with the -// 'sycl_ext_oneapi_forward_progress' extension once #7598 is merged. -__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t,) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,) diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index a4afc19e3a328..80cbd9b8689b4 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -376,21 +376,6 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize); } -template <> -inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync:: - return_type - kernel_impl::ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_group_sync>( - queue Queue) const { - auto Device = Queue.get_device(); - const auto MaxWorkGroupSize = - get_info(Device); - const sycl::range<3> WorkGroupSize{MaxWorkGroupSize, 1, 1}; - return ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_groups>( - Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0); -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index ceb036098552e..2b61c9dd22d05 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -135,12 +135,6 @@ kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize, DynamicLocalMemorySize); } -template __SYCL_EXPORT typename ext::oneapi::experimental::info:: - kernel_queue_specific::max_num_work_group_sync::return_type - kernel::ext_oneapi_get_info< - ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(queue Queue) const; - #define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT) \ template __SYCL_EXPORT ReturnT \ kernel::ext_oneapi_get_info( \ diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 92e5d69ffcab4..9346e2f46eb4f 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -27,13 +27,14 @@ void testQueriesAndProperties() { const auto bundle = sycl::get_kernel_bundle(q.get_context()); const auto kernel = bundle.get_kernel(); + const auto local_range = sycl::range<1>(1); const auto maxWGs = kernel.ext_oneapi_get_info< sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(q); - const auto wgRange = sycl::range{WorkGroupSize, 1, 1}; + max_num_work_groups>(q, local_range, 0); + const auto wgRange = sycl::range<3>{WorkGroupSize, 1, 1}; const auto maxWGsWithLimits = kernel.ext_oneapi_get_info< sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(q, wgRange, wgRange.size() * sizeof(int)); + max_num_work_groups>(q, wgRange, wgRange.size() * sizeof(int)); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; q.single_task(props, []() {}); @@ -54,7 +55,7 @@ void testRootGroup() { const auto kernel = bundle.get_kernel(); const auto maxWGs = kernel.ext_oneapi_get_info< sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(q); + max_num_work_groups>(q, WorkGroupSize, 0); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; sycl::buffer dataBuf{sycl::range{maxWGs * WorkGroupSize}}; @@ -96,7 +97,7 @@ void testRootGroupFunctions() { const auto kernel = bundle.get_kernel(); const auto maxWGs = kernel.ext_oneapi_get_info< sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(q); + max_num_work_groups>(q, WorkGroupSize, 0); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a5134a7a524ca..5b449842e8699 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3939,9 +3939,9 @@ _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device15backend_versionEEENS0_6 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel17get_kernel_bundleEv +_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi1EEEm +_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi2EEEm _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm -_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueE -_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm _ZNK4sycl3_V16kernel3getEv _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific15work_group_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16global_work_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index a6e6a5e47c137..d6550fbd3a52b 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -13,8 +13,6 @@ ??$create_sub_devices@$0BAIH@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@_KV?$allocator@_K@std@@@4@@Z ??$create_sub_devices@$0BAII@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4partition_affinity_domain@info@12@@Z ??$create_sub_devices@$0BAIJ@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ -??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@@Z -??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z ??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ From bf2037173924be92eeb5925c00f271bb27611d8d Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 25 Nov 2024 09:25:35 -0800 Subject: [PATCH 4/9] Run clang-format on test source Signed-off-by: Michael Aziz --- sycl/test-e2e/GroupAlgorithm/root_group.cpp | 32 +++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 9346e2f46eb4f..369057d63adac 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -28,13 +28,17 @@ void testQueriesAndProperties() { sycl::get_kernel_bundle(q.get_context()); const auto kernel = bundle.get_kernel(); const auto local_range = sycl::range<1>(1); - const auto maxWGs = kernel.ext_oneapi_get_info< - sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_groups>(q, local_range, 0); + const auto maxWGs = + kernel + .ext_oneapi_get_info( + q, local_range, 0); const auto wgRange = sycl::range<3>{WorkGroupSize, 1, 1}; - const auto maxWGsWithLimits = kernel.ext_oneapi_get_info< - sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_groups>(q, wgRange, wgRange.size() * sizeof(int)); + const auto maxWGsWithLimits = + kernel + .ext_oneapi_get_info( + q, wgRange, wgRange.size() * sizeof(int)); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; q.single_task(props, []() {}); @@ -53,9 +57,11 @@ void testRootGroup() { const auto bundle = sycl::get_kernel_bundle(q.get_context()); const auto kernel = bundle.get_kernel(); - const auto maxWGs = kernel.ext_oneapi_get_info< - sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_groups>(q, WorkGroupSize, 0); + const auto maxWGs = + kernel + .ext_oneapi_get_info( + q, WorkGroupSize, 0); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; sycl::buffer dataBuf{sycl::range{maxWGs * WorkGroupSize}}; @@ -95,9 +101,11 @@ void testRootGroupFunctions() { const auto bundle = sycl::get_kernel_bundle(q.get_context()); const auto kernel = bundle.get_kernel(); - const auto maxWGs = kernel.ext_oneapi_get_info< - sycl::ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_groups>(q, WorkGroupSize, 0); + const auto maxWGs = + kernel + .ext_oneapi_get_info( + q, WorkGroupSize, 0); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; From 58eee69809795a8f39a66fe9a0fe6d1c2f83bc7f Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 25 Nov 2024 10:16:23 -0800 Subject: [PATCH 5/9] Add Windows symbols Signed-off-by: Michael Aziz --- sycl/test/abi/sycl_symbols_windows.dump | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d6550fbd3a52b..4d59de4ea82ac 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -13,6 +13,8 @@ ??$create_sub_devices@$0BAIH@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@_KV?$allocator@_K@std@@@4@@Z ??$create_sub_devices@$0BAII@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4partition_affinity_domain@info@12@@Z ??$create_sub_devices@$0BAIJ@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ +??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$00@12@_K@Z +??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$01@12@_K@Z ??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ From c5798709fb1d3620d1c6547ccb2e697ea3405aa4 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 25 Nov 2024 18:32:43 -0800 Subject: [PATCH 6/9] Empty commit Signed-off-by: Michael Aziz From 2801e1e8f687afc11ea6d15604ba4482779029e1 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 26 Nov 2024 13:39:38 -0800 Subject: [PATCH 7/9] Fix launch query for invalid group sizes Signed-off-by: Michael Aziz --- sycl/source/detail/kernel_impl.hpp | 6 ++++-- .../Basic/launch_queries/max_num_work_groups.cpp | 12 ++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 80cbd9b8689b4..59429f9bf302a 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -332,8 +332,10 @@ kernel_impl::queryMaxNumWorkGroups(queue Queue, if (auto Result = Adapter->call_nocheck< UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>( Handle, Dimensions, WG, DynamicLocalMemorySize, &GroupCount); - Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - // The feature is supported. Check for other errors and throw if any. + Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE && + Result != UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE) { + // The feature is supported and the group size is valid. Check for other + // errors and throw if any. Adapter->checkUrResult(Result); return GroupCount; } diff --git a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp index fe8246bb2f56d..06a8a52b56cf0 100644 --- a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp +++ b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp @@ -161,14 +161,10 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) { // It cannot be possible to launch a kernel successfully with a configuration // that exceeds the available resources as in the above defined workGroupSize. // workGroupSize is larger than maxWorkGroupSize, hence maxWGs must equal 0. - // Note: Level-Zero currently always returns a non-zero value. - // TODO: Remove the backend condition once the Level-Zero API issue is fixed. - if (dev.get_backend() != sycl::backend::ext_oneapi_level_zero) { - assert(maxWGs == 0 && - "max_num_work_groups query failed.\n" - "It should return 0 possible groups when the requested resources " - "by the lanuch config exceed those available in the hardware."); - } + assert(maxWGs == 0 && + "max_num_work_groups query failed.\n" + "It should return 0 possible groups when the requested resources " + "by the lanuch config exceed those available in the hardware."); // As we ensured that the 'max_num_work_groups' query correctly // returns 0 possible work-groups, test that the kernel launch will fail. From c559a6688c40a7fe5da7fcbecc2e79eaf4db61e7 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 26 Nov 2024 13:55:21 -0800 Subject: [PATCH 8/9] Update UR tag Signed-off-by: Michael Aziz --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 5ddaea33cda91..dc76d2cbfa8b3 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Mon Nov 25 11:04:37 2024 +0000 # Merge pull request #2303 from nrspruit/zeInitDrivers # [L0] Implement Support for zeInitDrivers -set(UNIFIED_RUNTIME_TAG 39b72622cf2997459788000ac665e259f728a9ff) +set(UNIFIED_RUNTIME_TAG ca0a0e3e3b3ad933605a35850ebc11bbf5d5f367) From 90c2fa8600df0a3c0bb05dc1b419ae7502ac0bda Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 26 Nov 2024 14:32:05 -0800 Subject: [PATCH 9/9] Empty commit