From 73ca5fa81ecd4eeee48318ac5d67c7e49d9f4e3a Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Mon, 11 Nov 2024 14:22:24 -0800
Subject: [PATCH 1/9] Update root group query

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 sycl/include/sycl/kernel.hpp       |  26 +++++++
 sycl/source/detail/kernel_impl.cpp |  32 --------
 sycl/source/detail/kernel_impl.hpp | 117 +++++++++++++++++++++++++----
 sycl/source/kernel.cpp             |  23 +++++-
 4 files changed, 150 insertions(+), 48 deletions(-)
diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp
index dac7f619d745e..1864b61f0879a 100644
--- a/sycl/include/sycl/kernel.hpp
+++ b/sycl/include/sycl/kernel.hpp
@@ -170,6 +170,32 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase<kernel> {
   typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
   ext_oneapi_get_info(queue Queue) const;
 
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue and values.
+  /// max_num_work_groups is the only valid descriptor for this function.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \param WorkGroupSize is the work-group size the number of work-groups is
+  /// requested for.
+  /// \return depends on information being queried.
+  template <typename Param>
+  typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+  ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize,
+                      size_t DynamicLocalMemorySize) const;
+
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue and values.
+  /// max_num_work_groups is the only valid descriptor for this function.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \param WorkGroupSize is the work-group size the number of work-groups is
+  /// requested for.
+  /// \return depends on information being queried.
+  template <typename Param>
+  typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+  ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize,
+                      size_t DynamicLocalMemorySize) const;
+
   /// Query queue/launch-specific information from a kernel using the
   /// info::kernel_queue_specific descriptor for a specific Queue and values.
   /// max_num_work_groups is the only valid descriptor for this function.
diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp
index 986e78aa21530..f89ef979f7c9e 100644
--- a/sycl/source/detail/kernel_impl.cpp
+++ b/sycl/source/detail/kernel_impl.cpp
@@ -106,38 +106,6 @@ void kernel_impl::checkIfValidForNumArgsInfoQuery() const {
       "interoperability function or to query a device built-in kernel");
 }
 
-bool kernel_impl::exceedsOccupancyResourceLimits(
-    const device &Device, const range<3> &WorkGroupSize,
-    size_t DynamicLocalMemorySize) const {
-  // Respect occupancy limits for WorkGroupSize and DynamicLocalMemorySize.
-  // Generally, exceeding hardware resource limits will yield in an error when
-  // the kernel is launched.
-  const size_t MaxWorkGroupSize =
-      get_info<info::kernel_device_specific::work_group_size>(Device);
-  const size_t MaxLocalMemorySizeInBytes =
-      Device.get_info<info::device::local_mem_size>();
-
-  if (WorkGroupSize.size() > MaxWorkGroupSize)
-    return true;
-
-  if (DynamicLocalMemorySize > MaxLocalMemorySizeInBytes)
-    return true;
-
-  // It will be impossible to launch a kernel for Cuda when the hardware limit
-  // for the 32-bit registers page file size is exceeded.
-  if (Device.get_backend() == backend::ext_oneapi_cuda) {
-    const uint32_t RegsPerWorkItem =
-        get_info<info::kernel_device_specific::ext_codeplay_num_regs>(Device);
-    const uint32_t MaxRegsPerWorkGroup =
-        Device.get_info<ext::codeplay::experimental::info::device::
-                            max_registers_per_work_group>();
-    if ((MaxWorkGroupSize * RegsPerWorkItem) > MaxRegsPerWorkGroup)
-      return true;
-  }
-
-  return false;
-}
-
 template <>
 typename info::platform::version::return_type
 kernel_impl::get_backend_info<info::platform::version>() const {
diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp
index 1b71eb3e659ad..71d982aeca7c3 100644
--- a/sycl/source/detail/kernel_impl.hpp
+++ b/sycl/source/detail/kernel_impl.hpp
@@ -122,6 +122,32 @@ class kernel_impl {
   template <typename Param>
   typename Param::return_type ext_oneapi_get_info(queue Queue) const;
 
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue and values.
+  /// max_num_work_groups is the only valid descriptor for this function.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \param WorkGroupSize is the work-group size the number of work-groups is
+  /// requested for.
+  /// \return depends on information being queried.
+  template <typename Param>
+  typename Param::return_type
+  ext_oneapi_get_info(queue Queue, const range<1> &MaxWorkGroupSize,
+                      size_t DynamicLocalMemorySize) const;
+
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue and values.
+  /// max_num_work_groups is the only valid descriptor for this function.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \param WorkGroupSize is the work-group size the number of work-groups is
+  /// requested for.
+  /// \return depends on information being queried.
+  template <typename Param>
+  typename Param::return_type
+  ext_oneapi_get_info(queue Queue, const range<2> &MaxWorkGroupSize,
+                      size_t DynamicLocalMemorySize) const;
+
   /// Query queue/launch-specific information from a kernel using the
   /// info::kernel_queue_specific descriptor for a specific Queue and values.
   /// max_num_work_groups is the only valid descriptor for this function.
@@ -192,11 +218,49 @@ class kernel_impl {
 
   /// Check if the occupancy limits are exceeded for the given kernel launch
   /// configuration.
+  template <int Dimensions>
   bool exceedsOccupancyResourceLimits(const device &Device,
-                                      const range<3> &WorkGroupSize,
+                                      const range<Dimensions> &WorkGroupSize,
                                       size_t DynamicLocalMemorySize) const;
+  template <int Dimensions>
+  size_t queryMaxNumWorkGroups(queue Queue,
+                               const range<Dimensions> &WorkGroupSize,
+                               size_t DynamicLocalMemorySize) const;
 };
 
+template <int Dimensions>
+bool kernel_impl::exceedsOccupancyResourceLimits(
+    const device &Device, const range<Dimensions> &WorkGroupSize,
+    size_t DynamicLocalMemorySize) const {
+  // Respect occupancy limits for WorkGroupSize and DynamicLocalMemorySize.
+  // Generally, exceeding hardware resource limits will yield in an error when
+  // the kernel is launched.
+  const size_t MaxWorkGroupSize =
+      get_info<info::kernel_device_specific::work_group_size>(Device);
+  const size_t MaxLocalMemorySizeInBytes =
+      Device.get_info<info::device::local_mem_size>();
+
+  if (WorkGroupSize.size() > MaxWorkGroupSize)
+    return true;
+
+  if (DynamicLocalMemorySize > MaxLocalMemorySizeInBytes)
+    return true;
+
+  // It will be impossible to launch a kernel for Cuda when the hardware limit
+  // for the 32-bit registers page file size is exceeded.
+  if (Device.get_backend() == backend::ext_oneapi_cuda) {
+    const uint32_t RegsPerWorkItem =
+        get_info<info::kernel_device_specific::ext_codeplay_num_regs>(Device);
+    const uint32_t MaxRegsPerWorkGroup =
+        Device.get_info<ext::codeplay::experimental::info::device::
+                            max_registers_per_work_group>();
+    if ((MaxWorkGroupSize * RegsPerWorkItem) > MaxRegsPerWorkGroup)
+      return true;
+  }
+
+  return false;
+}
+
 template <typename Param>
 inline typename Param::return_type kernel_impl::get_info() const {
   static_assert(is_kernel_info_desc<Param>::value,
@@ -243,13 +307,11 @@ kernel_impl::get_info(const device &Device,
 
 namespace syclex = ext::oneapi::experimental;
 
-template <>
-inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
-    return_type
-    kernel_impl::ext_oneapi_get_info<
-        syclex::info::kernel_queue_specific::max_num_work_groups>(
-        queue Queue, const range<3> &WorkGroupSize,
-        size_t DynamicLocalMemorySize) const {
+template <int Dimensions>
+size_t
+kernel_impl::queryMaxNumWorkGroups(queue Queue,
+                                   const range<Dimensions> &WorkGroupSize,
+                                   size_t DynamicLocalMemorySize) const {
   if (WorkGroupSize.size() == 0)
     throw exception(sycl::make_error_code(errc::invalid),
                     "The launch work-group size cannot be zero.");
@@ -258,10 +320,17 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
   const auto &Handle = getHandleRef();
   auto Device = Queue.get_device();
 
+  size_t WG[Dimensions];
+  WG[0] = WorkGroupSize[0];
+  if constexpr (Dimensions >= 2)
+    WG[1] = WorkGroupSize[1];
+  if constexpr (Dimensions == 3)
+    WG[2] = WorkGroupSize[2];
+
   uint32_t GroupCount{0};
   if (auto Result = Adapter->call_nocheck<
                     UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
-          Handle, WorkGroupSize.size(), DynamicLocalMemorySize, &GroupCount);
+          Handle, Dimensions, WG, DynamicLocalMemorySize, &GroupCount);
       Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
     // The feature is supported. Check for other errors and throw if any.
     Adapter->checkUrResult(Result);
@@ -277,15 +346,33 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
 }
 
 template <>
-inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
+inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
     return_type
     kernel_impl::ext_oneapi_get_info<
-        syclex::info::kernel_queue_specific::max_num_work_group_sync>(
+        syclex::info::kernel_queue_specific::max_num_work_groups>(
+        queue Queue, const range<1> &WorkGroupSize,
+        size_t DynamicLocalMemorySize) const {
+  return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize);
+}
+
+template <>
+inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
+    return_type
+    kernel_impl::ext_oneapi_get_info<
+        syclex::info::kernel_queue_specific::max_num_work_groups>(
+        queue Queue, const range<2> &WorkGroupSize,
+        size_t DynamicLocalMemorySize) const {
+  return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize);
+}
+
+template <>
+inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
+    return_type
+    kernel_impl::ext_oneapi_get_info<
+        syclex::info::kernel_queue_specific::max_num_work_groups>(
         queue Queue, const range<3> &WorkGroupSize,
         size_t DynamicLocalMemorySize) const {
-  return ext_oneapi_get_info<
-      syclex::info::kernel_queue_specific::max_num_work_groups>(
-      Queue, WorkGroupSize, DynamicLocalMemorySize);
+  return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize);
 }
 
 template <>
@@ -299,7 +386,7 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
       get_info<info::kernel_device_specific::work_group_size>(Device);
   const sycl::range<3> WorkGroupSize{MaxWorkGroupSize, 1, 1};
   return ext_oneapi_get_info<
-      syclex::info::kernel_queue_specific::max_num_work_group_sync>(
+      syclex::info::kernel_queue_specific::max_num_work_groups>(
       Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0);
 }
 
diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp
index 634f22b09bafb..ceb036098552e 100644
--- a/sycl/source/kernel.cpp
+++ b/sycl/source/kernel.cpp
@@ -111,6 +111,22 @@ kernel::ext_oneapi_get_info(queue Queue) const {
   return impl->ext_oneapi_get_info<Param>(Queue);
 }
 
+template <typename Param>
+typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+kernel::ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize,
+                            size_t DynamicLocalMemorySize) const {
+  return impl->ext_oneapi_get_info<Param>(Queue, WorkGroupSize,
+                                          DynamicLocalMemorySize);
+}
+
+template <typename Param>
+typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize,
+                            size_t DynamicLocalMemorySize) const {
+  return impl->ext_oneapi_get_info<Param>(Queue, WorkGroupSize,
+                                          DynamicLocalMemorySize);
+}
+
 template <typename Param>
 typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
 kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
@@ -127,12 +143,17 @@ template __SYCL_EXPORT typename ext::oneapi::experimental::info::
 
 #define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT)           \
   template __SYCL_EXPORT ReturnT                                               \
+  kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>(                \
+      queue, const range<1> &, size_t) const;                                  \
+  template __SYCL_EXPORT ReturnT                                               \
+  kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>(                \
+      queue, const range<2> &, size_t) const;                                  \
+  template __SYCL_EXPORT ReturnT                                               \
   kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>(                \
       queue, const range<3> &, size_t) const;
 // Not including "ext_oneapi_kernel_queue_specific_traits.def" because not all
 // kernel_queue_specific queries require the above-defined get_info interface.
 // clang-format off
-__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t)
 __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t)
 // clang-format on
 #undef __SYCL_PARAM_TRAITS_SPEC

From aed4951c3185129621c659aebf125545750e7b33 Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Thu, 21 Nov 2024 07:37:42 -0800
Subject: [PATCH 2/9] Update UR tag

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +-
 sycl/cmake/modules/UnifiedRuntimeTag.cmake   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
index 72841724fa01d..5f8caf7f43c6d 100644
--- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake
+++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
@@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
       CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
   endfunction()
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
+  set(UNIFIED_RUNTIME_REPO "https://github.com/0x12CC/unified-runtime.git")
   include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)
 
   set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
index 060f402ee7c35..8af26fb7c8b83 100644
--- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake
+++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
@@ -4,4 +4,4 @@
 # Date:   Mon Oct 28 16:29:45 2024 +0100
 #   Merge pull request #2242 from nrspruit/sysman_env_disable
 #   [L0] Enable Sysman Thru Env by default and have zesInit be optional
-set(UNIFIED_RUNTIME_TAG dbd168cbed2d2590b47904728cd5762f1c2f4c6b)
+set(UNIFIED_RUNTIME_TAG f99adf104ef5ce94d66b6ed4f9dccc450f1f638c)

From dd5f9187b235ea995ea8e510e3e88b2329dfd904 Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Fri, 22 Nov 2024 13:19:34 -0800
Subject: [PATCH 3/9] Fix test failures

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 .../ext_oneapi_kernel_queue_specific_traits.def   |  3 ---
 sycl/source/detail/kernel_impl.hpp                | 15 ---------------
 sycl/source/kernel.cpp                            |  6 ------
 sycl/test-e2e/GroupAlgorithm/root_group.cpp       | 11 ++++++-----
 sycl/test/abi/sycl_symbols_linux.dump             |  4 ++--
 sycl/test/abi/sycl_symbols_windows.dump           |  2 --
 6 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def
index 0ec11af0bb6b1..f1c92ccd1bea1 100644
--- a/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def
+++ b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def
@@ -1,4 +1 @@
-// TODO: Revisit 'max_num_work_group_sync' and align it with the
-// 'sycl_ext_oneapi_forward_progress' extension once #7598 is merged.
-__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t,)
 __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,)
diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp
index a4afc19e3a328..80cbd9b8689b4 100644
--- a/sycl/source/detail/kernel_impl.hpp
+++ b/sycl/source/detail/kernel_impl.hpp
@@ -376,21 +376,6 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
   return queryMaxNumWorkGroups(Queue, WorkGroupSize, DynamicLocalMemorySize);
 }
 
-template <>
-inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
-    return_type
-    kernel_impl::ext_oneapi_get_info<
-        syclex::info::kernel_queue_specific::max_num_work_group_sync>(
-        queue Queue) const {
-  auto Device = Queue.get_device();
-  const auto MaxWorkGroupSize =
-      get_info<info::kernel_device_specific::work_group_size>(Device);
-  const sycl::range<3> WorkGroupSize{MaxWorkGroupSize, 1, 1};
-  return ext_oneapi_get_info<
-      syclex::info::kernel_queue_specific::max_num_work_groups>(
-      Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0);
-}
-
 } // namespace detail
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp
index ceb036098552e..2b61c9dd22d05 100644
--- a/sycl/source/kernel.cpp
+++ b/sycl/source/kernel.cpp
@@ -135,12 +135,6 @@ kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
                                           DynamicLocalMemorySize);
 }
 
-template __SYCL_EXPORT typename ext::oneapi::experimental::info::
-    kernel_queue_specific::max_num_work_group_sync::return_type
-    kernel::ext_oneapi_get_info<
-        ext::oneapi::experimental::info::kernel_queue_specific::
-            max_num_work_group_sync>(queue Queue) const;
-
 #define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT)           \
   template __SYCL_EXPORT ReturnT                                               \
   kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>(                \
diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
index 92e5d69ffcab4..9346e2f46eb4f 100644
--- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp
+++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
@@ -27,13 +27,14 @@ void testQueriesAndProperties() {
   const auto bundle =
       sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
   const auto kernel = bundle.get_kernel<class QueryKernel>();
+  const auto local_range = sycl::range<1>(1);
   const auto maxWGs = kernel.ext_oneapi_get_info<
       sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_group_sync>(q);
-  const auto wgRange = sycl::range{WorkGroupSize, 1, 1};
+          max_num_work_groups>(q, local_range, 0);
+  const auto wgRange = sycl::range<3>{WorkGroupSize, 1, 1};
   const auto maxWGsWithLimits = kernel.ext_oneapi_get_info<
       sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_group_sync>(q, wgRange, wgRange.size() * sizeof(int));
+          max_num_work_groups>(q, wgRange, wgRange.size() * sizeof(int));
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
   q.single_task<class QueryKernel>(props, []() {});
@@ -54,7 +55,7 @@ void testRootGroup() {
   const auto kernel = bundle.get_kernel<class RootGroupKernel>();
   const auto maxWGs = kernel.ext_oneapi_get_info<
       sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_group_sync>(q);
+          max_num_work_groups>(q, WorkGroupSize, 0);
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
   sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
@@ -96,7 +97,7 @@ void testRootGroupFunctions() {
   const auto kernel = bundle.get_kernel<class RootGroupFunctionsKernel>();
   const auto maxWGs = kernel.ext_oneapi_get_info<
       sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_group_sync>(q);
+          max_num_work_groups>(q, WorkGroupSize, 0);
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
 
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index a5134a7a524ca..5b449842e8699 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3939,9 +3939,9 @@ _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device15backend_versionEEENS0_6
 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv
 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv
 _ZNK4sycl3_V16kernel17get_kernel_bundleEv
+_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi1EEEm
+_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi2EEEm
 _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm
-_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueE
-_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm
 _ZNK4sycl3_V16kernel3getEv
 _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific15work_group_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE
 _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16global_work_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE
diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index a6e6a5e47c137..d6550fbd3a52b 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -13,8 +13,6 @@
 ??$create_sub_devices@$0BAIH@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@_KV?$allocator@_K@std@@@4@@Z
 ??$create_sub_devices@$0BAII@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4partition_affinity_domain@info@12@@Z
 ??$create_sub_devices@$0BAIJ@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ
-??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@@Z
-??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z
 ??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ

From bf2037173924be92eeb5925c00f271bb27611d8d Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Mon, 25 Nov 2024 09:25:35 -0800
Subject: [PATCH 4/9] Run clang-format on test source

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 sycl/test-e2e/GroupAlgorithm/root_group.cpp | 32 +++++++++++++--------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
index 9346e2f46eb4f..369057d63adac 100644
--- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp
+++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
@@ -28,13 +28,17 @@ void testQueriesAndProperties() {
       sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
   const auto kernel = bundle.get_kernel<class QueryKernel>();
   const auto local_range = sycl::range<1>(1);
-  const auto maxWGs = kernel.ext_oneapi_get_info<
-      sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_groups>(q, local_range, 0);
+  const auto maxWGs =
+      kernel
+          .ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
+                                   kernel_queue_specific::max_num_work_groups>(
+              q, local_range, 0);
   const auto wgRange = sycl::range<3>{WorkGroupSize, 1, 1};
-  const auto maxWGsWithLimits = kernel.ext_oneapi_get_info<
-      sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_groups>(q, wgRange, wgRange.size() * sizeof(int));
+  const auto maxWGsWithLimits =
+      kernel
+          .ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
+                                   kernel_queue_specific::max_num_work_groups>(
+              q, wgRange, wgRange.size() * sizeof(int));
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
   q.single_task<class QueryKernel>(props, []() {});
@@ -53,9 +57,11 @@ void testRootGroup() {
   const auto bundle =
       sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
   const auto kernel = bundle.get_kernel<class RootGroupKernel>();
-  const auto maxWGs = kernel.ext_oneapi_get_info<
-      sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_groups>(q, WorkGroupSize, 0);
+  const auto maxWGs =
+      kernel
+          .ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
+                                   kernel_queue_specific::max_num_work_groups>(
+              q, WorkGroupSize, 0);
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
   sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
@@ -95,9 +101,11 @@ void testRootGroupFunctions() {
   const auto bundle =
       sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
   const auto kernel = bundle.get_kernel<class RootGroupFunctionsKernel>();
-  const auto maxWGs = kernel.ext_oneapi_get_info<
-      sycl::ext::oneapi::experimental::info::kernel_queue_specific::
-          max_num_work_groups>(q, WorkGroupSize, 0);
+  const auto maxWGs =
+      kernel
+          .ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
+                                   kernel_queue_specific::max_num_work_groups>(
+              q, WorkGroupSize, 0);
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
 

From 58eee69809795a8f39a66fe9a0fe6d1c2f83bc7f Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Mon, 25 Nov 2024 10:16:23 -0800
Subject: [PATCH 5/9] Add Windows symbols

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 sycl/test/abi/sycl_symbols_windows.dump | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index d6550fbd3a52b..4d59de4ea82ac 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -13,6 +13,8 @@
 ??$create_sub_devices@$0BAIH@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@_KV?$allocator@_K@std@@@4@@Z
 ??$create_sub_devices@$0BAII@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4partition_affinity_domain@info@12@@Z
 ??$create_sub_devices@$0BAIJ@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ
+??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$00@12@_K@Z
+??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$01@12@_K@Z
 ??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ

From c5798709fb1d3620d1c6547ccb2e697ea3405aa4 Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Mon, 25 Nov 2024 18:32:43 -0800
Subject: [PATCH 6/9] Empty commit

Signed-off-by: Michael Aziz <michael.aziz@intel.com>

From 2801e1e8f687afc11ea6d15604ba4482779029e1 Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Tue, 26 Nov 2024 13:39:38 -0800
Subject: [PATCH 7/9] Fix launch query for invalid group sizes

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 sycl/source/detail/kernel_impl.hpp                   |  6 ++++--
 .../Basic/launch_queries/max_num_work_groups.cpp     | 12 ++++--------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp
index 80cbd9b8689b4..59429f9bf302a 100644
--- a/sycl/source/detail/kernel_impl.hpp
+++ b/sycl/source/detail/kernel_impl.hpp
@@ -332,8 +332,10 @@ kernel_impl::queryMaxNumWorkGroups(queue Queue,
   if (auto Result = Adapter->call_nocheck<
                     UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
           Handle, Dimensions, WG, DynamicLocalMemorySize, &GroupCount);
-      Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
-    // The feature is supported. Check for other errors and throw if any.
+      Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE &&
+      Result != UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE) {
+    // The feature is supported and the group size is valid. Check for other
+    // errors and throw if any.
     Adapter->checkUrResult(Result);
     return GroupCount;
   }
diff --git a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
index fe8246bb2f56d..06a8a52b56cf0 100644
--- a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
+++ b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
@@ -161,14 +161,10 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
   // It cannot be possible to launch a kernel successfully with a configuration
   // that exceeds the available resources as in the above defined workGroupSize.
   // workGroupSize is larger than maxWorkGroupSize, hence maxWGs must equal 0.
-  // Note: Level-Zero currently always returns a non-zero value.
-  // TODO: Remove the backend condition once the Level-Zero API issue is fixed.
-  if (dev.get_backend() != sycl::backend::ext_oneapi_level_zero) {
-    assert(maxWGs == 0 &&
-           "max_num_work_groups query failed.\n"
-           "It should return 0 possible groups when the requested resources "
-           "by the lanuch config exceed those available in the hardware.");
-  }
+  assert(maxWGs == 0 &&
+         "max_num_work_groups query failed.\n"
+         "It should return 0 possible groups when the requested resources "
+         "by the lanuch config exceed those available in the hardware.");
 
   // As we ensured that the 'max_num_work_groups' query correctly
   // returns 0 possible work-groups, test that the kernel launch will fail.

From c559a6688c40a7fe5da7fcbecc2e79eaf4db61e7 Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Tue, 26 Nov 2024 13:55:21 -0800
Subject: [PATCH 8/9] Update UR tag

Signed-off-by: Michael Aziz <michael.aziz@intel.com>
---
 sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
index 5ddaea33cda91..dc76d2cbfa8b3 100644
--- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake
+++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
@@ -4,4 +4,4 @@
 # Date:   Mon Nov 25 11:04:37 2024 +0000
 #     Merge pull request #2303 from nrspruit/zeInitDrivers
 #     [L0] Implement Support for zeInitDrivers
-set(UNIFIED_RUNTIME_TAG 39b72622cf2997459788000ac665e259f728a9ff)
+set(UNIFIED_RUNTIME_TAG ca0a0e3e3b3ad933605a35850ebc11bbf5d5f367)

From 90c2fa8600df0a3c0bb05dc1b419ae7502ac0bda Mon Sep 17 00:00:00 2001
From: Michael Aziz <michael.aziz@intel.com>
Date: Tue, 26 Nov 2024 14:32:05 -0800
Subject: [PATCH 9/9] Empty commit