diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
index 72841724fa01d..202bfdcdc19e3 100644
--- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake
+++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
@@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
       CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
   endfunction()
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
+  set(UNIFIED_RUNTIME_REPO "https://github.com/Bensuo/unified-runtime.git")
   include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)
 
   set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
index 95a7b4dc86bce..e73e7a3d52331 100644
--- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake
+++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
@@ -1,7 +1 @@
-# commit 9937d029c7fdcbf101e89f8515f640c145e059c5
-# Merge: 9ac6d5d9 10b0e101
-# Author: Callum Fare <callum@codeplay.com>
-# Date:   Wed Nov 20 14:49:17 2024 +0000
-#     Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo
-#     Use extension version of clGetKernelSubGroupInfo when necessary.
-set(UNIFIED_RUNTIME_TAG 9937d029c7fdcbf101e89f8515f640c145e059c5)
+set(UNIFIED_RUNTIME_TAG "ewan/cuda_update_local_size")
diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp
index 861ec2a883601..d723d8d83511f 100644
--- a/sycl/source/detail/graph_impl.cpp
+++ b/sycl/source/detail/graph_impl.cpp
@@ -1510,7 +1510,10 @@ void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
   PtrDescs.reserve(MaskedArgs.size());
   ValueDescs.reserve(MaskedArgs.size());
 
-  ur_exp_command_buffer_update_kernel_launch_desc_t UpdateDesc;
+  ur_exp_command_buffer_update_kernel_launch_desc_t UpdateDesc{};
+  UpdateDesc.stype =
+      UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC;
+  UpdateDesc.pNext = nullptr;
 
   // Collect arg descriptors and fill kernel launch descriptor
   using sycl::detail::kernel_param_kind_t;
diff --git a/sycl/test-e2e/Graph/Explicit/local_accessor_multiple_accessors.cpp b/sycl/test-e2e/Graph/Explicit/local_accessor_multiple_accessors.cpp
new file mode 100644
index 0000000000000..c4d704216a082
--- /dev/null
+++ b/sycl/test-e2e/Graph/Explicit/local_accessor_multiple_accessors.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/local_accessor_multiple_accessors.cpp"
diff --git a/sycl/test-e2e/Graph/Explicit/local_accessor_multiple_nodes.cpp b/sycl/test-e2e/Graph/Explicit/local_accessor_multiple_nodes.cpp
new file mode 100644
index 0000000000000..83eff54307ea3
--- /dev/null
+++ b/sycl/test-e2e/Graph/Explicit/local_accessor_multiple_nodes.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/local_accessor_multiple_nodes.cpp"
diff --git a/sycl/test-e2e/Graph/Explicit/opencl_local_acc.cpp b/sycl/test-e2e/Graph/Explicit/opencl_local_acc.cpp
new file mode 100644
index 0000000000000..48c41ad0e78c5
--- /dev/null
+++ b/sycl/test-e2e/Graph/Explicit/opencl_local_acc.cpp
@@ -0,0 +1,12 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+// REQUIRES: ocloc && (opencl || level_zero)
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/opencl_local_acc.cpp"
diff --git a/sycl/test-e2e/Graph/Inputs/local_accessor.cpp b/sycl/test-e2e/Graph/Inputs/local_accessor.cpp
index b3ac9fde67b6e..b6f6a7dd34348 100644
--- a/sycl/test-e2e/Graph/Inputs/local_accessor.cpp
+++ b/sycl/test-e2e/Graph/Inputs/local_accessor.cpp
@@ -10,20 +10,18 @@ int main() {
 
   const size_t LocalSize = 128;
 
-  std::vector<T> DataA(Size), DataB(Size), DataC(Size);
+  std::vector<T> HostData(Size);
 
-  std::iota(DataA.begin(), DataA.end(), 10);
-
-  std::vector<T> ReferenceA(DataA);
+  std::iota(HostData.begin(), HostData.end(), 10);
 
   exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
 
   T *PtrA = malloc_device<T>(Size, Queue);
 
-  Queue.copy(DataA.data(), PtrA, Size);
+  Queue.copy(HostData.data(), PtrA, Size);
   Queue.wait_and_throw();
 
-  auto node = add_node(Graph, Queue, [&](handler &CGH) {
+  auto Node = add_node(Graph, Queue, [&](handler &CGH) {
     local_accessor<T, 1> LocalMem(LocalSize, CGH);
 
     CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
@@ -40,14 +38,14 @@ int main() {
 
   Queue.wait_and_throw();
 
-  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.copy(PtrA, HostData.data(), Size);
   Queue.wait_and_throw();
 
   free(PtrA, Queue);
 
   for (size_t i = 0; i < Size; i++) {
-    T Ref = 10 + i + (i * 2);
-    check_value(i, Ref, ReferenceA[i], "PtrA");
+    T Ref = 10 + i + (Iterations * (i * 2));
+    assert(check_value(i, Ref, HostData[i], "PtrA"));
   }
 
   return 0;
diff --git a/sycl/test-e2e/Graph/Inputs/local_accessor_multiple_accessors.cpp b/sycl/test-e2e/Graph/Inputs/local_accessor_multiple_accessors.cpp
new file mode 100644
index 0000000000000..0ae67a4469a8c
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/local_accessor_multiple_accessors.cpp
@@ -0,0 +1,62 @@
+// Tests adding of nodes with more than one local accessor,
+// and submitting the graph.
+
+#include "../graph_common.hpp"
+
+int main() {
+  queue Queue{};
+
+  using T = int;
+
+  const size_t LocalSize = 128;
+
+  std::vector<T> HostData(Size);
+
+  std::iota(HostData.begin(), HostData.end(), 10);
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  T *PtrIn = malloc_device<T>(Size, Queue);
+  T *PtrOut = malloc_device<T>(Size, Queue);
+
+  Queue.memset(PtrOut, 0, Size * sizeof(T));
+  Queue.copy(HostData.data(), PtrIn, Size);
+  Queue.wait_and_throw();
+
+  auto Node = add_node(Graph, Queue, [&](handler &CGH) {
+    local_accessor<T, 1> LocalMemA(LocalSize, CGH);
+    local_accessor<T, 1> LocalMemB(LocalSize, CGH);
+
+    CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+      auto LocalID = Item.get_local_linear_id();
+      auto GlobalID = Item.get_global_linear_id();
+      LocalMemA[LocalID] = GlobalID;
+      LocalMemB[LocalID] = PtrIn[GlobalID];
+      PtrOut[GlobalID] += LocalMemA[LocalID] * LocalMemB[LocalID];
+    });
+  });
+
+  auto GraphExec = Graph.finalize();
+
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrOut, HostData.data(), Size);
+  Queue.wait_and_throw();
+
+  free(PtrIn, Queue);
+  free(PtrOut, Queue);
+
+  for (size_t i = 0; i < Size; i++) {
+    T Ref = 0;
+    for (size_t n = 0; n < Iterations; n++) {
+      Ref += (i * (10 + i));
+    }
+    assert(check_value(i, Ref, HostData[i], "PtrOut"));
+  }
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Inputs/local_accessor_multiple_nodes.cpp b/sycl/test-e2e/Graph/Inputs/local_accessor_multiple_nodes.cpp
new file mode 100644
index 0000000000000..8d38824c680ae
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/local_accessor_multiple_nodes.cpp
@@ -0,0 +1,71 @@
+// Test creating a graph where more than one nodes uses local accessors,
+// and submits of the graph.
+
+#include "../graph_common.hpp"
+
+int main() {
+  queue Queue{};
+
+  using T = int;
+
+  const size_t LocalSize = 128;
+
+  std::vector<T> HostData(Size);
+
+  std::iota(HostData.begin(), HostData.end(), 10);
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  T *PtrA = malloc_device<T>(Size, Queue);
+
+  Queue.copy(HostData.data(), PtrA, Size);
+  Queue.wait_and_throw();
+
+  auto NodeA = add_node(Graph, Queue, [&](handler &CGH) {
+    local_accessor<T, 1> LocalMem(LocalSize, CGH);
+
+    CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+      LocalMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2;
+      PtrA[Item.get_global_linear_id()] += LocalMem[Item.get_local_linear_id()];
+    });
+  });
+
+  auto NodeB = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        local_accessor<T, 1> LocalMem(LocalSize, CGH);
+        depends_on_helper(CGH, NodeA);
+
+        CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+          LocalMem[Item.get_local_linear_id()] = 3;
+          PtrA[Item.get_global_linear_id()] *=
+              LocalMem[Item.get_local_linear_id()];
+        });
+      },
+      NodeA);
+
+  auto GraphExec = Graph.finalize();
+
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, HostData.data(), Size);
+  Queue.wait_and_throw();
+
+  free(PtrA, Queue);
+
+  for (size_t i = 0; i < Size; i++) {
+    T Ref = 10 + i;
+
+    for (size_t n = 0; n < Iterations; n++) {
+      Ref += i * 2;
+      Ref *= 3;
+    }
+    assert(check_value(i, Ref, HostData[i], "PtrA"));
+  }
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Inputs/opencl_local_acc.cpp b/sycl/test-e2e/Graph/Inputs/opencl_local_acc.cpp
new file mode 100644
index 0000000000000..c4f51ba9c9232
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/opencl_local_acc.cpp
@@ -0,0 +1,78 @@
+// Tests using an OpenCL-C defined kernel with multiple local accessors
+
+#include "../graph_common.hpp"
+
+using source_kb = sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source>;
+using exe_kb = sycl::kernel_bundle<sycl::bundle_state::executable>;
+
+auto constexpr LocalAccCLSource = R"===(
+    kernel void test_la(__global int *out, __local int* local_ptr1,
+                        __local int2* local_ptr2, int n) {
+        __local int4 local_data[1];
+
+        size_t gid = get_global_id(0);
+        size_t lid = get_local_id(0);
+        size_t wg_size = get_num_groups(0);
+
+        local_ptr1[lid] = lid;
+        local_ptr2[lid].x = n;
+        local_ptr2[lid].y = wg_size;
+
+        if (lid == 0) {
+          local_data[lid] = (int4)(0xA, 0xB, 0xC, 0xD);
+        }
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int acc = local_data[0].x + local_data[0].y + local_data[0].z +
+                  local_data[0].w;
+        out[gid] = (local_ptr1[lid] * local_ptr2[lid].x) +
+                   (local_ptr2[lid].y * acc);
+    }
+)===";
+
+int main() {
+  queue Queue;
+
+  source_kb kbSrc = exp_ext::create_kernel_bundle_from_source(
+      Queue.get_context(), exp_ext::source_language::opencl, LocalAccCLSource);
+  exe_kb kbExe1 = exp_ext::build(kbSrc);
+  sycl::kernel test_kernel = kbExe1.ext_oneapi_get_kernel("test_la");
+
+  exp_ext::command_graph Graph{Queue};
+
+  int32_t *Ptr = malloc_device<int32_t>(Size, Queue);
+
+  int32_t N = 42;
+  constexpr size_t LocalSize = 256;
+  auto Node = add_node(Graph, Queue, [&](handler &cgh) {
+    local_accessor<int32_t, 1> acc_local1(LocalSize, cgh);
+    local_accessor<sycl::int2, 1> acc_local2(LocalSize, cgh);
+
+    cgh.set_arg(0, Ptr);
+    cgh.set_arg(1, acc_local1);
+    cgh.set_arg(2, acc_local2);
+    cgh.set_arg(3, N);
+
+    cgh.parallel_for(nd_range<1>(Size, LocalSize), test_kernel);
+  });
+
+  auto ExecGraph = Graph.finalize();
+  Queue.ext_oneapi_graph(ExecGraph).wait();
+
+  std::vector<int32_t> HostData(Size);
+  Queue.copy(Ptr, HostData.data(), Size).wait();
+
+  constexpr int32_t Acc = 0xA + 0xB + 0xC + 0xD;
+  constexpr int32_t WorkGroups = Size / LocalSize;
+  constexpr int32_t Tmp = Acc * WorkGroups;
+  for (size_t i = 0; i < Size; i++) {
+    int32_t local_id = i % LocalSize;
+    int32_t Ref = (local_id * N) + Tmp;
+    assert(HostData[i] == Ref);
+  }
+
+  sycl::free(Ptr, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp b/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp
index 100792a2e4762..6feb497115b0e 100644
--- a/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp
+++ b/sycl/test-e2e/Graph/Inputs/whole_update_local_acc.cpp
@@ -28,8 +28,6 @@ int main() {
   std::iota(DataA.begin(), DataA.end(), 10);
   std::iota(DataB.begin(), DataB.end(), 10);
 
-  std::vector<T> ReferenceA(DataA), ReferenceB(DataB);
-
   exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()};
 
   T *PtrA = malloc_device<T>(Size, Queue);
@@ -59,10 +57,10 @@ int main() {
   Queue.wait_and_throw();
 
   for (size_t i = 0; i < Size; i++) {
-    T RefA = 10 + i + (i * 2) + LocalSize / 2;
+    T RefA = 10 + i + Iterations * ((i * 2) + (LocalSize / 2));
     T RefB = 10 + i;
-    check_value(i, RefA, ReferenceA[i], "PtrA");
-    check_value(i, RefB, ReferenceB[i], "PtrB");
+    assert(check_value(i, RefA, DataA[i], "PtrA"));
+    assert(check_value(i, RefB, DataB[i], "PtrB"));
   }
 
   // Update GraphExecA using whole graph update
@@ -81,10 +79,10 @@ int main() {
   Queue.wait_and_throw();
 
   for (size_t i = 0; i < Size; i++) {
-    T RefA = 10 + i + (i * 2) + LocalSize / 2;
-    T RefB = 10 + i + (i * 2) + LocalSize;
-    check_value(i, RefA, ReferenceA[i], "PtrA");
-    check_value(i, RefB, ReferenceB[i], "PtrB");
+    T RefA = 10 + i + Iterations * ((i * 2) + (LocalSize / 2));
+    T RefB = 10 + i + Iterations * ((i * 2) + LocalSize);
+    assert(check_value(i, RefA, DataA[i], "PtrA"));
+    assert(check_value(i, RefB, DataB[i], "PtrB"));
   }
 
   free(PtrA, Queue);
diff --git a/sycl/test-e2e/Graph/Inputs/whole_update_local_acc_multi.cpp b/sycl/test-e2e/Graph/Inputs/whole_update_local_acc_multi.cpp
new file mode 100644
index 0000000000000..95f55736702a5
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/whole_update_local_acc_multi.cpp
@@ -0,0 +1,122 @@
+// Tests whole graph update of nodes with 2 local accessors,
+// and submission of the graph.
+
+#include "../graph_common.hpp"
+
+using T = int;
+
+void add_graph_nodes(
+    exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
+    queue &Queue, size_t Size, size_t LocalSize, T *Ptr) {
+  auto NodeA = add_node(Graph, Queue, [&](handler &CGH) {
+    local_accessor<T, 1> LocalMemA(LocalSize, CGH);
+    local_accessor<T, 1> LocalMemB(LocalSize, CGH);
+    CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+      auto LocalID = Item.get_local_linear_id();
+      auto GlobalID = Item.get_global_linear_id();
+      LocalMemA[LocalID] = GlobalID;
+      LocalMemB[LocalID] = Item.get_local_range(0);
+      Ptr[GlobalID] += LocalMemA[LocalID] * LocalMemB[LocalID];
+    });
+  });
+
+  // Introduce value params so that local arguments are not contiguous indices
+  // when set as kernel arguments
+  T Constant = 2;
+  auto NodeB = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        local_accessor<T, 1> LocalMemA(LocalSize, CGH);
+        local_accessor<T, 1> LocalMemB(LocalSize * 2, CGH);
+
+        depends_on_helper(CGH, NodeA);
+
+        CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
+          auto LocalID = Item.get_local_linear_id();
+          auto GlobalID = Item.get_global_linear_id();
+          LocalMemA[LocalID] = GlobalID;
+          LocalMemB[LocalID * 2] = Constant;
+          LocalMemB[(LocalID * 2) + 1] = Constant;
+          Ptr[GlobalID] += LocalMemA[LocalID] * LocalMemB[LocalID * 2] *
+                           LocalMemB[(LocalID * 2) + 1];
+        });
+      },
+      NodeA);
+}
+int main() {
+  queue Queue{};
+
+  const size_t LocalSize = 128;
+
+  std::vector<T> DataA(Size), DataB(Size);
+
+  std::iota(DataA.begin(), DataA.end(), 10);
+  std::iota(DataB.begin(), DataB.end(), 10);
+
+  exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()};
+
+  T *PtrA = malloc_device<T>(Size, Queue);
+  T *PtrB = malloc_device<T>(Size, Queue);
+
+  Queue.copy(DataA.data(), PtrA, Size);
+  Queue.copy(DataB.data(), PtrB, Size);
+  Queue.wait_and_throw();
+
+  size_t GraphALocalSize = LocalSize / 2;
+  add_graph_nodes(GraphA, Queue, Size, GraphALocalSize, PtrA);
+
+  auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
+
+  // Create second graph for whole graph update with a different local size
+  exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()};
+  add_graph_nodes(GraphB, Queue, Size, LocalSize, PtrB);
+
+  // Execute graphs before updating and check outputs
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.copy(PtrB, DataB.data(), Size);
+  Queue.wait_and_throw();
+
+  for (size_t i = 0; i < Size; i++) {
+    T Init = 10 + i;
+    T NodeA = i * GraphALocalSize;
+    T NodeB = i * 4;
+    T RefA = Init + Iterations * (NodeA + NodeB);
+    assert(check_value(i, RefA, DataA[i], "PtrA"));
+    assert(check_value(i, Init, DataB[i], "PtrB"));
+  }
+
+  // Update GraphExecA using whole graph update
+  GraphExecA.update(GraphB);
+
+  // Execute graphs again and check outputs
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.copy(PtrB, DataB.data(), Size);
+  Queue.wait_and_throw();
+
+  for (size_t i = 0; i < Size; i++) {
+    T Init = 10 + i;
+    T NodeAGraphA = i * GraphALocalSize;
+    T NodeAGraphB = i * LocalSize;
+    T NodeB = i * 4;
+    T RefA = Init + Iterations * (NodeAGraphA + NodeB);
+    T RefB = Init + Iterations * (NodeAGraphB + NodeB);
+    assert(check_value(i, RefA, DataA[i], "PtrA"));
+    assert(check_value(i, RefB, DataB[i], "PtrB"));
+  }
+
+  free(PtrA, Queue);
+  free(PtrB, Queue);
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/RecordReplay/local_accessor_multiple_accessors.cpp b/sycl/test-e2e/Graph/RecordReplay/local_accessor_multiple_accessors.cpp
new file mode 100644
index 0000000000000..aa044c80e19ec
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/local_accessor_multiple_accessors.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/local_accessor_multiple_accessors.cpp"
diff --git a/sycl/test-e2e/Graph/RecordReplay/local_accessor_multiple_nodes.cpp b/sycl/test-e2e/Graph/RecordReplay/local_accessor_multiple_nodes.cpp
new file mode 100644
index 0000000000000..5830c21a57431
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/local_accessor_multiple_nodes.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/local_accessor_multiple_nodes.cpp"
diff --git a/sycl/test-e2e/Graph/RecordReplay/opencl_local_acc.cpp b/sycl/test-e2e/Graph/RecordReplay/opencl_local_acc.cpp
new file mode 100644
index 0000000000000..a924d018ba8ec
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/opencl_local_acc.cpp
@@ -0,0 +1,12 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+// REQUIRES: ocloc && (opencl || level_zero)
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/opencl_local_acc.cpp"
diff --git a/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp b/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp
index 1db9905457ae7..a99eec42afa02 100644
--- a/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp
+++ b/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc.cpp
@@ -5,6 +5,9 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 
+// XFAIL: level_zero
+// XFAIL-TRACKER: OFNAAO-422
+
 #define GRAPH_E2E_EXPLICIT
 
 #include "../../Inputs/whole_update_local_acc.cpp"
diff --git a/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc_multi.cpp b/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc_multi.cpp
new file mode 100644
index 0000000000000..d15ec880d89ad
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/Explicit/whole_update_local_acc_multi.cpp
@@ -0,0 +1,13 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+// XFAIL: level_zero
+// XFAIL-TRACKER: OFNAAO-422
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../../Inputs/whole_update_local_acc_multi.cpp"
diff --git a/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp b/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp
index 03645b2f19bfd..ab9bb3fe37fa5 100644
--- a/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp
+++ b/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc.cpp
@@ -5,6 +5,9 @@
 // Extra run to check for immediate-command-list in Level Zero
 // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
 
+// XFAIL: level_zero
+// XFAIL-TRACKER: OFNAAO-422
+
 #define GRAPH_E2E_RECORD_REPLAY
 
 #include "../../Inputs/whole_update_local_acc.cpp"
diff --git a/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc_multi.cpp b/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc_multi.cpp
new file mode 100644
index 0000000000000..97a83de1129b3
--- /dev/null
+++ b/sycl/test-e2e/Graph/Update/RecordReplay/whole_update_local_acc_multi.cpp
@@ -0,0 +1,13 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+// XFAIL: level_zero
+// XFAIL-TRACKER: OFNAAO-422
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../../Inputs/whole_update_local_acc_multi.cpp"
diff --git a/sycl/test-e2e/KernelCompiler/opencl_local_mem.cpp b/sycl/test-e2e/KernelCompiler/opencl_local_mem.cpp
new file mode 100644
index 0000000000000..dfc54b786703a
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/opencl_local_mem.cpp
@@ -0,0 +1,84 @@
+// Tests using an OpenCL-C defined kernel with multiple local accessors
+
+// REQUIRES: ocloc && (opencl || level_zero)
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/detail/core.hpp>
+#include <sycl/usm.hpp>
+
+using namespace sycl;
+namespace syclex = sycl::ext::oneapi::experimental;
+using source_kb = sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source>;
+using exe_kb = sycl::kernel_bundle<sycl::bundle_state::executable>;
+
+auto constexpr LocalAccCLSource = R"===(
+    kernel void test_la(__global int *out, __local int* local_ptr1,
+                        __local int2* local_ptr2, int n) {
+        __local int4 local_data[1];
+
+        size_t gid = get_global_id(0);
+        size_t lid = get_local_id(0);
+        size_t wg_size = get_num_groups(0);
+
+        local_ptr1[lid] = lid;
+        local_ptr2[lid].x = n;
+        local_ptr2[lid].y = wg_size;
+
+        if (lid == 0) {
+          local_data[lid] = (int4)(0xA, 0xB, 0xC, 0xD);
+        }
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int acc = local_data[0].x + local_data[0].y + local_data[0].z +
+                  local_data[0].w;
+        out[gid] = (local_ptr1[lid] * local_ptr2[lid].x) +
+                   (local_ptr2[lid].y * acc);
+    }
+)===";
+
+int main() {
+  sycl::queue Queue;
+
+  source_kb kbSrc = syclex::create_kernel_bundle_from_source(
+      Queue.get_context(), syclex::source_language::opencl, LocalAccCLSource);
+  exe_kb kbExe1 = syclex::build(kbSrc);
+  sycl::kernel test_kernel = kbExe1.ext_oneapi_get_kernel("test_la");
+
+  constexpr size_t Size = 1024;
+  constexpr size_t LocalSize = 256;
+
+  int32_t *Ptr = malloc_device<int32_t>(Size, Queue);
+
+  int32_t N = 42;
+  Queue
+      .submit([&](handler &cgh) {
+        local_accessor<int32_t, 1> acc_local1(LocalSize, cgh);
+        local_accessor<sycl::int2, 1> acc_local2(LocalSize, cgh);
+
+        cgh.set_arg(0, Ptr);
+        cgh.set_arg(1, acc_local1);
+        cgh.set_arg(2, acc_local2);
+        cgh.set_arg(3, N);
+
+        cgh.parallel_for(nd_range<1>(Size, LocalSize), test_kernel);
+      })
+      .wait();
+
+  std::vector<int32_t> HostData(Size);
+  Queue.copy(Ptr, HostData.data(), Size).wait();
+
+  constexpr int32_t Acc = 0xA + 0xB + 0xC + 0xD;
+  constexpr int32_t WorkGroups = Size / LocalSize;
+  constexpr int32_t Tmp = Acc * WorkGroups;
+  for (size_t i = 0; i < Size; i++) {
+    int32_t local_id = i % LocalSize;
+    int32_t Ref = (local_id * N) + Tmp;
+    assert(HostData[i] == Ref);
+  }
+
+  sycl::free(Ptr, Queue);
+
+  return 0;
+}