From da1a93932cc9c9197e46fffd5dd6da4bd862e226 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 17 Jul 2024 10:45:31 +0200 Subject: [PATCH 1/3] Fix warning warning: unnecessary temporary object created while calling emplace_back [modernize-use-emplace] points.emplace_back(PointLonLat(lonlatView(i, LON), lonlatView(i, LAT))); --- src/atlas/interpolation/method/cubedsphere/CellFinder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atlas/interpolation/method/cubedsphere/CellFinder.cc b/src/atlas/interpolation/method/cubedsphere/CellFinder.cc index 9912d16f1..4a77231a0 100644 --- a/src/atlas/interpolation/method/cubedsphere/CellFinder.cc +++ b/src/atlas/interpolation/method/cubedsphere/CellFinder.cc @@ -37,7 +37,7 @@ CellFinder::CellFinder(const Mesh& mesh, const util::Config& config): mesh_{mesh auto halo = config.getInt("halo", 0); for (idx_t i = 0; i < mesh_.cells().size(); ++i) { if (haloView(i) <= halo) { - points.emplace_back(PointLonLat(lonlatView(i, LON), lonlatView(i, LAT))); + points.emplace_back(lonlatView(i, LON), lonlatView(i, LAT)); payloads.emplace_back(i); } } From c0a58f2ddc78841c1145ba1151a4f3f9d09d8781 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 8 Aug 2024 14:56:07 +0000 Subject: [PATCH 2/3] Fix atlas_test_array , allocateDevice() before accMap() --- src/tests/array/test_array.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/array/test_array.cc b/src/tests/array/test_array.cc index 9ce8cbec9..c1ae03e5f 100644 --- a/src/tests/array/test_array.cc +++ b/src/tests/array/test_array.cc @@ -555,6 +555,7 @@ CASE("test_wrap") { CASE("test_acc_map") { Array* ds = Array::create(2, 3, 4); + EXPECT_NO_THROW(ds->allocateDevice()); EXPECT_NO_THROW(ds->accMap()); EXPECT(ds->accMapped() == ATLAS_HAVE_ACC); delete ds; From b4901e94efac1270b9a29b97f3c8d82ae03bde03 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 8 Aug 2024 14:49:22 +0000 Subject: [PATCH 3/3] Fix atlas_test_haloexchange and atlas_test_haloexchange_adjoint --- src/tests/parallel/test_haloexchange.cc | 50 +++--- .../parallel/test_haloexchange_adjoint.cc | 156 +++++++++++------- 2 files changed, 125 insertions(+), 81 deletions(-) diff --git a/src/tests/parallel/test_haloexchange.cc b/src/tests/parallel/test_haloexchange.cc index 3bf2ccdd2..bdc8f5c90 100644 --- a/src/tests/parallel/test_haloexchange.cc +++ b/src/tests/parallel/test_haloexchange.cc @@ -105,7 +105,7 @@ struct validate { }; struct Fixture { - Fixture(bool on_device): on_device_(on_device) { + Fixture(bool on_device = false): on_device_(on_device) { int nnodes_c[] = {5, 6, 7}; nb_nodes = vec(nnodes_c); N = nb_nodes[mpi::comm().rank()]; @@ -226,8 +226,6 @@ void test_rank1_strided_v1(Fixture& f) { arrv_t(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j] * 100); } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -240,13 +238,11 @@ void test_rank1_strided_v1(Fixture& f) { array::ArraySpec { array::make_shape(f.N, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(32, 1) - } + array::make_strides(32, 1) #else - array::make_strides(2, 1) - } + array::make_strides(2, 1) #endif - )); + })); arr->syncHostDevice(); @@ -282,8 +278,6 @@ void test_rank1_strided_v2(Fixture& f) { arrv_t(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j] * 100); } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -295,12 +289,14 @@ void test_rank1_strided_v2(Fixture& f) { &(arrv_t(0, 1)), array::ArraySpec { array::make_shape(f.N, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(32, 1) + array::make_strides(32, 1) #else - array::make_strides(2, 1) + array::make_strides(2, 1) #endif })); + arr->syncHostDevice(); + f.halo_exchange.execute(*arr, false); switch (mpi::comm().rank()) { @@ -373,23 +369,22 @@ void test_rank2_l1(Fixture& f) { (size_t(f.part[p]) != mpi::comm().rank() ? 0 : f.gidx[p] * std::pow(10, i)); } } - arr_t.syncHostDevice(); std::unique_ptr arr(array::Array::wrap( arrv_t.data(), array::ArraySpec { array::make_shape(f.N, 1, 2), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(96, 32, 1) + array::make_strides(96, 32, 1) #else - array::make_strides(6, 2, 1) + array::make_strides(6, 2, 1) #endif })); - arr_t.syncHostDevice(); + arr->syncHostDevice(); f.halo_exchange.execute(*arr, false); - arr_t.syncHostDevice(); + arr->syncHostDevice(); switch (mpi::comm().rank()) { case 0: { @@ -443,9 +438,9 @@ void test_rank2_l2_v2(Fixture& f) { &arrv_t(0, 1, 1), array::ArraySpec { array::make_shape(f.N, 1, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(192, 32, 1) + array::make_strides(192, 32, 1) #else - array::make_strides(6, 2, 1) + array::make_strides(6, 2, 1) #endif })); @@ -503,9 +498,9 @@ void test_rank2_v2(Fixture& f) { &arrv_t(0, 0, 1), array::ArraySpec { array::make_shape(f.N, 3, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(192, 32, 2) + array::make_strides(192, 32, 2) #else - array::make_strides(6, 2, 2) + array::make_strides(6, 2, 2) #endif })); @@ -678,7 +673,7 @@ void test_rank1_cinterface(Fixture& f) { } CASE("test_haloexchange") { - Fixture f(false); + Fixture f; SECTION("test_rank0_arrview") { test_rank0_arrview(f); } @@ -701,20 +696,25 @@ CASE("test_haloexchange") { SECTION("test_rank1_paralleldim_1") { test_rank1_paralleldim1(f); } SECTION("test_rank2_paralleldim_2") { test_rank2_paralleldim2(f); } + SECTION("test_rank1_cinterface") { test_rank1_cinterface(f); } +} #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - f.on_device_ = true; +CASE("test_haloexchange on device") { + bool on_device = true; + Fixture f(on_device); SECTION("test_rank0_arrview") { test_rank0_arrview(f); } SECTION("test_rank1") { test_rank1(f); } SECTION("test_rank2") { test_rank2(f); } - SECTION("test_rank0_wrap") { test_rank0_wrap(f); } -#endif + SECTION("test_rank0_wrap") { test_rank0_wrap(f); } } +#endif + //----------------------------------------------------------------------------- diff --git a/src/tests/parallel/test_haloexchange_adjoint.cc b/src/tests/parallel/test_haloexchange_adjoint.cc index 816b68441..392bc3575 100644 --- a/src/tests/parallel/test_haloexchange_adjoint.cc +++ b/src/tests/parallel/test_haloexchange_adjoint.cc @@ -183,11 +183,15 @@ void test_rank0_arrview(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -240,11 +244,15 @@ void test_rank0_arrview_adj_test(Fixture& f) { arrv(j) = arrv_init(j); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum1 POD sum1(0); @@ -252,11 +260,15 @@ void test_rank0_arrview_adj_test(Fixture& f) { sum1 += arrv(j) * arrv(j); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum2 POD sum2(0); @@ -299,11 +311,15 @@ void test_rank1(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -336,11 +352,15 @@ void test_rank1_adj_test(Fixture& f) { arrv(j, 1ul) = arrv_init(j, 1ul); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum1 POD sum1(0); @@ -350,11 +370,15 @@ void test_rank1_adj_test(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum2 POD sum2(0); @@ -399,8 +423,6 @@ void test_rank1_strided_v1(Fixture& f) { } } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -460,9 +482,6 @@ void test_rank1_strided_v1_adj_test(Fixture& f) { arrv_t(j, 1ul) = arrv_init_t(j, 1ul); } - arr_init_t.syncHostDevice(); - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -483,11 +502,15 @@ void test_rank1_strided_v1_adj_test(Fixture& f) { #endif )); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute(*arr, f.on_device_); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } // sum1 POD sum1(0); @@ -497,11 +520,15 @@ void test_rank1_strided_v1_adj_test(Fixture& f) { } } - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(*arr, f.on_device_); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } // sum2 POD sum2(0); @@ -546,8 +573,6 @@ void test_rank1_strided_v2(Fixture& f) { } } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -565,6 +590,10 @@ void test_rank1_strided_v2(Fixture& f) { #endif })); + if (f.on_device_) { + arr->syncHostDevice(); + } + f.halo_exchange_std->execute_adjoint(*arr, false); switch (mpi::comm().rank()) { @@ -599,9 +628,6 @@ void test_rank1_strided_v2_adj_test(Fixture& f) { arrv_t(j, 1ul) = arrv_init_t(j, 1ul); } - arr_init_t.syncHostDevice(); - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -619,11 +645,15 @@ void test_rank1_strided_v2_adj_test(Fixture& f) { #endif })); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute(*arr, f.on_device_); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } // sum1 POD sum1(0); @@ -633,11 +663,15 @@ void test_rank1_strided_v2_adj_test(Fixture& f) { } } - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(*arr, f.on_device_); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } // sum2 POD sum2(0); @@ -692,11 +726,15 @@ void test_rank2(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -738,7 +776,9 @@ void test_rank2_adj_test(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); @@ -752,11 +792,15 @@ void test_rank2_adj_test(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum2 POD sum2(0); @@ -810,8 +854,6 @@ void test_rank2_l1(Fixture& f) { } } - arr_t.syncHostDevice(); - std::unique_ptr arr(array::Array::wrap( arrv_t.data(), array::ArraySpec { array::make_shape(f.N, 1, 2), @@ -822,11 +864,8 @@ void test_rank2_l1(Fixture& f) { #endif })); - arr_t.syncHostDevice(); - f.halo_exchange_std->execute_adjoint(*arr, false); - arr_t.syncHostDevice(); switch (mpi::comm().rank()) { case 0: { @@ -877,7 +916,6 @@ void test_rank2_l1_adj_test(Fixture& f) { arrv_t(p, i, static_cast(1)) = arrv_init_t(p, i, static_cast(1)); } } - arr_t.syncHostDevice(); std::unique_ptr arr(array::Array::wrap( arrv_t.data(), array::ArraySpec { @@ -889,12 +927,8 @@ void test_rank2_l1_adj_test(Fixture& f) { #endif })); - arr_t.syncHostDevice(); - f.halo_exchange_std->execute(*arr, false); - arr_t.syncHostDevice(); - // sum1 POD sum1(0); for (std::size_t p = 0; p < static_cast(f.N); ++p) { @@ -905,12 +939,8 @@ void test_rank2_l1_adj_test(Fixture& f) { } } - arr->syncHostDevice(); - f.halo_exchange_std->execute_adjoint(*arr, false); - arr->syncHostDevice(); - // sum2 POD sum2(0); for (std::size_t p = 0; p < static_cast(f.N); ++p) { @@ -1153,7 +1183,8 @@ void test_rank2_v2(Fixture& f) { } void test_rank0_wrap(Fixture& f) { - std::unique_ptr arr(array::Array::wrap(f.gidx.data(), array::make_shape(f.N))); + std::vector existing = f.gidx; + std::unique_ptr arr(array::Array::wrap(existing.data(), array::make_shape(f.N))); array::ArrayView arrv = array::make_view(*arr); switch (mpi::comm().rank()) { case 0: { @@ -1179,11 +1210,15 @@ void test_rank0_wrap(Fixture& f) { } } - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(*arr, f.on_device_); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -1202,6 +1237,7 @@ void test_rank0_wrap(Fixture& f) { break; } } + arr->deallocateDevice(); } void test_rank0_wrap_adj_test(Fixture& f) { @@ -1226,7 +1262,9 @@ void test_rank0_wrap_adj_test(Fixture& f) { arrv_init(j) = arrv(j); } - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute(*arr, f.on_device_); @@ -1236,11 +1274,15 @@ void test_rank0_wrap_adj_test(Fixture& f) { sum1 += arrv(j) * arrv(j); } - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(*arr, f.on_device_); - arr->syncHostDevice(); + if (f.on_device_) { + arr->syncHostDevice(); + } // sum2 POD sum2(0); @@ -1485,7 +1527,9 @@ void test_rank1_cinterface(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } int shapes[2] = {(int)arrv.shape(0), (int)arrv.shape(1)}; int strides[2] = {(int)arrv.stride(0), (int)arrv.stride(1)};