diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fbb41bf93..b7037788f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -121,11 +121,11 @@ jobs: - name: macos # Xcode compiler requires empty environment variables, so we pass null (~) here - os: macos-12 - compiler: clang-14 + os: macos-14 + compiler: clang-15 compiler_cc: ~ compiler_cxx: ~ - compiler_fc: gfortran-11 + compiler_fc: gfortran-13 caching: true coverage: false cmake_options: -DMPI_SLOTS=4 diff --git a/CHANGELOG.md b/CHANGELOG.md index e79acf4aa..6417ccd00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html ## [Unreleased] +## [0.39.0] - 2024-09-18 + +### Added +- Add HIC abstraction layer for HIP and CUDA by @wdeconinck in https://github.com/ecmwf/atlas/pull/219 +- Support for HIP via HIC +- Add regional interpolation by @benjaminmenetrier in https://github.com/ecmwf/atlas/pull/215 +- Pack vector components into higher-rank vector fields. by @odlomax in https://github.com/ecmwf/atlas/pull/218 + +### Fixed +- Fix missing header in FieldImpl.h by @tehrengruber in https://github.com/ecmwf/atlas/pull/214 +- Bug fixes to vector interpolation with StructuredColumns and spherical vector interpolation by @MarekWlasak in https://github.com/ecmwf/atlas/pull/222 + + ## [0.38.1] - 2024-07-15 ### Fixed @@ -551,6 +564,7 @@ Fix StructuredInterpolation2D with retry for failed stencils ## 0.13.0 - 2018-02-16 [Unreleased]: https://github.com/ecmwf/atlas/compare/master...develop +[0.39.0]: https://github.com/ecmwf/atlas/compare/0.38.1...0.39.0 [0.38.1]: https://github.com/ecmwf/atlas/compare/0.38.0...0.38.1 [0.38.0]: https://github.com/ecmwf/atlas/compare/0.37.0...0.38.0 [0.37.0]: https://github.com/ecmwf/atlas/compare/0.36.0...0.37.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index df9963eaa..8f89d8fa6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,9 @@ ecbuild_add_option( FEATURE ECKIT_DEVELOP DESCRIPTION "Used to enable new features or API depending on eckit develop branch, not yet in a tagged release" DEFAULT OFF ) +add_subdirectory( hic ) +find_package(hic) + include( features/BOUNDSCHECKING ) include( features/FORTRAN ) diff --git a/VERSION b/VERSION index bb22182d4..4ef2eb086 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.38.1 +0.39.0 diff --git a/cmake/atlas-import.cmake.in b/cmake/atlas-import.cmake.in index 98b7c9bb2..1c812e4de 100644 --- a/cmake/atlas-import.cmake.in +++ b/cmake/atlas-import.cmake.in @@ -23,6 +23,7 @@ if( atlas_HAVE_FORTRAN ) endif() find_dependency( atlas_io HINTS ${CMAKE_CURRENT_LIST_DIR}/../atlas_io @atlas_io_DIR@ @atlas_io_BINARY_DIR@ ) +find_dependency( hic HINTS ${CMAKE_CURRENT_LIST_DIR}/../hic @hic_DIR@ @hic_BINARY_DIR@ ) ## Eigen3 set( Eigen3_HINT @Eigen3_DIR@ ) diff --git a/cmake/atlas_compile_flags.cmake b/cmake/atlas_compile_flags.cmake index 8b334f149..8261a407d 100644 --- a/cmake/atlas_compile_flags.cmake +++ b/cmake/atlas_compile_flags.cmake @@ -11,8 +11,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) if( CMAKE_CXX_COMPILER_ID MATCHES Cray ) - - ecbuild_add_cxx_flags("-hnomessage=3140" NAME atlas_cxx_disable_warnings ) # colon separated numbers + if( NOT CMAKE_CXX_COMPILER_ID MATCHES CrayClang ) + ecbuild_add_cxx_flags("-hnomessage=3140" NAME atlas_cxx_disable_warnings ) # colon separated numbers + endif() ecbuild_add_fortran_flags("-hnomessage=3140" NAME atlas_fortran_disable_warnings ) # colon separated numbers # CC-3140 crayc++: WARNING File = 
atlas/functionspace/NodeColumns.cc, Line = 1, Column = 1 diff --git a/cmake/atlas_host_device.cmake b/cmake/atlas_host_device.cmake index 3a404a8e4..d4e9a5d67 100644 --- a/cmake/atlas_host_device.cmake +++ b/cmake/atlas_host_device.cmake @@ -6,7 +6,7 @@ # granted to it by virtue of its status as an intergovernmental organisation nor # does it submit to any jurisdiction. -function( create_cuda_wrapper variable ) +function( create_hic_wrapper variable ) set( options "" ) set( single_value_args SOURCE ) set( multi_value_args "" ) @@ -17,18 +17,24 @@ function( create_cuda_wrapper variable ) get_filename_component(name ${_PAR_SOURCE} NAME) get_filename_component(abspath ${_PAR_SOURCE} ABSOLUTE) + if( HAVE_CUDA ) + set( extension "cu" ) + elseif( HAVE_HIP ) + set( extension "hip" ) + endif() + if( directory ) - set(cuda_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${directory}/${base}.cu) + set(hic_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${directory}/${base}.${extension}) else() - set(cuda_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${base}.cu) + set(hic_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${base}.${extension}) endif() - set(${variable} ${cuda_wrapper} PARENT_SCOPE) + set(${variable} ${hic_wrapper} PARENT_SCOPE) set(content " #include \"atlas/${directory}/${name}\" ") - if( ${abspath} IS_NEWER_THAN ${cuda_wrapper} ) - file(WRITE ${cuda_wrapper} "${content}") + if( ${abspath} IS_NEWER_THAN ${hic_wrapper} ) + file(WRITE ${hic_wrapper} "${content}") endif() endfunction() @@ -40,12 +46,12 @@ function( atlas_host_device srclist ) set( multi_value_args SOURCES ) cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${ARGN} ) - if( ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA ) - set( use_cuda_srclist ${_PAR_SOURCES} ) + if( HAVE_GPU ) + set( use_hic_srclist ${_PAR_SOURCES} ) - foreach( src ${use_cuda_srclist} ) - create_cuda_wrapper( cuda_wrapper SOURCE ${src} ) - list( APPEND ${srclist} ${cuda_wrapper} ) + foreach( src ${use_hic_srclist} ) + create_hic_wrapper( hic_wrapper SOURCE ${src} ) + list( APPEND ${srclist} ${hic_wrapper} ) ecbuild_list_exclude_pattern( LIST ${srclist} REGEX ${src} ) endforeach() diff --git a/cmake/features/ACC.cmake b/cmake/features/ACC.cmake index 1464d9bc0..056b2c812 100644 --- a/cmake/features/ACC.cmake +++ b/cmake/features/ACC.cmake @@ -3,9 +3,14 @@ if( atlas_HAVE_ATLAS_FIELD ) set( ATLAS_ACC_CAPABLE FALSE ) -if( HAVE_CUDA ) +if( HAVE_GPU ) if( CMAKE_Fortran_COMPILER_ID MATCHES "PGI|NVHPC" ) set( ATLAS_ACC_CAPABLE TRUE ) + else() + find_package(OpenACC COMPONENTS C Fortran) + if(OpenACC_Fortran_FOUND AND OpenACC_C_FOUND) + set( ATLAS_ACC_CAPABLE TRUE ) + endif() endif() endif() @@ -22,6 +27,9 @@ if( atlas_HAVE_ACC ) if( NOT ACC_C_COMPILER ) ecbuild_error( "Could not find OpenACC capable C compiler" ) endif() + else() + set( ACC_Fortran_FLAGS ${OpenACC_Fortran_FLAGS} ) + set( ACC_C_FLAGS ${OpenACC_C_FLAGS} ) endif() endif() diff --git a/cmake/features/CUDA.cmake b/cmake/features/CUDA.cmake index 1bd57058a..80d5723d8 100644 --- a/cmake/features/CUDA.cmake +++ b/cmake/features/CUDA.cmake @@ -1,15 +1,37 @@ -ecbuild_add_option( FEATURE CUDA - DESCRIPTION "Enable CUDA support" - DEFAULT OFF - ) +# ecbuild_add_option( FEATURE CUDA +# DESCRIPTION "Enable CUDA support" +# DEFAULT OFF +# ) +# ecbuild_add_option( FEATURE HIP +# DESCRIPTION "Enable CUDA support" +# DEFAULT OFF +# ) -if( HAVE_CUDA ) - - enable_language( CUDA ) - ecbuild_info( "CUDA language enabled" ) - - find_package( CUDAToolkit REQUIRED ) +set( atlas_HAVE_CUDA 0 ) +set( atlas_HAVE_HIP 0 ) +set( atlas_HAVE_GPU 0 
) +if( hic_HAVE_CUDA ) + enable_language( CUDA ) + ecbuild_info( "CUDA language enabled" ) + find_package(CUDAToolkit REQUIRED) + set( atlas_HAVE_CUDA 1 ) + set( atlas_HAVE_GPU 1 ) +elseif( hic_HAVE_HIP ) + enable_language( HIP ) + ecbuild_info( "HIP language enabled" ) + find_package(hip CONFIG REQUIRED) + set( atlas_HAVE_HIP 1 ) + set( atlas_HAVE_GPU 1 ) endif() +set( HAVE_CUDA ${atlas_HAVE_CUDA} ) +set( HAVE_HIP ${atlas_HAVE_HIP} ) +set( HAVE_GPU ${atlas_HAVE_GPU} ) + +if( HAVE_GPU ) + ecbuild_info("GPU support enabled") +else() + ecbuild_info("GPU support not enabled") +endif() \ No newline at end of file diff --git a/cmake/project_summary.cmake b/cmake/project_summary.cmake index eb4face8b..62ead6584 100644 --- a/cmake/project_summary.cmake +++ b/cmake/project_summary.cmake @@ -60,10 +60,10 @@ if( atlas_HAVE_GRIDTOOLS_STORAGE ) else() - if( NOT atlas_HAVE_CUDA ) + if( NOT atlas_HAVE_GPU ) ecbuild_info( "Array storage backend: Native [HOST]" ) else() - ecbuild_info( "Array storage backend: Native [CUDA]" ) + ecbuild_info( "Array storage backend: Native [GPU]" ) endif() endif() diff --git a/hic/AUTHORS b/hic/AUTHORS new file mode 100644 index 000000000..e4c77d93c --- /dev/null +++ b/hic/AUTHORS @@ -0,0 +1,8 @@ +Authors +======= + +- Willem Deconinck + +Thanks for contributions from +============================= + diff --git a/hic/CHANGELOG.md b/hic/CHANGELOG.md new file mode 100644 index 000000000..e69de29bb diff --git a/hic/CMakeLists.txt b/hic/CMakeLists.txt new file mode 100644 index 000000000..904f7a077 --- /dev/null +++ b/hic/CMakeLists.txt @@ -0,0 +1,56 @@ +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. 
+ +############################################################################################ +# hic + +cmake_minimum_required( VERSION 3.18 FATAL_ERROR ) + +find_package( ecbuild 3.4 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../ecbuild ${CMAKE_CURRENT_SOURCE_DIR}/../../ecbuild ) + +################################################################################ +# Initialise project + +if( NOT DEFINED atlas_VERSION ) + set( atlas_VERSION 0.0.0 ) +endif() + +project( hic VERSION ${atlas_VERSION} LANGUAGES CXX ) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +################################################################################ +# Features that can be enabled / disabled with -DENABLE_ + +ecbuild_add_option( FEATURE CUDA DESCRIPTION "Use CUDA as backend to HIC" DEFAULT OFF ) +ecbuild_add_option( FEATURE HIP DESCRIPTION "Use HIP as backend to HIC" DEFAULT OFF ) + +if( HAVE_CUDA ) + find_package(CUDAToolkit REQUIRED) +elseif( HAVE_HIP ) + find_package(hip CONFIG REQUIRED) +endif() + +add_subdirectory( src ) +add_subdirectory( tests ) + +################################################################################ +# Export and summarize + +ecbuild_add_resources( + TARGET hic-others + SOURCES_PACK + README.md + CHANGELOG.md + LICENSE +) + +ecbuild_install_project( NAME hic ) +ecbuild_print_summary() + diff --git a/hic/LICENSE b/hic/LICENSE new file mode 100644 index 000000000..af6ca0287 --- /dev/null +++ b/hic/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 1996-2018 ECMWF + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/hic/README.md b/hic/README.md new file mode 100644 index 000000000..1b756fb40 --- /dev/null +++ b/hic/README.md @@ -0,0 +1,5 @@ +hic +=== + +HIC: An abstraction to HIP and CUDA + diff --git a/hic/cmake/hic-import.cmake.in b/hic/cmake/hic-import.cmake.in new file mode 100644 index 000000000..23ac59885 --- /dev/null +++ b/hic/cmake/hic-import.cmake.in @@ -0,0 +1,13 @@ + +include( CMakeFindDependencyMacro ) + +set( hic_HAVE_CUDA @hic_HAVE_CUDA@ ) +set( hic_HAVE_HIP @hic_HAVE_HIP@ ) + +if( hic_HAVE_CUDA ) + find_dependency( CUDAToolkit ) +endif() +if( hic_HAVE_HIP ) + find_dependency( hip CONFIG ) +endif() + diff --git a/hic/src/CMakeLists.txt b/hic/src/CMakeLists.txt new file mode 100644 index 000000000..5048de357 --- /dev/null +++ b/hic/src/CMakeLists.txt @@ -0,0 +1,39 @@ +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. 
+ +set(HIC_BACKEND_CUDA 0) +set(HIC_BACKEND_HIP 0) +set(HIC_BACKEND_DUMMY 0) +if( HAVE_CUDA ) + set(HIC_BACKEND_CUDA 1) +elseif( HAVE_HIP ) + set(HIC_BACKEND_HIP 1) +else() + set(HIC_BACKEND_DUMMY 1) +endif() + +configure_file(${PROJECT_SOURCE_DIR}/src/hic/hic_config.h.in ${PROJECT_BINARY_DIR}/src/hic/hic_config.h @ONLY ) + +ecbuild_add_library( TARGET hic + TYPE INTERFACE + PUBLIC_INCLUDES + $ + $ + $ +) + +install( FILES ${PROJECT_BINARY_DIR}/src/hic/hic_config.h DESTINATION include/hic ) +install( FILES hic/hic.h DESTINATION include/hic ) +install( FILES hic/hic_runtime.h DESTINATION include/hic ) +install( FILES hic/hic_dummy/hic_dummy_runtime.h DESTINATION include/hic/hic_dummy ) + +if( HAVE_CUDA ) + target_link_libraries( hic INTERFACE CUDA::cudart ) +elseif( HAVE_HIP ) + target_link_libraries( hic INTERFACE hip::host ) +endif() diff --git a/hic/src/hic/hic.h b/hic/src/hic/hic.h new file mode 100644 index 000000000..66a2530bf --- /dev/null +++ b/hic/src/hic/hic.h @@ -0,0 +1,38 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#pragma once + +#include "hic/hic_config.h" +#include "hic/hic_runtime.h" + +#include +#include + +#if HIC_BACKEND_DUMMY +#define HIC_CALL(val) +#else +#define HIC_CALL(val) hic_assert((val), #val, __FILE__, __LINE__) +#endif + +inline void hic_assert(hicError_t err, const char* const func, const char* const file, const int line) { + // Ignore errors when HIP/CUDA runtime is unloaded or deinitialized. + // This happens when calling HIP/CUDA after main has ended, e.g. in teardown of static variables calling `hicFree` + // --> ignore hicErrorDeinitialized (a.k.a. cudaErrorCudartUnloading / hipErrorDeinitialized) + if (err != hicSuccess && err != hicErrorDeinitialized) { + std::ostringstream msg; + msg << "HIC Runtime Error [code="< +#include + +#define DUMMY_SHOULD_NOT_BE_CALLED(SYMBOL) dummyShouldNotBeCalled( #SYMBOL ) +#define DUMMY_FUNCTION(SYMBOL) \ + template inline \ + dummyError_t dummy##SYMBOL(Args&&... 
args) { \ + DUMMY_SHOULD_NOT_BE_CALLED( hic##SYMBOL ); \ + return dummyError_t{0}; \ + } +#define DUMMY_VALUE(SYMBOL) \ + constexpr int dummy##SYMBOL = 0; + +namespace { + +[[noreturn]] void dummyShouldNotBeCalled(const char* symbol) { + throw std::runtime_error(std::string(symbol)+" is using the dummy backend and should not be called"); +} + +using dummyError_t = int; +using dummyEvent_t = void*; +using dummyStream_t = void*; + +const char* dummyGetErrorString(dummyError_t) { + DUMMY_SHOULD_NOT_BE_CALLED( hicGetErrorString ); +} + +dummyError_t dummyGetLastError( void ) { + DUMMY_SHOULD_NOT_BE_CALLED( hicGetLastError ); +} + +dummyError_t dummyPeekAtLastError( void ) { + DUMMY_SHOULD_NOT_BE_CALLED( hicPeekAtLastError ); +} + +struct dummyPointerAttributes { + int type{0}; + int device{-2}; + void* hostPointer{nullptr}; + void* devicePointer{nullptr}; +}; + +DUMMY_FUNCTION(DeviceSynchronize) +DUMMY_FUNCTION(Free) +DUMMY_FUNCTION(FreeAsync) +DUMMY_FUNCTION(GetDeviceCount); +DUMMY_FUNCTION(GetErrorString) +DUMMY_FUNCTION(GetLastError) +DUMMY_FUNCTION(HostGetDevicePointer) +DUMMY_FUNCTION(HostRegister) +DUMMY_FUNCTION(HostUnregister) +DUMMY_FUNCTION(Malloc) +DUMMY_FUNCTION(MallocAsync) +DUMMY_FUNCTION(MallocManaged) +DUMMY_FUNCTION(Memcpy) +DUMMY_FUNCTION(Memcpy2D) +DUMMY_FUNCTION(MemcpyAsync) +DUMMY_FUNCTION(Memcpy2DAsync) +DUMMY_FUNCTION(MemPrefetchAsync) +DUMMY_FUNCTION(StreamCreate) +DUMMY_FUNCTION(StreamDestroy) +DUMMY_FUNCTION(StreamSynchronize) +DUMMY_FUNCTION(PointerGetAttributes) + +DUMMY_VALUE(CpuDeviceId) +DUMMY_VALUE(HostRegisterMapped) +DUMMY_VALUE(MemoryTypeDevice) +DUMMY_VALUE(MemoryTypeHost) +DUMMY_VALUE(MemoryTypeUnregistered) +DUMMY_VALUE(MemoryTypeManaged) +DUMMY_VALUE(MemcpyDeviceToHost) +DUMMY_VALUE(MemcpyHostToDevice) +DUMMY_VALUE(Success) + +} + +#undef DUMMY_FUNCTION +#undef DUMMY_VALUE +#undef DUMMY_SHOULD_NOT_BE_CALLED \ No newline at end of file diff --git a/hic/src/hic/hic_runtime.h b/hic/src/hic/hic_runtime.h new file mode 100644 index 000000000..25af23cb0 --- /dev/null +++ b/hic/src/hic/hic_runtime.h @@ -0,0 +1,180 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ +#pragma once + +#if !defined(HIC_NAMESPACE) + #define HIC_NAMESPACE + #define HIC_NAMESPACE_BEGIN + #define HIC_NAMESPACE_END +#else + #define HIC_NAMESPACE_BEGIN namespace HIC_NAMESPACE { + #define HIC_NAMESPACE_END } +#endif + +#if HIC_BACKEND_CUDA + #define HIC_BACKEND cuda + #include +#elif HIC_BACKEND_HIP + #define HIC_BACKEND hip + #if defined(DEPRECATED) + #define DEFINED_OUTERSCOPE DEPRECATED + #undef DEPRECATED + #endif + #include + #if defined(DEPRECATED) + #undef DEPRECATED + #endif + #if defined(DEFINED_OUTERSCOPE) + #define DEPRECATED DEFINED_OUTERSCOPE + #endif + +#if HIP_VERSION_MAJOR < 6 + enum hicMemoryType { + hicMemoryTypeUnregistered = 0 , + hicMemoryTypeHost = 1 , + hicMemoryTypeDevice = 2 , + hicMemoryTypeManaged = 3 + }; + struct hicPointerAttributes { + hicMemoryType type; + int device; + void* devicePointer; + void* hostPointer; + }; + + [[nodiscard]] inline hipError_t hicPointerGetAttributes(hicPointerAttributes* attributes, const void* ptr) { + hipPointerAttribute_t attr_; + auto err = hipPointerGetAttributes(&attr_, ptr); + if( err != hipSuccess ) { + attributes->device = 0; + attributes->devicePointer = nullptr; + attributes->hostPointer = nullptr; + return err; + } + const auto& type = attr_.memoryType; + if(attr_.isManaged) { + attributes->type = hicMemoryTypeManaged; + } + else if(type == hipMemoryTypeHost) { + attributes->type = hicMemoryTypeHost; + } + else if(type == hipMemoryTypeDevice) { + attributes->type = hicMemoryTypeDevice; + } + else if(type == hipMemoryTypeUnified) { + attributes->type = hicMemoryTypeManaged; + } + else { + attributes->type = hicMemoryTypeUnregistered; + } + attributes->device = attr_.device; + attributes->devicePointer = attr_.devicePointer; + attributes->hostPointer = attr_.hostPointer; + return err; + }; +#endif + +#elif HIC_BACKEND_DUMMY + #define HIC_BACKEND dummy + #include "hic/hic_dummy/hic_dummy_runtime.h" +#else + #error Unsupported hic backend. Please define HIC_BACKEND_CUDA or HIC_BACKEND_HIP or HIC_BACKEND_DUMMY +#endif + +#define HIC_PREFIX hic +#define HIC_CONCAT_(A, B) A ## B +#define HIC_CONCAT(A, B) HIC_CONCAT_(A, B) +#define HIC_SYMBOL(API) HIC_CONCAT(HIC_PREFIX, API) +#define HIC_BACKEND_SYMBOL(API) HIC_CONCAT(HIC_BACKEND, API) + +#define HIC_TYPE(TYPE) \ + using HIC_SYMBOL(TYPE) = HIC_BACKEND_SYMBOL(TYPE); + +#define HIC_FUNCTION(FUNCTION) \ + template inline \ + auto HIC_SYMBOL(FUNCTION)(Args&&... 
args) -> decltype(HIC_BACKEND_SYMBOL(FUNCTION)(std::forward(args)...)) { \ + return HIC_BACKEND_SYMBOL(FUNCTION)(std::forward(args)...); \ + } + +#define HIC_VALUE(VALUE) \ + constexpr decltype(HIC_BACKEND_SYMBOL(VALUE)) HIC_SYMBOL(VALUE) = HIC_BACKEND_SYMBOL(VALUE); + +//------------------------------------------------ +HIC_NAMESPACE_BEGIN +//------------------------------------------------ + +HIC_FUNCTION(DeviceSynchronize) +HIC_FUNCTION(Free) +HIC_FUNCTION(FreeAsync) +HIC_FUNCTION(GetDeviceCount) +HIC_FUNCTION(GetErrorString) +HIC_FUNCTION(GetLastError) +HIC_FUNCTION(HostGetDevicePointer) +HIC_FUNCTION(HostRegister) +HIC_FUNCTION(HostUnregister) +HIC_FUNCTION(Malloc) +HIC_FUNCTION(MallocAsync) +HIC_FUNCTION(MallocManaged) +HIC_FUNCTION(Memcpy) +HIC_FUNCTION(Memcpy2D) +HIC_FUNCTION(MemcpyAsync) +HIC_FUNCTION(Memcpy2DAsync) +HIC_FUNCTION(MemPrefetchAsync) +HIC_FUNCTION(PeekAtLastError) +#if !HIC_BACKEND_HIP +HIC_FUNCTION(PointerGetAttributes) +#elif HIP_VERSION_MAJOR >= 6 +HIC_FUNCTION(PointerGetAttributes) +#endif +HIC_FUNCTION(StreamCreate) +HIC_FUNCTION(StreamDestroy) +HIC_FUNCTION(StreamSynchronize) + +HIC_TYPE(Error_t) +HIC_TYPE(Event_t) +HIC_TYPE(Stream_t) +#if !HIC_BACKEND_HIP +HIC_TYPE(PointerAttributes) +#elif HIP_VERSION_MAJOR >= 6 +using HIC_SYMBOL(PointerAttributes) = HIC_BACKEND_SYMBOL(PointerAttribute_t); +#endif + +HIC_VALUE(CpuDeviceId) +HIC_VALUE(HostRegisterMapped) +#if !HIC_BACKEND_HIP +HIC_VALUE(MemoryTypeDevice) +HIC_VALUE(MemoryTypeHost) +HIC_VALUE(MemoryTypeUnregistered) +HIC_VALUE(MemoryTypeManaged) +#endif +HIC_VALUE(MemcpyDeviceToHost) +HIC_VALUE(MemcpyHostToDevice) +HIC_VALUE(Success) + +#if HIC_BACKEND_CUDA + constexpr hicError_t hicErrorDeinitialized = cudaErrorCudartUnloading; +#elif HIC_BACKEND_HIP + constexpr hicError_t hicErrorDeinitialized = hipErrorDeinitialized; +#else + constexpr hicError_t hicErrorDeinitialized = 4; +#endif + +//------------------------------------------------ +HIC_NAMESPACE_END +//------------------------------------------------ + +#undef HIC_FUNCTION +#undef HIC_TYPE +#undef HIC_VALUE +#undef HIC_CONCAT +#undef HIC_CONCAT_ +#undef HIC_PREFIX +#undef HIC_SYMBOL +#undef HIC_BACKEND_SYMBOL diff --git a/hic/tests/CMakeLists.txt b/hic/tests/CMakeLists.txt new file mode 100644 index 000000000..ecd23347d --- /dev/null +++ b/hic/tests/CMakeLists.txt @@ -0,0 +1,22 @@ +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. + +ecbuild_add_test( TARGET hic_test_dummy + SOURCES test_hic_dummy.cc + LIBS hic + DEFINITIONS HIC_BACKEND_DUMMY=1 +) + +if( HAVE_HIP OR HAVE_CUDA ) + ecbuild_add_test( TARGET hic_test_hic + SOURCES test_hic.cc + LIBS hic + ) +endif() + +add_subdirectory(test_find_hic) diff --git a/hic/tests/test_find_hic/CMakeLists.txt b/hic/tests/test_find_hic/CMakeLists.txt new file mode 100644 index 000000000..e47613247 --- /dev/null +++ b/hic/tests/test_find_hic/CMakeLists.txt @@ -0,0 +1,31 @@ + +# This test builds a package that requires fypp processing +# It uses the overriding of compile flags, like IFS is using. +# +# Test created to avoid regression after fixing issue FCKIT-19, +# where compile flags were not propagated to fypp-generated files. 
+ +if( HAVE_TESTS ) + + configure_file( test.sh.in ${CMAKE_CURRENT_BINARY_DIR}/test.sh @ONLY ) + + unset( _test_args ) + if( CMAKE_TOOLCHAIN_FILE ) + if( NOT IS_ABSOLUTE ${CMAKE_TOOLCHAIN_FILE}) + set( CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/${CMAKE_TOOLCHAIN_FILE}" ) + endif() + list( APPEND _test_args "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" ) + endif() + foreach( lang CXX ) + if( CMAKE_${lang}_COMPILER ) + list( APPEND _test_args "-DCMAKE_${lang}_COMPILER=${CMAKE_${lang}_COMPILER}" ) + endif() + if( CMAKE_${lang}_FLAGS ) + list( APPEND _test_args "-DCMAKE_${lang}_FLAGS=${CMAKE_${lang}_FLAGS}" ) + endif() + endforeach() + + add_test( NAME hic_test_find_hic + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.sh ${_test_args} ) + +endif() diff --git a/hic/tests/test_find_hic/find_hic/CMakeLists.txt b/hic/tests/test_find_hic/find_hic/CMakeLists.txt new file mode 100644 index 000000000..4900f462c --- /dev/null +++ b/hic/tests/test_find_hic/find_hic/CMakeLists.txt @@ -0,0 +1,6 @@ + +cmake_minimum_required( VERSION 3.18 FATAL_ERROR ) + +project( find_hic LANGUAGES CXX ) + +find_package( hic REQUIRED ) diff --git a/hic/tests/test_find_hic/test.sh.in b/hic/tests/test_find_hic/test.sh.in new file mode 100755 index 000000000..81b37eda1 --- /dev/null +++ b/hic/tests/test_find_hic/test.sh.in @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Description: +# Build downstream example projects +# each individually with make/install +# +# Usage: +# test-individual.sh [CMAKE_ARGUMENTS] + +SOURCE=@PROJECT_SOURCE_DIR@/tests/test_find_hic/find_hic +BUILD=@CMAKE_CURRENT_BINARY_DIR@/build + +# Error handling +function test_failed { + EXIT_CODE=$? + { set +ex; } 2>/dev/null + if [ $EXIT_CODE -ne 0 ]; then + echo "+++++++++++++++++" + echo "Test failed" + echo "+++++++++++++++++" + fi + exit $EXIT_CODE +} +trap test_failed EXIT +set -e -o pipefail +set -x + +# Start with clean build +rm -rf $BUILD + +export hic_ROOT=@PROJECT_BINARY_DIR@ +export ecbuild_DIR=@ecbuild_DIR@ + +# Build +mkdir -p $BUILD && cd $BUILD +cmake $SOURCE \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + "$@" + +# make VERBOSE=1 + +# ctest -VV + +{ set +ex; } 2>/dev/null +echo "+++++++++++++++++" +echo "Test passed" +echo "+++++++++++++++++" diff --git a/hic/tests/test_hic.cc b/hic/tests/test_hic.cc new file mode 100644 index 000000000..7355ec807 --- /dev/null +++ b/hic/tests/test_hic.cc @@ -0,0 +1,66 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ +#include +#include +#include +#include + +#include "hic/hic.h" + +// ----------------------------------------------------------------------------- + +int test_hicMalloc() { + std::cout << "--- " << __FUNCTION__ << std::endl; + void* p; + HIC_CALL( hicMalloc(&p, 8) ); + HIC_CALL( hicFree(p) ); + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +int test_hicMallocManaged() { + std::cout << "--- " << __FUNCTION__ << std::endl; + void* p; + HIC_CALL( hicMallocManaged(&p, 8) ); + HIC_CALL( hicFree(p) ); + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +std::vector> tests = { + test_hicMalloc, + test_hicMallocManaged, +}; + +int main(int argc, char* argv[]) { + int num_devices = 0; + hicGetDeviceCount(&num_devices); + if( num_devices == 0 ) { + std::ignore = hicGetLastError(); + std::cout << "TEST IGNORED, hicGetDeviceCount -> 0" << std::endl; + return 0; + } + std::cout << "hicGetDeviceCount -> " << num_devices << std::endl; + int error = 0; + for( auto& test: tests) { + try { + error += test(); + } + catch( std::exception& e ) { + error += 1; + std::cout << e.what() << std::endl; + } + } + return error; +} diff --git a/hic/tests/test_hic_dummy.cc b/hic/tests/test_hic_dummy.cc new file mode 100644 index 000000000..cb78b8186 --- /dev/null +++ b/hic/tests/test_hic_dummy.cc @@ -0,0 +1,43 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ +#include +#include +#include +#include + +#include "hic/hic.h" + +int test_throw() { + std::cout << "--- " << __FUNCTION__ << std::endl; + // For the dummy backend we expect to have an error thrown when used + void* p; + try { + std::ignore = hicMalloc(&p, 8); + std::ignore = hicFree(p); + } catch(std::runtime_error& e) { + std::string expected_message = "hicMalloc is using the dummy backend and should not be called"; + if (e.what() == expected_message) { + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success + } + } + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 1; // fail +} + +std::vector> tests = { test_throw }; + +int main(int argc, char* argv[]) { + int error = 0; + for( auto& test: tests) { + error += test(); + } + return error; +} diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index d3f0c6b5c..47c82b031 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -60,6 +60,8 @@ library/detail/BlackMagic.h library/detail/Debug.h +parallel/acc/acc.cc +parallel/acc/acc.h parallel/mpi/mpi.cc parallel/mpi/mpi.h parallel/omp/omp.cc @@ -655,6 +657,8 @@ interpolation/method/structured/QuasiCubic2D.cc interpolation/method/structured/QuasiCubic2D.h interpolation/method/structured/QuasiCubic3D.cc interpolation/method/structured/QuasiCubic3D.h +interpolation/method/structured/RegionalLinear2D.cc +interpolation/method/structured/RegionalLinear2D.h interpolation/method/structured/StructuredInterpolation2D.h interpolation/method/structured/StructuredInterpolation2D.tcc interpolation/method/structured/StructuredInterpolation3D.h @@ -824,6 +828,8 @@ util/PolygonXY.cc util/PolygonXY.h util/Metadata.cc util/Metadata.h +util/PackVectorFields.cc +util/PackVectorFields.h util/Point.cc util/Point.h util/Polygon.cc @@ -923,14 +929,15 @@ list( APPEND source_list ) -if( atlas_HAVE_CUDA ) +if( atlas_HAVE_GPU ) include( atlas_host_device ) list( APPEND source_list - parallel/HaloExchangeCUDA.h - parallel/HaloExchangeCUDA.cu + parallel/HaloExchangeGPU.h + parallel/HaloExchangeGPU.hic ) atlas_host_device( source_list SOURCES + parallel/HaloExchangeGPU.hic mesh/Connectivity.cc ) @@ -944,7 +951,7 @@ if( atlas_HAVE_CUDA ) atlas_host_device( source_list SOURCES array/native/NativeArrayView.cc - array/native/NativeDataStore.h + array/native/NativeIndexView.cc ) endif() endif() @@ -977,10 +984,10 @@ ecbuild_add_library( TARGET atlas eckit_mpi eckit_option atlas_io + hic $<${atlas_HAVE_EIGEN}:Eigen3::Eigen> $<${atlas_HAVE_OMP_CXX}:OpenMP::OpenMP_CXX> $<${atlas_HAVE_GRIDTOOLS_STORAGE}:GridTools::gridtools> - $<${atlas_HAVE_CUDA}:CUDA::cudart> PRIVATE_INCLUDES ${CGAL_INCLUDE_DIRS} @@ -994,4 +1001,12 @@ ecbuild_add_library( TARGET atlas ) +if( HAVE_ACC ) + target_link_options( atlas INTERFACE + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> ) +endif() + target_compile_features( atlas PUBLIC cxx_std_17 ) diff --git a/src/atlas/array/SVector.h b/src/atlas/array/SVector.h index af6c81853..b275dc09d 100644 --- a/src/atlas/array/SVector.h +++ b/src/atlas/array/SVector.h @@ -16,7 +16,7 @@ #include "atlas/library/config.h" #include "atlas/util/Allocate.h" -#ifndef __CUDA_ARCH__ +#if ATLAS_HOST_COMPILE #include "atlas/runtime/Exception.h" #endif @@ -34,7 +34,7 @@ class SVector { ATLAS_HOST_DEVICE SVector(const T* data, const idx_t size): data_(data), size_(size), externally_allocated_(true) {} -#ifdef __CUDACC__ +#if ATLAS_HIC_COMPILER // Note that this does not copy!!! 
It is mainly intended to be passed to a CUDA kernel which requires value semantics for this class ATLAS_HOST_DEVICE SVector(SVector const& other): data_(other.data_), size_(other.size_), externally_allocated_(true) {} @@ -70,7 +70,7 @@ class SVector { ATLAS_HOST_DEVICE void clear() { if (data_ && !externally_allocated_) { -#ifndef __CUDA_ARCH__ +#if ATLAS_HOST_COMPILE deallocate(data_, size_); #endif } @@ -103,23 +103,23 @@ class SVector { ATLAS_HOST_DEVICE T& operator()(const idx_t idx) { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } ATLAS_HOST_DEVICE T const& operator()(const idx_t idx) const { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } ATLAS_HOST_DEVICE T& operator[](const idx_t idx) { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } ATLAS_HOST_DEVICE T const& operator[](const idx_t idx) const { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } @@ -140,7 +140,7 @@ class SVector { } void resize(idx_t N) { -#ifndef __CUDA_ARCH__ +#if ATLAS_HOST_COMPILE ATLAS_ASSERT(not externally_allocated_, "Cannot resize externally allocated (or wrapped) data"); #endif resize_impl(N); @@ -161,7 +161,7 @@ class SVector { for (idx_t c = 0; c < size; ++c) { ptr[c].~T(); } - util::delete_managedmem(ptr); + util::delete_managedmem(ptr, size); } } diff --git a/src/atlas/array/Vector.h b/src/atlas/array/Vector.h index aeadd9bfc..ecef21769 100644 --- a/src/atlas/array/Vector.h +++ b/src/atlas/array/Vector.h @@ -16,9 +16,7 @@ #include "atlas/library/config.h" #include "atlas/runtime/Exception.h" -#if ATLAS_HAVE_CUDA -#include -#endif +#include "hic/hic.h" namespace atlas { namespace array { @@ -59,10 +57,9 @@ class Vector { size_ = N; } - void updateDevice() { - if (!data_gpu_) { -#if ATLAS_HAVE_CUDA - ::cudaMalloc((void**)(&data_gpu_), sizeof(T*) * size_); + void allocateDevice() { + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMalloc((void**)(&data_gpu_), sizeof(T*) * size_)); T* buff = new T[size_]; @@ -70,32 +67,36 @@ class Vector { data_[i]->updateDevice(); buff[i] = data_[i]->gpu_object_ptr(); } - ::cudaMemcpy(data_gpu_, buff, sizeof(T*) * size_, cudaMemcpyHostToDevice); - delete buff; -#else + HIC_CALL(hicMemcpy(data_gpu_, buff, sizeof(T*) * size_, hicMemcpyHostToDevice)); + delete[] buff; + } + else { data_gpu_ = data_; -#endif - size_gpu_ = size_; + } + size_gpu_ = size_; + } + + void updateDevice() { + if (!data_gpu_) { + allocateDevice(); } else { ATLAS_ASSERT(size_gpu_ == size_); -#if ATLAS_HAVE_CUDA - for (idx_t i = 0; i < size(); ++i) { - data_[i]->updateDevice(); - assert(data_gpu_[i] == data_[i]->gpu_object_ptr()); + if constexpr (ATLAS_HAVE_GPU) { + for (idx_t i = 0; i < size(); ++i) { + data_[i]->updateDevice(); + assert(data_gpu_[i] == data_[i]->gpu_object_ptr()); + } } -#endif } } void updateHost() { ATLAS_ASSERT(data_gpu_ != nullptr); - -#if ATLAS_HAVE_CUDA - - for (idx_t i = 0; i < size(); ++i) { - data_[i]->updateHost(); + if constexpr (ATLAS_HAVE_GPU) { + for (idx_t i = 0; i < size(); ++i) { + data_[i]->updateHost(); + } } -#endif } T* gpu_object_ptr() { return data_gpu_; } diff --git a/src/atlas/array/gridtools/GridToolsDataStore.h b/src/atlas/array/gridtools/GridToolsDataStore.h index d2017f14e..8fb604640 100644 --- a/src/atlas/array/gridtools/GridToolsDataStore.h +++ b/src/atlas/array/gridtools/GridToolsDataStore.h @@ -10,6 +10,7 @@ #pragma once +#include "atlas/library/config.h" #include "atlas/array/ArrayDataStore.h" #include 
"atlas/array/gridtools/GridToolsTraits.h" @@ -54,7 +55,7 @@ struct GridToolsDataStore : ArrayDataStore { accUnmap(); } - bool deviceAllocated() const override { return ATLAS_HAVE_CUDA; } + bool deviceAllocated() const override { return ATLAS_HAVE_GPU; } bool hostNeedsUpdate() const override { return data_store_->host_needs_update(); } @@ -112,7 +113,7 @@ struct GridToolsDataStore : ArrayDataStore { return ::gridtools::make_host_view<::gridtools::access_mode::read_only>(*data_store_).data(); } void* device_data() const { -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU return ::gridtools::make_device_view<::gridtools::access_mode::read_only>(*data_store_).data(); #else return ::gridtools::make_host_view<::gridtools::access_mode::read_only>(*data_store_).data(); diff --git a/src/atlas/array/gridtools/GridToolsIndexView.cu b/src/atlas/array/gridtools/GridToolsIndexView.cu deleted file mode 100644 index a4a8427b2..000000000 --- a/src/atlas/array/gridtools/GridToolsIndexView.cu +++ /dev/null @@ -1,11 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include "GridToolsIndexView.cc" diff --git a/src/atlas/array/native/NativeArrayView.h b/src/atlas/array/native/NativeArrayView.h index 5c85f115f..e9e90a47e 100644 --- a/src/atlas/array/native/NativeArrayView.h +++ b/src/atlas/array/native/NativeArrayView.h @@ -352,7 +352,9 @@ class ArrayView { ATLAS_HOST_DEVICE void check_bounds(Ints... idx) const { static_assert(sizeof...(idx) == Rank, "Expected number of indices is different from rank of array"); +#if ATLAS_HOST_COMPILE return check_bounds_part<0>(idx...); +#endif } #else template @@ -366,11 +368,13 @@ class ArrayView { ATLAS_HOST_DEVICE void check_bounds_force(Ints... idx) const { static_assert(sizeof...(idx) == Rank, "Expected number of indices is different from rank of array"); +#if ATLAS_HOST_COMPILE return check_bounds_part<0>(idx...); +#endif } template - ATLAS_HOST_DEVICE + ATLAS_HOST void check_bounds_part(Int idx, Ints... 
next_idx) const { if (idx_t(idx) >= shape_[Dim]) { throw_OutOfRange("ArrayView", array_dim(), idx, shape_[Dim]); @@ -379,7 +383,7 @@ class ArrayView { } template - ATLAS_HOST_DEVICE + ATLAS_HOST void check_bounds_part(Int last_idx) const { if (idx_t(last_idx) >= shape_[Dim]) { throw_OutOfRange("ArrayView", array_dim(), last_idx, shape_[Dim]); diff --git a/src/atlas/array/native/NativeDataStore.h b/src/atlas/array/native/NativeDataStore.h index d0ce0baef..60d35dd50 100644 --- a/src/atlas/array/native/NativeDataStore.h +++ b/src/atlas/array/native/NativeDataStore.h @@ -16,20 +16,18 @@ #include // std::numeric_limits::signaling_NaN #include -#if ATLAS_HAVE_CUDA -#include -#endif - #include "atlas/array/ArrayDataStore.h" #include "atlas/library/Library.h" #include "atlas/library/config.h" +#include "atlas/parallel/acc/acc.h" #include "atlas/runtime/Exception.h" #include "atlas/runtime/Log.h" #include "eckit/log/Bytes.h" -#if ATLAS_HAVE_ACC -#include "atlas_acc_support/atlas_acc_map_data.h" -#endif +#include "hic/hic.h" + + +#define ATLAS_ACC_DEBUG 0 //------------------------------------------------------------------------------ @@ -96,17 +94,31 @@ template void initialise(Value[], size_t) {} #endif +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + template class DataStore : public ArrayDataStore { public: DataStore(size_t size): size_(size) { allocateHost(); initialise(host_data_, size_); -#if ATLAS_HAVE_CUDA - device_updated_ = false; -#else - device_data_ = host_data_; -#endif + if (ATLAS_HAVE_GPU && devices()) { + device_updated_ = false; + } + else { + device_data_ = host_data_; + } } ~DataStore() { @@ -115,28 +127,28 @@ class DataStore : public ArrayDataStore { } void updateDevice() const override { -#if ATLAS_HAVE_CUDA - if (not device_allocated_) { - allocateDevice(); - } - cudaError_t err = cudaMemcpy(device_data_, host_data_, size_*sizeof(Value), cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateDevice: "+std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (not device_allocated_) { + allocateDevice(); + } + hicError_t err = hicMemcpy(device_data_, host_data_, size_*sizeof(Value), hicMemcpyHostToDevice); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here()); + } + device_updated_ = true; } - device_updated_ = true; -#endif } void updateHost() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - cudaError_t err = cudaMemcpy(host_data_, device_data_, size_*sizeof(Value), cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateHost: "+std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + hicError_t err = hicMemcpy(host_data_, device_data_, size_*sizeof(Value), hicMemcpyDeviceToHost); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateHost: "+std::string(hicGetErrorString(err)), Here()); + } + host_updated_ = true; } - host_updated_ = true; } -#endif } bool valid() const override { return true; } @@ -162,33 +174,33 @@ class DataStore : public ArrayDataStore { bool deviceAllocated() const override { return device_allocated_; } void allocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - return; - } - if (size_) { - 
cudaError_t err = cudaMalloc((void**)&device_data_, sizeof(Value)*size_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (device_allocated_) { + return; + } + if (size_) { + hicError_t err = hicMalloc((void**)&device_data_, sizeof(Value)*size_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_allocated_ = true; + accMap(); } - device_allocated_ = true; - accMap(); } -#endif } void deallocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - accUnmap(); - cudaError_t err = cudaFree(device_data_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + accUnmap(); + hicError_t err = hicFree(device_data_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_data_ = nullptr; + device_allocated_ = false; } - device_data_ = nullptr; - device_allocated_ = false; } -#endif } bool hostNeedsUpdate() const override { return (not host_updated_); } @@ -210,13 +222,17 @@ class DataStore : public ArrayDataStore { void* voidDeviceData() override { return static_cast(device_data_); } void accMap() const override { -#if ATLAS_HAVE_ACC - if (not acc_mapped_) { + if (not acc_mapped_ && acc::devices()) { ATLAS_ASSERT(deviceAllocated(),"Could not accMap as device data is not allocated"); - atlas_acc_map_data((void*)host_data_, (void*)device_data_, size_ * sizeof(Value)); + ATLAS_ASSERT(!atlas::acc::is_present(host_data_, size_ * sizeof(Value))); + if constexpr(ATLAS_ACC_DEBUG) { + std::cout << " + acc_map_data(hostptr:"< class WrappedDataStore : public ArrayDataStore { public: WrappedDataStore(Value* host_data, size_t size): host_data_(host_data), size_(size) { -#if ATLAS_HAVE_CUDA - device_updated_ = false; -#else - device_data_ = host_data_; -#endif + if (ATLAS_HAVE_GPU && devices()) { + device_updated_ = false; + } + else { + device_data_ = host_data_; + } } void updateDevice() const override { -#if ATLAS_HAVE_CUDA - if (not device_allocated_) { - allocateDevice(); - } - cudaError_t err = cudaMemcpy(device_data_, host_data_, size_*sizeof(Value), cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateDevice: "+std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (not device_allocated_) { + allocateDevice(); + } + hicError_t err = hicMemcpy(device_data_, host_data_, size_*sizeof(Value), hicMemcpyHostToDevice); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here()); + } + device_updated_ = true; } - device_updated_ = true; -#endif } void updateHost() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - cudaError_t err = cudaMemcpy(host_data_, device_data_, size_*sizeof(Value), cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateHost: "+std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + hicError_t err = hicMemcpy(host_data_, device_data_, size_*sizeof(Value), hicMemcpyDeviceToHost); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateHost: 
"+std::string(hicGetErrorString(err)), Here()); + } + host_updated_ = true; } - host_updated_ = true; } -#endif } bool valid() const override { return true; } @@ -346,33 +366,33 @@ class WrappedDataStore : public ArrayDataStore { bool deviceAllocated() const override { return device_allocated_; } void allocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - return; - } - if (size_) { - cudaError_t err = cudaMalloc((void**)&device_data_, sizeof(Value)*size_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (device_allocated_) { + return; + } + if (size_) { + hicError_t err = hicMalloc((void**)&device_data_, sizeof(Value)*size_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_allocated_ = true; + accMap(); } - device_allocated_ = true; - accMap(); } -#endif } void deallocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - accUnmap(); - cudaError_t err = cudaFree(device_data_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + accUnmap(); + hicError_t err = hicFree(device_data_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_data_ = nullptr; + device_allocated_ = false; } - device_data_ = nullptr; - device_allocated_ = false; } -#endif } bool hostNeedsUpdate() const override { return (not host_updated_); } @@ -394,13 +414,17 @@ class WrappedDataStore : public ArrayDataStore { void* voidDeviceData() override { return static_cast(device_data_); } void accMap() const override { -#if ATLAS_HAVE_ACC - if (not acc_mapped_) { + if (not acc_mapped_ && acc::devices()) { ATLAS_ASSERT(deviceAllocated(),"Could not accMap as device data is not allocated"); - atlas_acc_map_data((void*)host_data_, (void*)device_data_, size_ * sizeof(Value)); + ATLAS_ASSERT(!atlas::acc::is_present(host_data_, size_ * sizeof(Value))); + if constexpr(ATLAS_ACC_DEBUG) { + std::cout << " + acc_map_data(hostptr:"< make_host_view(const Array& array) { template ArrayView make_device_view(Array& array) { -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU ATLAS_ASSERT(array.deviceAllocated(),"make_device_view: Array not allocated on device"); return ArrayView((array.device_data()), array.shape(), array.strides()); #else @@ -59,7 +59,7 @@ ArrayView make_device_view(Array& array) { template ArrayView make_device_view(const Array& array) { -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU ATLAS_ASSERT(array.deviceAllocated(),"make_device_view: Array not allocated on device"); return ArrayView(array.device_data(), array.shape(), array.strides()); #else diff --git a/src/atlas/field/detail/FieldImpl.h b/src/atlas/field/detail/FieldImpl.h index 066f2cba7..cdd9c308f 100644 --- a/src/atlas/field/detail/FieldImpl.h +++ b/src/atlas/field/detail/FieldImpl.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "atlas/util/Object.h" diff --git a/src/atlas/functionspace/Spectral.h b/src/atlas/functionspace/Spectral.h index dbb32b0fd..7e5fe7c43 100644 --- a/src/atlas/functionspace/Spectral.h +++ b/src/atlas/functionspace/Spectral.h @@ -47,9 +47,10 @@ class Spectral : public functionspace::FunctionSpaceImpl { n = total wavenumber 
const auto zonal_wavenumbers = Spectral::zonal_wavenumbers(); + const int truncation = Spectral::truncation(); idx_t jc=0; for( int jm=0; jm this->halo()) { - throw_Exception("StructuredColumsn does not contain a halo of size " + std::to_string(halo) + ".", Here()); + throw_Exception("StructuredColumns does not contain a halo of size " + std::to_string(halo) + ".", Here()); } polygons_[halo].reset(new grid::StructuredPartitionPolygon(*this, halo)); } @@ -726,7 +726,7 @@ struct FixupHaloForVectors { template void apply(Field& field) { std::string type = field.metadata().getString("type", "scalar"); - if (type == "vector ") { + if (type == "vector") { ATLAS_NOTIMPLEMENTED; } } @@ -770,12 +770,16 @@ struct FixupHaloForVectors<3> { template void apply(Field& field) { std::string type = field.metadata().getString("type", "scalar"); + + // if levels not setup in functionspace use levels from field + idx_t k_end = (fs.k_begin() == 0) && (fs.k_end() == 0) ? field.levels() : fs.k_end(); + if (type == "vector") { auto array = array::make_view(field); for (idx_t j = fs.j_begin_halo(); j < 0; ++j) { for (idx_t i = fs.i_begin_halo(j); i < fs.i_end_halo(j); ++i) { idx_t n = fs.index(i, j); - for (idx_t k = fs.k_begin(); k < fs.k_end(); ++k) { + for (idx_t k = fs.k_begin(); k < k_end; ++k) { array(n, k, XX) = -array(n, k, XX); array(n, k, YY) = -array(n, k, YY); } @@ -784,7 +788,7 @@ struct FixupHaloForVectors<3> { for (idx_t j = fs.grid().ny(); j < fs.j_end_halo(); ++j) { for (idx_t i = fs.i_begin_halo(j); i < fs.i_end_halo(j); ++i) { idx_t n = fs.index(i, j); - for (idx_t k = fs.k_begin(); k < fs.k_end(); ++k) { + for (idx_t k = fs.k_begin(); k < k_end; ++k) { array(n, k, XX) = -array(n, k, XX); array(n, k, YY) = -array(n, k, YY); } diff --git a/src/atlas/functionspace/detail/StructuredColumns_setup.cc b/src/atlas/functionspace/detail/StructuredColumns_setup.cc index 4bdc0a4e9..094b47849 100644 --- a/src/atlas/functionspace/detail/StructuredColumns_setup.cc +++ b/src/atlas/functionspace/detail/StructuredColumns_setup.cc @@ -588,13 +588,20 @@ void StructuredColumns::setup(const grid::Distribution& distribution, const ecki atlas_omp_parallel_for(idx_t n = 0; n < gridpoints.size(); ++n) { const GridPoint& gp = gridpoints[n]; - if (gp.j >= 0 && gp.j < grid_->ny()) { - xy(gp.r, XX) = grid_->x(gp.i, gp.j); - xy(gp.r, YY) = grid_->y(gp.j); - } - else { - xy(gp.r, XX) = compute_x(gp.i, gp.j); - xy(gp.r, YY) = compute_y(gp.j); + if (regional) { + std::array lonlatVec; + grid_->lonlat(gp.i, gp.j, lonlatVec.data()); + xy(gp.r, XX) = lonlatVec[0]; + xy(gp.r, YY) = lonlatVec[1]; + } else { + if (gp.j >= 0 && gp.j < grid_->ny()) { + xy(gp.r, XX) = grid_->x(gp.i, gp.j); + xy(gp.r, YY) = grid_->y(gp.j); + } + else { + xy(gp.r, XX) = compute_x(gp.i, gp.j); + xy(gp.r, YY) = compute_y(gp.j); + } } bool in_domain(false); diff --git a/src/atlas/interpolation/method/MethodFactory.cc b/src/atlas/interpolation/method/MethodFactory.cc index a34e29961..f7a25c72f 100644 --- a/src/atlas/interpolation/method/MethodFactory.cc +++ b/src/atlas/interpolation/method/MethodFactory.cc @@ -24,6 +24,7 @@ #include "structured/Linear3D.h" #include "structured/QuasiCubic2D.h" #include "structured/QuasiCubic3D.h" +#include "structured/RegionalLinear2D.h" #include "unstructured/FiniteElement.h" #include "unstructured/UnstructuredBilinearLonLat.h" @@ -46,6 +47,7 @@ void force_link() { MethodBuilder(); MethodBuilder(); MethodBuilder(); + MethodBuilder(); MethodBuilder(); MethodBuilder(); MethodBuilder(); diff --git 
a/src/atlas/interpolation/method/binning/Binning.cc b/src/atlas/interpolation/method/binning/Binning.cc index cbbe4d7aa..f5a6b0743 100644 --- a/src/atlas/interpolation/method/binning/Binning.cc +++ b/src/atlas/interpolation/method/binning/Binning.cc @@ -102,8 +102,9 @@ void Binning::do_setup(const FunctionSpace& source, // start of the indexes associated with the row 'i+1' size_t ubound = ptr_tamx_o[idx_row_next]; - if (lbound == ubound) + if (lbound == ubound) { continue; + } double sum_row = 0; for (size_t i = lbound; i < ubound; ++i) { diff --git a/src/atlas/interpolation/method/cubedsphere/CellFinder.cc b/src/atlas/interpolation/method/cubedsphere/CellFinder.cc index 9912d16f1..4a77231a0 100644 --- a/src/atlas/interpolation/method/cubedsphere/CellFinder.cc +++ b/src/atlas/interpolation/method/cubedsphere/CellFinder.cc @@ -37,7 +37,7 @@ CellFinder::CellFinder(const Mesh& mesh, const util::Config& config): mesh_{mesh auto halo = config.getInt("halo", 0); for (idx_t i = 0; i < mesh_.cells().size(); ++i) { if (haloView(i) <= halo) { - points.emplace_back(PointLonLat(lonlatView(i, LON), lonlatView(i, LAT))); + points.emplace_back(lonlatView(i, LON), lonlatView(i, LAT)); payloads.emplace_back(i); } } diff --git a/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h b/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h index 34cfdacb2..8d15d035a 100644 --- a/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h +++ b/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h @@ -70,7 +70,7 @@ class ComplexMatrixMultiply { // tinyNum ~= 2.3e-13 for double. constexpr auto tinyNum = 1024 * std::numeric_limits::epsilon(); const auto complexMagnitude = std::abs(complexRowIter.value()); - const auto realValue = realRowIter.value(); + const auto realValue = std::abs(realRowIter.value()); const auto error = std::abs(complexMagnitude - realValue); const auto printError = [&]() { diff --git a/src/atlas/interpolation/method/structured/RegionalLinear2D.cc b/src/atlas/interpolation/method/structured/RegionalLinear2D.cc new file mode 100644 index 000000000..e8588b617 --- /dev/null +++ b/src/atlas/interpolation/method/structured/RegionalLinear2D.cc @@ -0,0 +1,572 @@ +/* + * (C) Copyright 2024 Meteorologisk Institutt + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ */ + +#include "atlas/interpolation/method/structured/RegionalLinear2D.h" + +#include + +#include "atlas/array.h" +#include "atlas/interpolation/method/MethodFactory.h" +#include "atlas/util/KDTree.h" +#include "atlas/util/Point.h" + +namespace atlas { +namespace interpolation { +namespace method { + +namespace { +MethodBuilder __builder("regional-linear-2d"); +} + +void RegionalLinear2D::print(std::ostream&) const { ATLAS_NOTIMPLEMENTED; } + +void RegionalLinear2D::do_setup(const Grid& source, const Grid& target, + const Cache&) { + ATLAS_NOTIMPLEMENTED; +} + +void RegionalLinear2D::do_setup(const FunctionSpace& source, + const FunctionSpace& target) { + ATLAS_TRACE("interpolation::method::RegionalLinear2D::do_setup"); + source_ = source; + target_ = target; + + if (target_.size() == 0) { + return; + } + ASSERT(source_.type() == "StructuredColumns"); + + // Get grid parameters + const functionspace::StructuredColumns sourceFs(source_); + const RegularGrid sourceGrid(sourceFs.grid()); + const Projection & sourceProj = sourceGrid.projection(); + const size_t sourceNx = sourceGrid.nx(); + const size_t sourceNy = sourceGrid.ny(); + const double sourceDx = sourceGrid.dx(); + const double sourceDy = std::abs(sourceGrid.y(1)-sourceGrid.y(0)); + const bool reversedY = sourceGrid.y(1) < sourceGrid.y(0); + + // Check grid regularity in y direction + for (size_t sourceJ = 0; sourceJ < sourceNy-1; ++sourceJ) { + if (reversedY) { + ASSERT(std::abs(sourceGrid.y(sourceJ)-sourceGrid.y(sourceJ+1)-sourceDy) < 1.0e-12*sourceDy); + } else { + ASSERT(std::abs(sourceGrid.y(sourceJ+1)-sourceGrid.y(sourceJ)-sourceDy) < 1.0e-12*sourceDy); + } + } + + // Source grid indices + const Field sourceFieldIndexI = sourceFs.index_i(); + const Field sourceFieldIndexJ = sourceFs.index_j(); + const auto sourceIndexIView = array::make_view(sourceFieldIndexI); + const auto sourceIndexJView = array::make_view(sourceFieldIndexJ); + sourceSize_ = sourceFs.size(); + + // Destination grid size + targetSize_ = target_.size(); + + // Ghost points + const auto sourceGhostView = array::make_view(sourceFs.ghost()); + const auto targetGhostView = array::make_view(target_.ghost()); + + // Define reduced grid horizontal distribution + std::vector mpiTask(sourceNx*sourceNy, 0); + for (size_t sourceJnode = 0; sourceJnode < sourceSize_; ++sourceJnode) { + if (sourceGhostView(sourceJnode) == 0) { + mpiTask[(sourceIndexIView(sourceJnode)-1)*sourceNy+sourceIndexJView(sourceJnode)-1] = comm_.rank(); + } + } + comm_.allReduceInPlace(mpiTask.begin(), mpiTask.end(), eckit::mpi::sum()); + + // Define local tree on destination grid + std::vector targetPoints; + std::vector targetIndices; + targetPoints.reserve(targetSize_); + targetIndices.reserve(targetSize_); + const auto targetLonLatView = array::make_view(target_.lonlat()); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + PointLonLat p({targetLonLatView(targetJnode, 0), targetLonLatView(targetJnode, 1)}); + sourceProj.lonlat2xy(p); + targetPoints.emplace_back(p[0], p[1], 0.0); + targetIndices.emplace_back(targetJnode); + } + util::IndexKDTree targetTree; + if (targetSize_ > 0) { + targetTree.build(targetPoints, targetIndices); + } + const double radius = std::sqrt(sourceDx*sourceDx+sourceDy*sourceDy); + + // Delta for colocation + const double eps = 1.0e-8; + + // RecvCounts and received points list + targetRecvCounts_.resize(comm_.size()); + std::fill(targetRecvCounts_.begin(), targetRecvCounts_.end(), 0); + std::vector targetRecvPointsList; + for (size_t sourceJ = 
0; sourceJ < sourceNy; ++sourceJ) { + double yMin, yMax; + if (reversedY) { + yMin = sourceJ < sourceNy-1 ? sourceGrid.y(sourceJ+1)-eps : -std::numeric_limits::max(); + yMax = sourceJ > 0 ? sourceGrid.y(sourceJ-1)+eps : std::numeric_limits::max(); + } else { + yMin = sourceJ > 0 ? sourceGrid.y(sourceJ-1)-eps : -std::numeric_limits::max(); + yMax = sourceJ < sourceNy-1 ? sourceGrid.y(sourceJ+1)+eps : std::numeric_limits::max(); + } + for (size_t sourceI = 0; sourceI < sourceNx; ++sourceI) { + const double xMin = sourceI > 0 ? sourceGrid.x(sourceI-1)-eps : -std::numeric_limits::max(); + const double xMax = sourceI < sourceNx-1 ? sourceGrid.x(sourceI+1)+eps : + std::numeric_limits::max(); + + bool pointsNeeded = false; + if (targetSize_ > 0) { + const Point3 p(sourceGrid.x(sourceI), sourceGrid.y(sourceJ), 0.0); + const auto list = targetTree.closestPointsWithinRadius(p, radius); + for (const auto & item : list) { + const PointXYZ targetPoint = item.point(); + const size_t targetJnode = item.payload(); + if (targetGhostView(targetJnode) == 0) { + const bool inX = (xMin <= targetPoint[0] && targetPoint[0] <= xMax); + const bool inY = (yMin <= targetPoint[1] && targetPoint[1] <= yMax); + if (inX && inY) { + pointsNeeded = true; + break; + } + } + } + } + if (pointsNeeded) { + ++targetRecvCounts_[mpiTask[sourceI*sourceNy+sourceJ]]; + targetRecvPointsList.push_back(sourceI*sourceNy+sourceJ); + } + } + } + + // Buffer size + targetRecvSize_ = targetRecvPointsList.size(); + + if (targetRecvSize_ > 0) { + // RecvDispls + targetRecvDispls_.push_back(0); + for (size_t jt = 0; jt < comm_.size()-1; ++jt) { + targetRecvDispls_.push_back(targetRecvDispls_[jt]+targetRecvCounts_[jt]); + } + + // Allgather RecvCounts + eckit::mpi::Buffer targetRecvCountsBuffer(comm_.size()); + comm_.allGatherv(targetRecvCounts_.begin(), targetRecvCounts_.end(), targetRecvCountsBuffer); + std::vector targetRecvCountsGlb_ = std::move(targetRecvCountsBuffer.buffer); + + // SendCounts + for (size_t jt = 0; jt < comm_.size(); ++jt) { + sourceSendCounts_.push_back(targetRecvCountsGlb_[jt*comm_.size()+comm_.rank()]); + } + + // Buffer size + sourceSendSize_ = 0; + for (const auto & n : sourceSendCounts_) { + sourceSendSize_ += n; + } + + // SendDispls + sourceSendDispls_.push_back(0); + for (size_t jt = 0; jt < comm_.size()-1; ++jt) { + sourceSendDispls_.push_back(sourceSendDispls_[jt]+sourceSendCounts_[jt]); + } + + // Ordered received points list + std::vector targetRecvOffset(comm_.size(), 0); + std::vector targetRecvPointsListOrdered(targetRecvSize_); + for (size_t jr = 0; jr < targetRecvSize_; ++jr) { + const size_t sourceI = targetRecvPointsList[jr]/sourceNy; + const size_t sourceJ = targetRecvPointsList[jr]-sourceI*sourceNy; + size_t jt = mpiTask[sourceI*sourceNy+sourceJ]; + size_t jro = targetRecvDispls_[jt]+targetRecvOffset[jt]; + targetRecvPointsListOrdered[jro] = targetRecvPointsList[jr]; + ++targetRecvOffset[jt]; + } + std::vector sourceSentPointsList(sourceSendSize_); + comm_.allToAllv(targetRecvPointsListOrdered.data(), targetRecvCounts_.data(), targetRecvDispls_.data(), + sourceSentPointsList.data(), sourceSendCounts_.data(), sourceSendDispls_.data()); + + // Sort indices + std::vector gij; + for (size_t sourceJnode = 0; sourceJnode < sourceSize_; ++sourceJnode) { + if (sourceGhostView(sourceJnode) == 0) { + gij.push_back((sourceIndexIView(sourceJnode)-1)*sourceNy+sourceIndexJView(sourceJnode)-1); + } else { + gij.push_back(-1); + } + } + std::vector gidx(sourceSize_); + std::iota(gidx.begin(), gidx.end(), 0); + 
std::stable_sort(gidx.begin(), gidx.end(), [&gij](size_t i1, size_t i2) + {return gij[i1] < gij[i2];}); + std::vector ridx(sourceSendSize_); + std::iota(ridx.begin(), ridx.end(), 0); + std::stable_sort(ridx.begin(), ridx.end(), [&sourceSentPointsList](size_t i1, size_t i2) + {return sourceSentPointsList[i1] < sourceSentPointsList[i2];}); + + // Mapping for sent points + sourceSendMapping_.resize(sourceSendSize_); + size_t sourceJnode = 0; + for (size_t js = 0; js < sourceSendSize_; ++js) { + while (gij[gidx[sourceJnode]] < sourceSentPointsList[ridx[js]]) { + ++sourceJnode; + ASSERT(sourceJnode < sourceSize_); + } + sourceSendMapping_[ridx[js]] = gidx[sourceJnode]; + } + + // Sort indices + std::vector idx(targetRecvPointsListOrdered.size()); + std::iota(idx.begin(), idx.end(), 0); + std::stable_sort(idx.begin(), idx.end(), [&targetRecvPointsListOrdered](size_t i1, size_t i2) + {return targetRecvPointsListOrdered[i1] < targetRecvPointsListOrdered[i2];}); + + // Compute horizontal interpolation + stencil_.resize(targetSize_); + weights_.resize(targetSize_); + stencilSize_.resize(targetSize_); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + // Interpolation element default values + if (targetGhostView(targetJnode) == 0) { + // Destination grid indices + const double targetX = targetPoints[targetJnode][0]; + bool colocatedX = false; + int indexI = -1; + for (size_t sourceI = 0; sourceI < sourceNx-1; ++sourceI) { + if (std::abs(targetX-sourceGrid.x(sourceI)) < eps) { + indexI = sourceI; + colocatedX = true; + } + if (sourceGrid.x(sourceI)+eps < targetX && targetX < sourceGrid.x(sourceI+1)-eps) { + indexI = sourceI; + colocatedX = false; + } + } + if (std::abs(targetX-sourceGrid.x(sourceNx-1)) < eps) { + indexI = sourceNx-1; + colocatedX = true; + } + const double targetY = targetPoints[targetJnode][1]; + bool colocatedY = false; + int indexJ = -1; + for (size_t sourceJ = 0; sourceJ < sourceNy-1; ++sourceJ) { + if (std::abs(targetY-sourceGrid.y(sourceJ)) < eps) { + indexJ = sourceJ; + colocatedY = true; + } + if (reversedY) { + if (sourceGrid.y(sourceJ+1)+eps < targetY && targetY < sourceGrid.y(sourceJ)-eps) { + indexJ = sourceJ; + colocatedY = false; + } + } else { + if (sourceGrid.y(sourceJ)+eps < targetY && targetY < sourceGrid.y(sourceJ+1)-eps) { + indexJ = sourceJ; + colocatedY = false; + } + } + } + if (std::abs(targetY-sourceGrid.y(sourceNy-1)) < eps) { + indexJ = sourceNy-1; + colocatedY = true; + } + + if (indexI == -1 || indexJ == -1) { + // Point outside of the domain, using nearest neighbor + if (indexI > -1) { + if (!colocatedX && + (std::abs(targetX-sourceGrid.x(indexI+1)) < std::abs(targetX-sourceGrid.x(indexI)))) { + indexI += 1; + } + } else { + if (std::abs(targetX-sourceGrid.x(0)) < std::abs(targetX-sourceGrid.x(sourceNx-1))) { + indexI = 0; + } else { + indexI = sourceNx-1; + } + } + if (indexJ > -1) { + if (!colocatedY && + (std::abs(targetY-sourceGrid.y(indexJ+1)) < std::abs(targetY-sourceGrid.y(indexJ)))) { + indexJ += 1; + } + } else { + if (std::abs(targetY-sourceGrid.y(0)) < std::abs(targetY-sourceGrid.y(sourceNy-1))) { + indexJ = 0; + } else { + indexJ = sourceNy-1; + } + Log::info() << "WARNING: point outside of the domain" << std::endl; + } + + // Colocated point (actually nearest neighbor) + colocatedX = true; + colocatedY = true; + } + + // Bilinear interpolation factor + const double alphaX = 1.0-(sourceGrid.x(indexI)+sourceDx-targetX)/sourceDx; + const double alphaY = reversedY ? 
(sourceGrid.y(indexJ)-targetY)/sourceDy + : 1.0-(sourceGrid.y(indexJ)+sourceDy-targetY)/sourceDy; + + // Points to find + std::vector toFind = {true, !colocatedX, !colocatedY, !colocatedX && !colocatedY}; + std::vector valueToFind = {indexI*sourceNy+indexJ, (indexI+1)*sourceNy+indexJ, + indexI*sourceNy+(indexJ+1), (indexI+1)*sourceNy+(indexJ+1)}; + std::array foundIndex; + foundIndex.fill(-1); + + // Binary search for each point + for (size_t jj = 0; jj < 4; ++jj) { + if (toFind[jj]) { + size_t low = 0; + size_t high = targetRecvPointsListOrdered.size()-1; + while (low <= high) { + size_t mid = low+(high-low)/2; + if (valueToFind[jj] == static_cast(targetRecvPointsListOrdered[idx[mid]])) { + foundIndex[jj] = idx[mid]; + break; + } + if (valueToFind[jj] > static_cast(targetRecvPointsListOrdered[idx[mid]])) { + low = mid+1; + } + if (valueToFind[jj] < static_cast(targetRecvPointsListOrdered[idx[mid]])) { + high = mid-1; + } + } + ASSERT(foundIndex[jj] > -1); + ASSERT(static_cast(targetRecvPointsListOrdered[foundIndex[jj]]) == + valueToFind[jj]); + } + } + + // Create interpolation operations + if (colocatedX && colocatedY) { + // Colocated point + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = 1.0; + stencilSize_[targetJnode] = 1; + } else if (colocatedY) { + // Linear interpolation along x + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = 1.0-alphaX; + stencil_[targetJnode][1] = foundIndex[1]; + weights_[targetJnode][1] = alphaX; + stencilSize_[targetJnode] = 2; + } else if (colocatedX) { + // Linear interpolation along y + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = 1.0-alphaY; + stencil_[targetJnode][1] = foundIndex[2]; + weights_[targetJnode][1] = alphaY; + stencilSize_[targetJnode] = 2; + } else { + // Bilinear interpolation + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = (1.0-alphaX)*(1.0-alphaY); + stencil_[targetJnode][1] = foundIndex[1]; + weights_[targetJnode][1] = alphaX*(1.0-alphaY); + stencil_[targetJnode][2] = foundIndex[2]; + weights_[targetJnode][2] = (1.0-alphaX)*alphaY; + stencil_[targetJnode][3] = foundIndex[3]; + weights_[targetJnode][3] = alphaX*alphaY; + stencilSize_[targetJnode] = 4; + } + } else { + // Ghost point + stencilSize_[targetJnode] = 0; + } + } + } +} + +void RegionalLinear2D::do_execute(const FieldSet& sourceFieldSet, + FieldSet& targetFieldSet, + Metadata& metadata) const { + ATLAS_TRACE("atlas::interpolation::method::RegionalLinear2D::do_execute()"); + ATLAS_ASSERT(sourceFieldSet.size() == targetFieldSet.size()); + + for (auto i = 0; i < sourceFieldSet.size(); ++i) { + do_execute(sourceFieldSet[i], targetFieldSet[i], metadata); + } +} + +void RegionalLinear2D::do_execute(const Field& sourceField, Field& targetField, + Metadata&) const { + ATLAS_TRACE("atlas::interpolation::method::RegionalLinear2D::do_execute()"); + + if (targetField.size() == 0) { + return; + } + + // Check number of levels + ASSERT(sourceField.levels() == targetField.levels()); + const size_t nz = sourceField.levels() > 0 ? sourceField.levels() : 1; + const size_t ndim = sourceField.levels() > 0 ? 
2 : 1; + + // Scale counts and displs for all levels + std::vector sourceSendCounts3D(comm_.size()); + std::vector sourceSendDispls3D(comm_.size()); + std::vector targetRecvCounts3D(comm_.size()); + std::vector targetRecvDispls3D(comm_.size()); + for (size_t jt = 0; jt < comm_.size(); ++jt) { + sourceSendCounts3D[jt] = sourceSendCounts_[jt]*nz; + sourceSendDispls3D[jt] = sourceSendDispls_[jt]*nz; + targetRecvCounts3D[jt] = targetRecvCounts_[jt]*nz; + targetRecvDispls3D[jt] = targetRecvDispls_[jt]*nz; + } + + // Halo exchange + haloExchange(sourceField); + + // Serialize + std::vector sourceSendVec(sourceSendSize_*nz); + if (ndim == 1) { + const auto sourceView = array::make_view(sourceField); + for (size_t js = 0; js < sourceSendSize_; ++js) { + size_t sourceJnode = sourceSendMapping_[js]; + sourceSendVec[js] = sourceView(sourceJnode); + } + } else if (ndim == 2) { + const auto sourceView = array::make_view(sourceField); + for (size_t js = 0; js < sourceSendSize_; ++js) { + for (size_t k = 0; k < nz; ++k) { + size_t sourceJnode = sourceSendMapping_[js]; + sourceSendVec[js*nz+k] = sourceView(sourceJnode, k); + } + } + } + + // Communication + std::vector targetRecvVec(targetRecvSize_*nz); + comm_.allToAllv(sourceSendVec.data(), sourceSendCounts3D.data(), sourceSendDispls3D.data(), + targetRecvVec.data(), targetRecvCounts3D.data(), targetRecvDispls3D.data()); + + // Interpolation + if (ndim == 1) { + auto targetView = array::make_view(targetField); + targetView.assign(0.0); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + targetView(targetJnode) += weights_[targetJnode][jj] + *targetRecvVec[stencil_[targetJnode][jj]]; + } + } + } else if (ndim == 2) { + auto targetView = array::make_view(targetField); + targetView.assign(0.0); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + for (size_t k = 0; k < nz; ++k) { + targetView(targetJnode, k) += weights_[targetJnode][jj] + *targetRecvVec[stencil_[targetJnode][jj]*nz+k]; + } + } + } + } + + // Set target field dirty + targetField.set_dirty(); +} + +void RegionalLinear2D::do_execute_adjoint(FieldSet& sourceFieldSet, + const FieldSet& targetFieldSet, + Metadata& metadata) const { + ATLAS_TRACE( + "atlas::interpolation::method::RegionalLinear2D::do_execute_adjoint()"); + ATLAS_ASSERT(sourceFieldSet.size() == targetFieldSet.size()); + + for (auto i = 0; i < sourceFieldSet.size(); ++i) { + do_execute_adjoint(sourceFieldSet[i], targetFieldSet[i], metadata); + } +} + +void RegionalLinear2D::do_execute_adjoint(Field& sourceField, + const Field& targetField, + Metadata& metadata) const { + ATLAS_TRACE( + "atlas::interpolation::method::RegionalLinear2D::do_execute_adjoint()"); + + if (targetField.size() == 0) { + return; + } + + // Check number of levels + ASSERT(sourceField.levels() == targetField.levels()); + const size_t nz = sourceField.levels() > 0 ? sourceField.levels() : 1; + const size_t ndim = sourceField.levels() > 0 ? 
2 : 1; + + // Scale counts and displs for all levels + std::vector sourceSendCounts3D(comm_.size()); + std::vector sourceSendDispls3D(comm_.size()); + std::vector targetRecvCounts3D(comm_.size()); + std::vector targetRecvDispls3D(comm_.size()); + for (size_t jt = 0; jt < comm_.size(); ++jt) { + sourceSendCounts3D[jt] = sourceSendCounts_[jt]*nz; + sourceSendDispls3D[jt] = sourceSendDispls_[jt]*nz; + targetRecvCounts3D[jt] = targetRecvCounts_[jt]*nz; + targetRecvDispls3D[jt] = targetRecvDispls_[jt]*nz; + } + + // Copy destination field + Field targetTmpField = targetField.clone(); + + // Interpolation adjoint + std::vector targetRecvVec(targetRecvSize_*nz, 0.0); + if (ndim == 1) { + const auto targetView = array::make_view(targetTmpField); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + targetRecvVec[stencil_[targetJnode][jj]] += weights_[targetJnode][jj] + *targetView(targetJnode); + } + } + } else if (ndim == 2) { + const auto targetView = array::make_view(targetTmpField); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + for (size_t k = 0; k < nz; ++k) { + targetRecvVec[stencil_[targetJnode][jj]*nz+k] += weights_[targetJnode][jj] + *targetView(targetJnode, k); + } + } + } + } + + // Communication + std::vector sourceSendVec(sourceSendSize_*nz); + comm_.allToAllv(targetRecvVec.data(), targetRecvCounts3D.data(), targetRecvDispls3D.data(), + sourceSendVec.data(), sourceSendCounts3D.data(), sourceSendDispls3D.data()); + + // Deserialize + if (ndim == 1) { + auto sourceView = array::make_view(sourceField); + sourceView.assign(0.0); + for (size_t js = 0; js < sourceSendSize_; ++js) { + size_t sourceJnode = sourceSendMapping_[js]; + sourceView(sourceJnode) += sourceSendVec[js]; + } + } else if (ndim == 2) { + auto sourceView = array::make_view(sourceField); + sourceView.assign(0.0); + for (size_t js = 0; js < sourceSendSize_; ++js) { + size_t sourceJnode = sourceSendMapping_[js]; + for (size_t k = 0; k < nz; ++k) { + sourceView(sourceJnode, k) += sourceSendVec[js*nz+k]; + } + } + } + + // Adjoint halo exchange + adjointHaloExchange(sourceField); +} + +} // namespace method +} // namespace interpolation +} // namespace atlas diff --git a/src/atlas/interpolation/method/structured/RegionalLinear2D.h b/src/atlas/interpolation/method/structured/RegionalLinear2D.h new file mode 100644 index 000000000..462f88662 --- /dev/null +++ b/src/atlas/interpolation/method/structured/RegionalLinear2D.h @@ -0,0 +1,77 @@ +/* + * (C) Copyright 2024 Meteorologisk Institutt + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
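// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The method implemented above is registered with the MethodFactory under the
// name "regional-linear-2d", so it can be reached through the generic
// atlas::Interpolation front end. The sketch below assumes pre-built function
// spaces "sourceFs" (a functionspace::StructuredColumns on a regional, regular
// projected grid, as required by RegionalLinear2D::do_setup above) and
// "targetFs"; everything else is the standard Atlas Field/Interpolation API.
#include "atlas/field.h"
#include "atlas/functionspace.h"
#include "atlas/interpolation.h"
#include "atlas/option.h"

atlas::Field interpolate_regional(const atlas::FunctionSpace& sourceFs,
                                  const atlas::FunctionSpace& targetFs,
                                  const atlas::Field& sourceField) {
    // Select the method registered as "regional-linear-2d".
    atlas::Interpolation interpolation(atlas::option::type("regional-linear-2d"),
                                       sourceFs, targetFs);
    atlas::Field targetField = targetFs.createField<double>(atlas::option::name("target"));
    interpolation.execute(sourceField, targetField);  // forward interpolation
    return targetField;
}
// ---------------------------------------------------------------------------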
+ */ + +#pragma once + +#include "atlas/interpolation/method/Method.h" + +#include + +#include "atlas/field.h" +#include "atlas/functionspace.h" +#include "atlas/grid/Grid.h" + +#include "eckit/config/Configuration.h" +#include "eckit/mpi/Comm.h" + +namespace atlas { +namespace interpolation { +namespace method { + + +class RegionalLinear2D : public Method { + public: + /// @brief Regional linear interpolation + /// + /// @details + /// + RegionalLinear2D(const Config& config) : Method(config), comm_(eckit::mpi::comm()) {} + ~RegionalLinear2D() override {} + + void print(std::ostream&) const override; + const FunctionSpace& source() const override { return source_; } + const FunctionSpace& target() const override { return target_; } + + void do_execute(const FieldSet& sourceFieldSet, FieldSet& targetFieldSet, + Metadata& metadata) const override; + void do_execute(const Field& sourceField, Field& targetField, + Metadata& metadata) const override; + + void do_execute_adjoint(FieldSet& sourceFieldSet, + const FieldSet& targetFieldSet, + Metadata& metadata) const override; + void do_execute_adjoint(Field& sourceField, const Field& targetField, + Metadata& metadata) const override; + + private: + using Method::do_setup; + void do_setup(const FunctionSpace& source, const FunctionSpace& target) override; + void do_setup(const Grid& source, const Grid& target, const Cache&) override; + + FunctionSpace source_{}; + FunctionSpace target_{}; + + const eckit::mpi::Comm & comm_; + size_t sourceSize_; + std::vector mpiTask_; + size_t targetSize_; + size_t sourceSendSize_; + size_t targetRecvSize_; + std::vector sourceSendCounts_; + std::vector sourceSendDispls_; + std::vector targetRecvCounts_; + std::vector targetRecvDispls_; + std::vector sourceSendMapping_; + std::vector> stencil_; + std::vector> weights_; + std::vector stencilSize_; +}; + + +} // namespace method +} // namespace interpolation +} // namespace atlas diff --git a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h index 3dd79099b..6d4db4dba 100644 --- a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h +++ b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h @@ -110,9 +110,10 @@ class ConservativeSphericalPolygonInterpolation : public Method { SRCTGT_INTERSECTPLG_DIFF, // index, 1/(unit_sphere.area) ( \sum_{scell} scell.area - \sum{tcell} tcell.area ) REMAP_CONS, // index, error in mass conservation REMAP_L2, // index, error accuracy for given analytical function - REMAP_LINF // index, like REMAP_L2 but in L_infinity norm + REMAP_LINF, // index, like REMAP_L2 but in L_infinity norm + ERRORS_ENUM_SIZE }; - std::array errors; + std::array errors; double tgt_area_sum; double src_area_sum; diff --git a/src/atlas/library/Library.cc b/src/atlas/library/Library.cc index 6ed6b227a..99bebfe96 100644 --- a/src/atlas/library/Library.cc +++ b/src/atlas/library/Library.cc @@ -485,8 +485,8 @@ void Library::Information::print(std::ostream& out) const { feature_MPI = true; #endif std::string array_data_store = "Native-host"; -#if ATLAS_HAVE_CUDA - array_data_store = "Native-CUDA"; +#if ATLAS_HAVE_GPU + array_data_store = "Native-GPU"; #endif #if ATLAS_HAVE_GRIDTOOLS_STORAGE array_data_store = "Gridtools-host"; diff --git a/src/atlas/library/config.h b/src/atlas/library/config.h index d684c3716..82c2b2e7e 100644 --- a/src/atlas/library/config.h +++ 
b/src/atlas/library/config.h @@ -16,6 +16,7 @@ #include "atlas/atlas_ecbuild_config.h" #include "atlas/library/defines.h" +#include "hic/hic_config.h" #ifndef DOXYGEN_SHOULD_SKIP_THIS #define DOXYGEN_HIDE(X) X @@ -24,6 +25,21 @@ #define ATLAS_HAVE_TRACE 1 #define ATLAS_HAVE_TRACE_BARRIERS 1 +#define ATLAS_HIC_COMPILER HIC_COMPILER +#define ATLAS_HOST_COMPILE HIC_HOST_COMPILE +#define ATLAS_DEVICE_COMPILE HIC_DEVICE_COMPILE +#define ATLAS_HOST_DEVICE HIC_HOST_DEVICE +#define ATLAS_DEVICE HIC_DEVICE +#define ATLAS_HOST HIC_HOST +#define ATLAS_GLOBAL HIC_GLOBAL + +#if HIC_BACKEND_CUDA || HIC_BACKEND_HIP +#define ATLAS_HAVE_GPU 1 +#else +#define ATLAS_HAVE_GPU 0 +#endif + + namespace atlas { /// @typedef gidx_t diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in index cf329bfd5..de1672dcf 100644 --- a/src/atlas/library/defines.h.in +++ b/src/atlas/library/defines.h.in @@ -17,7 +17,7 @@ #define ATLAS_HAVE_OMP @atlas_HAVE_OMP_CXX@ #define ATLAS_OMP_TASK_SUPPORTED @ATLAS_OMP_TASK_SUPPORTED@ #define ATLAS_OMP_TASK_UNTIED_SUPPORTED @ATLAS_OMP_TASK_UNTIED_SUPPORTED@ -#define ATLAS_HAVE_CUDA @atlas_HAVE_CUDA@ +#define ATLAS_HAVE_GPU @atlas_HAVE_GPU@ #define ATLAS_HAVE_ACC @atlas_HAVE_ACC@ #define ATLAS_HAVE_QHULL @atlas_HAVE_QHULL@ #define ATLAS_HAVE_CGAL @atlas_HAVE_CGAL@ @@ -45,16 +45,6 @@ #define ATLAS_ECKIT_DEVELOP @ATLAS_ECKIT_DEVELOP@ #define ATLAS_HAVE_FUNCTIONSPACE @atlas_HAVE_ATLAS_FUNCTIONSPACE@ -#ifdef __CUDACC__ -#define ATLAS_HOST_DEVICE __host__ __device__ -#define ATLAS_DEVICE __device__ -#define ATLAS_HOST __host__ -#else -#define ATLAS_HOST_DEVICE -#define ATLAS_DEVICE -#define ATLAS_HOST -#endif - #define ATLAS_BITS_LOCAL @ATLAS_BITS_LOCAL@ #if defined( __GNUC__ ) || defined( __clang__ ) diff --git a/src/atlas/mesh/Connectivity.h b/src/atlas/mesh/Connectivity.h index a77eafdfe..1631384d7 100644 --- a/src/atlas/mesh/Connectivity.h +++ b/src/atlas/mesh/Connectivity.h @@ -183,7 +183,7 @@ class IrregularConnectivityImpl { /// No resizing can be performed as data is not owned. IrregularConnectivityImpl(idx_t values[], idx_t rows, idx_t displs[], idx_t counts[]); -#ifdef __CUDACC__ +#if ATLAS_HIC_COMPILER /// @brief Copy ctr (only to be used when calling a cuda kernel) // This ctr has to be defined in the header, since __CUDACC__ will identify // whether @@ -529,7 +529,7 @@ class BlockConnectivityImpl { /// No resizing can be performed as data is not owned. 
BlockConnectivityImpl(idx_t rows, idx_t cols, idx_t values[]); -#ifdef __CUDACC__ +#if ATLAS_HIC_COMPILER /// @brief Copy ctr (only to be used when calling a cuda kernel) // This ctr has to be defined in the header, since __CUDACC__ will identify // whether it is compiled it for a GPU kernel @@ -645,6 +645,7 @@ using Connectivity = IrregularConnectivity; // ----------------------------------------------------------------------------------------------------- +ATLAS_HOST_DEVICE inline idx_t IrregularConnectivityImpl::operator()(idx_t row_idx, idx_t col_idx) const { assert(counts_[row_idx] > (col_idx)); return values_[displs_[row_idx] + col_idx] FROM_FORTRAN; @@ -662,36 +663,43 @@ inline void IrregularConnectivityImpl::set(idx_t row_idx, idx_t col_idx, const i values_[displs_[row_idx] + col_idx] = value TO_FORTRAN; } +ATLAS_HOST_DEVICE inline IrregularConnectivityImpl::Row IrregularConnectivityImpl::row(idx_t row_idx) const { return IrregularConnectivityImpl::Row(const_cast(values_.data()) + displs_(row_idx), counts_(row_idx)); } // ----------------------------------------------------------------------------------------------------- +ATLAS_HOST_DEVICE inline idx_t MultiBlockConnectivityImpl::operator()(idx_t row_idx, idx_t col_idx) const { return IrregularConnectivityImpl::operator()(row_idx, col_idx); } +ATLAS_HOST_DEVICE inline idx_t MultiBlockConnectivityImpl::operator()(idx_t block_idx, idx_t block_row_idx, idx_t block_col_idx) const { return block(block_idx)(block_row_idx, block_col_idx); } // ----------------------------------------------------------------------------------------------------- +ATLAS_HOST_DEVICE inline idx_t BlockConnectivityImpl::operator()(idx_t row_idx, idx_t col_idx) const { return values_[index(row_idx, col_idx)] FROM_FORTRAN; } +ATLAS_HOST_DEVICE inline void BlockConnectivityImpl::set(idx_t row_idx, const idx_t column_values[]) { for (idx_t n = 0; n < cols_; ++n) { values_[index(row_idx, n)] = column_values[n] TO_FORTRAN; } } +ATLAS_HOST_DEVICE inline void BlockConnectivityImpl::set(idx_t row_idx, idx_t col_idx, const idx_t value) { values_[index(row_idx, col_idx)] = value TO_FORTRAN; } +ATLAS_HOST_DEVICE inline idx_t BlockConnectivityImpl::index(idx_t i, idx_t j) const { return i * cols_ + j; } diff --git a/src/atlas/mesh/actions/BuildEdges.cc b/src/atlas/mesh/actions/BuildEdges.cc index 08a568a17..4ab0f453a 100644 --- a/src/atlas/mesh/actions/BuildEdges.cc +++ b/src/atlas/mesh/actions/BuildEdges.cc @@ -93,7 +93,7 @@ void build_element_to_edge_connectivity(Mesh& mesh) { UniqueLonLat compute_uid(mesh); for (idx_t jedge = 0; jedge < nb_edges; ++jedge) { - edge_sort.emplace_back(Sort(compute_uid(edge_node_connectivity.row(jedge)), jedge)); + edge_sort.emplace_back(compute_uid(edge_node_connectivity.row(jedge)), jedge); } std::sort(edge_sort.data(), edge_sort.data() + nb_edges); diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc index 17f617d18..bd403a29b 100644 --- a/src/atlas/mesh/actions/BuildParallelFields.cc +++ b/src/atlas/mesh/actions/BuildParallelFields.cc @@ -984,7 +984,7 @@ Field& build_edges_global_idx(Mesh& mesh) { std::vector edge_sort; edge_sort.reserve(glb_nb_edges); for (idx_t jedge = 0; jedge < glb_edge_id.shape(0); ++jedge) { - edge_sort.emplace_back(Node(glb_edge_id(jedge), jedge)); + edge_sort.emplace_back(glb_edge_id(jedge), jedge); } std::sort(edge_sort.begin(), edge_sort.end()); diff --git a/src/atlas/parallel/HaloExchange.h b/src/atlas/parallel/HaloExchange.h index 0d6b0e229..2c4931007 
100644 --- a/src/atlas/parallel/HaloExchange.h +++ b/src/atlas/parallel/HaloExchange.h @@ -33,8 +33,8 @@ #include "atlas/runtime/Exception.h" #include "atlas/util/Object.h" -#if ATLAS_HAVE_CUDA -#include "atlas/parallel/HaloExchangeCUDA.h" +#if ATLAS_HAVE_GPU +#include "atlas/parallel/HaloExchangeGPU.h" #endif namespace atlas { @@ -90,7 +90,7 @@ class HaloExchange : public util::Object { DATA_TYPE* allocate_buffer(const int buffer_size, const bool on_device) const; template - void deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const; + void deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const; template void pack_send_buffer(const array::ArrayView& hfield, @@ -174,6 +174,14 @@ void HaloExchange::execute(array::Array& field, bool on_device) const { DATA_TYPE* inner_buffer = allocate_buffer(inner_size, on_device); DATA_TYPE* halo_buffer = allocate_buffer(halo_size, on_device); +#if ATLAS_HAVE_GPU + if (on_device) { + ATLAS_ASSERT( is_device_accessible(inner_buffer) ); + ATLAS_ASSERT( is_device_accessible(halo_buffer) ); + ATLAS_ASSERT( is_device_accessible(field_dv.data()) ); + } +#endif + counts_displs_setup(var_size, inner_counts_init, halo_counts_init, inner_counts, halo_counts, inner_displs, halo_displs); @@ -190,8 +198,8 @@ void HaloExchange::execute(array::Array& field, bool on_device) const { wait_for_send(inner_counts_init, inner_req); - deallocate_buffer(inner_buffer, on_device); - deallocate_buffer(halo_buffer, on_device); + deallocate_buffer(inner_buffer, inner_size, on_device); + deallocate_buffer(halo_buffer, halo_size, on_device); } template @@ -241,8 +249,8 @@ void HaloExchange::execute_adjoint(array::Array& field, bool on_device) const { zero_halos(field_hv, field_dv, halo_buffer, halo_size, on_device); - deallocate_buffer(halo_buffer, on_device); - deallocate_buffer(inner_buffer, on_device); + deallocate_buffer(halo_buffer, halo_size, on_device); + deallocate_buffer(inner_buffer, inner_size, on_device); } template @@ -261,12 +269,12 @@ DATA_TYPE* HaloExchange::allocate_buffer(const int buffer_size, const bool on_de template -void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const { +void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const { if (on_device) { - util::delete_devicemem(buffer); + util::delete_devicemem(buffer, buffer_size); } else { - util::delete_hostmem(buffer); + util::delete_hostmem(buffer, buffer_size); } } @@ -294,7 +302,7 @@ void HaloExchange::ireceive(int tag, std::vector& recv_displs, std::vector< for (size_t jproc = 0; jproc < static_cast(nproc); ++jproc) { if (recv_counts[jproc] > 0) { recv_req[jproc] = - comm().iReceive(&recv_buffer[recv_displs[jproc]], recv_counts[jproc], jproc, tag); + comm().iReceive(recv_buffer+recv_displs[jproc], recv_counts[jproc], jproc, tag); } } } @@ -309,7 +317,7 @@ void HaloExchange::isend_and_wait_for_receive(int tag, std::vector& recv_co ATLAS_TRACE_MPI(ISEND) { for (size_t jproc = 0; jproc < static_cast(nproc); ++jproc) { if (send_counts[jproc] > 0) { - send_req[jproc] = comm().iSend(&send_buffer[send_displs[jproc]], send_counts[jproc], jproc, tag); + send_req[jproc] = comm().iSend(send_buffer+send_displs[jproc], send_counts[jproc], jproc, tag); } } } @@ -380,7 +388,7 @@ void HaloExchange::zero_halos(ATLAS_MAYBE_UNUSED const array::ArrayView& dfield, DATA_TYPE* recv_buffer, int recv_size, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if 
(on_device) { ATLAS_NOTIMPLEMENTED; } @@ -394,9 +402,9 @@ void HaloExchange::pack_send_buffer(ATLAS_MAYBE_UNUSED const array::ArrayView& dfield, DATA_TYPE* send_buffer, int send_size, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::pack(sendcnt_, sendmap_, hfield, dfield, send_buffer, + halo_packer_hic::pack(sendcnt_, sendmap_, hfield, dfield, send_buffer, send_size); } else @@ -410,9 +418,9 @@ void HaloExchange::unpack_recv_buffer(const DATA_TYPE* recv_buffer, int recv_siz array::ArrayView& dfield, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::unpack(recvcnt_, recvmap_, recv_buffer, recv_size, hfield, + halo_packer_hic::unpack(recvcnt_, recvmap_, recv_buffer, recv_size, hfield, dfield); } else @@ -425,9 +433,9 @@ void HaloExchange::pack_recv_adjoint_buffer(ATLAS_MAYBE_UNUSED const array::Arra const array::ArrayView& dfield, DATA_TYPE* recv_buffer, int recv_size, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::pack(recvcnt_, recvmap_, hfield, dfield, recv_buffer, + halo_packer_hic::pack(recvcnt_, recvmap_, hfield, dfield, recv_buffer, recv_size); } else @@ -441,9 +449,9 @@ void HaloExchange::unpack_send_adjoint_buffer(const DATA_TYPE* send_buffer, int array::ArrayView& dfield, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::unpack(sendcnt_, sendmap_, send_buffer, send_size, hfield, + halo_packer_hic::unpack(sendcnt_, sendmap_, send_buffer, send_size, hfield, dfield); } else diff --git a/src/atlas/parallel/HaloExchangeCUDA.h b/src/atlas/parallel/HaloExchangeGPU.h similarity index 94% rename from src/atlas/parallel/HaloExchangeCUDA.h rename to src/atlas/parallel/HaloExchangeGPU.h index ada9641e6..f3b9a46c5 100644 --- a/src/atlas/parallel/HaloExchangeCUDA.h +++ b/src/atlas/parallel/HaloExchangeGPU.h @@ -18,7 +18,7 @@ namespace atlas { namespace parallel { template -struct halo_packer_cuda { +struct halo_packer_hic { static void pack(const int sendcnt, array::SVector const& sendmap, const array::ArrayView& hfield, const array::ArrayView& dfield, DATA_TYPE* send_buffer, int send_buffer_size); @@ -28,5 +28,8 @@ struct halo_packer_cuda { array::ArrayView& dfield); }; +bool is_device_accessible(const void* data); + + } // namespace parallel } // namespace atlas diff --git a/src/atlas/parallel/HaloExchangeCUDA.cu b/src/atlas/parallel/HaloExchangeGPU.hic similarity index 80% rename from src/atlas/parallel/HaloExchangeCUDA.cu rename to src/atlas/parallel/HaloExchangeGPU.hic index a9242515a..c96893d1c 100644 --- a/src/atlas/parallel/HaloExchangeCUDA.cu +++ b/src/atlas/parallel/HaloExchangeGPU.hic @@ -9,14 +9,26 @@ */ -#include "atlas/parallel/HaloExchangeCUDA.h" +#include "atlas/parallel/HaloExchangeGPU.h" #include "atlas/parallel/HaloExchangeImpl.h" #include "atlas/array/SVector.h" #include "atlas/runtime/Exception.h" +#include "hic/hic.h" + namespace atlas { namespace parallel { +bool is_device_accessible(const void* ptr) { + hicPointerAttributes attr; + hicError_t code = hicPointerGetAttributes(&attr, ptr); + if( code != hicSuccess ) { + static_cast(hicGetLastError()); + return false; + } + return ptr == attr.devicePointer; +} + template struct get_buffer_index{ template @@ -120,7 +132,7 @@ struct get_first_non_parallel_dim }; 
template -struct get_n_cuda_blocks +struct get_n_hic_blocks { template static unsigned int apply(const array::ArrayView& hfield, const unsigned int block_size_y) { @@ -130,7 +142,7 @@ struct get_n_cuda_blocks }; template<> -struct get_n_cuda_blocks<0, 1> { +struct get_n_hic_blocks<0, 1> { template static unsigned int apply(const array::ArrayView& hfield, const unsigned int block_size_y) { return 1; @@ -138,70 +150,71 @@ struct get_n_cuda_blocks<0, 1> { }; template -void halo_packer_cuda::pack( const int sendcnt, array::SVector const & sendmap, +void halo_packer_hic::pack( const int sendcnt, array::SVector const & sendmap, const array::ArrayView& hfield, const array::ArrayView& dfield, DATA_TYPE* send_buffer, int send_buffer_size ) { const unsigned int block_size_x = 32; const unsigned int block_size_y = (RANK==1) ? 1 : 4; - unsigned int nblocks_y = get_n_cuda_blocks::apply(hfield, block_size_y); + const unsigned int nblocks_x = (sendcnt+block_size_x-1)/block_size_x; + const unsigned int nblocks_y = get_n_hic_blocks::apply(hfield, block_size_y); dim3 threads(block_size_x, block_size_y); - dim3 blocks((sendcnt+block_size_x-1)/block_size_x, nblocks_y); - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + dim3 blocks(nblocks_x, nblocks_y); + HIC_CALL(hicDeviceSynchronize()); + hicError_t err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } pack_kernel<<>>(sendcnt, sendmap.data(), sendmap.size(), dfield, send_buffer, send_buffer_size); - err = cudaGetLastError(); - if (err != cudaSuccess) + err = hicGetLastError(); + if (err != hicSuccess) throw_Exception("Error launching GPU packing kernel"); - cudaDeviceSynchronize(); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + HIC_CALL(hicDeviceSynchronize()); + err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } } template -void halo_packer_cuda::unpack(const int recvcnt, array::SVector const & recvmap, +void halo_packer_hic::unpack(const int recvcnt, array::SVector const & recvmap, const DATA_TYPE* recv_buffer , int recv_buffer_size, array::ArrayView &hfield, array::ArrayView &dfield) { const unsigned int block_size_x = 32; const unsigned int block_size_y = (RANK==1) ? 
1 : 4; - unsigned int nblocks_y = get_n_cuda_blocks::apply(hfield, block_size_y); + unsigned int nblocks_y = get_n_hic_blocks::apply(hfield, block_size_y); dim3 threads(block_size_x, block_size_y); dim3 blocks((recvcnt+block_size_x-1)/block_size_x, nblocks_y); - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + HIC_CALL(hicDeviceSynchronize()); + hicError_t err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } unpack_kernel<<>>(recvcnt, recvmap.data(), recvmap.size(), recv_buffer, recv_buffer_size, dfield); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error launching GPU packing kernel")+ cudaGetErrorString(err); + err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error launching GPU packing kernel")+ hicGetErrorString(err); throw_Exception(msg); } - cudaDeviceSynchronize(); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + HIC_CALL(hicDeviceSynchronize()); + err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } } @@ -220,11 +233,11 @@ void halo_packer_cuda::unpack(const int recvcnt, a #define EXPLICIT_TEMPLATE_INSTANTIATION( ParallelDim, RANK ) \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; + template class halo_packer_hic; \ + template class halo_packer_hic; \ + template class halo_packer_hic; \ + template class halo_packer_hic; \ + template class halo_packer_hic; #define EXPLICIT_TEMPLATE_INSTANTIATION_RANK(RANK) \ ATLAS_REPEAT_MACRO(RANK, EXPLICIT_TEMPLATE_INSTANTIATION, RANK) diff --git a/src/atlas/parallel/acc/acc.cc b/src/atlas/parallel/acc/acc.cc new file mode 100644 index 000000000..8a600aea5 --- /dev/null +++ b/src/atlas/parallel/acc/acc.cc @@ -0,0 +1,65 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
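// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The packer above now goes through the HIC layer ("hic/hic.h"), whose hic*
// symbols resolve to the CUDA or HIP runtime depending on the configured
// backend; HIC_CALL is the error-checking wrapper used throughout this patch.
// The helper below is hypothetical and only exercises calls that appear in the
// patch (hicMalloc, hicMemcpy, hicFree).
#include <vector>
#include "hic/hic.h"

void roundtrip_through_device(std::vector<double>& host) {
    const auto bytes = host.size() * sizeof(double);
    double* device = nullptr;
    HIC_CALL(hicMalloc((void**)&device, bytes));                             // device allocation
    HIC_CALL(hicMemcpy(device, host.data(), bytes, hicMemcpyHostToDevice));  // host -> device
    HIC_CALL(hicMemcpy(host.data(), device, bytes, hicMemcpyDeviceToHost));  // device -> host
    HIC_CALL(hicFree(device));                                               // release
}
// ---------------------------------------------------------------------------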
+ */ + +#include "acc.h" + +#include "atlas/library/defines.h" + +#if ATLAS_HAVE_ACC +#include "atlas_acc_support/atlas_acc_map_data.h" +#endif + +namespace atlas::acc { + +int devices() { +#if ATLAS_HAVE_ACC + static int num_devices = [](){ + auto devicetype = atlas_acc_get_device_type(); + int _num_devices = atlas_acc_get_num_devices(); + if (_num_devices == 1 && devicetype == atlas_acc_device_host) { + --_num_devices; + } + return _num_devices; + }(); + return num_devices; +#else + return 0; +#endif +} + +void map(void* host_data, void* device_data, std::size_t bytes) { +#if ATLAS_HAVE_ACC + atlas_acc_map_data(host_data, device_data, bytes); +#endif +} +void unmap(void* host_data) { +#if ATLAS_HAVE_ACC + atlas_acc_unmap_data(host_data); +#endif +} + +bool is_present(void* host_data, std::size_t bytes) { +#if ATLAS_HAVE_ACC + return atlas_acc_is_present(host_data, bytes); +#else + return false; +#endif +} + +void* deviceptr(void* host_data) { +#if ATLAS_HAVE_ACC + return atlas_acc_deviceptr(host_data); +#else + return nullptr; +#endif +} + +} + diff --git a/src/atlas/array/gridtools/GridToolsArrayView.cu b/src/atlas/parallel/acc/acc.h similarity index 55% rename from src/atlas/array/gridtools/GridToolsArrayView.cu rename to src/atlas/parallel/acc/acc.h index fe607a93b..3a10f1e25 100644 --- a/src/atlas/array/gridtools/GridToolsArrayView.cu +++ b/src/atlas/parallel/acc/acc.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2013 ECMWF. + * (C) Copyright 2024- ECMWF. * * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -8,4 +8,16 @@ * nor does it submit to any jurisdiction. */ -#include "GridToolsArrayView.cc" +#pragma once +#include + +namespace atlas::acc { + +int devices(); +void map(void* host_data, void* device_data, std::size_t bytes); +void unmap(void* host_data); +bool is_present(void* host_data, std::size_t bytes); +void* deviceptr(void* host_data); + +} + diff --git a/src/atlas/runtime/trace/Timings.cc b/src/atlas/runtime/trace/Timings.cc index a477969d6..cbc847213 100644 --- a/src/atlas/runtime/trace/Timings.cc +++ b/src/atlas/runtime/trace/Timings.cc @@ -274,8 +274,8 @@ void TimingsRegistry::report(std::ostream& out, const eckit::Configuration& conf } } - size_t max_title_length(0); - size_t max_location_length(0); + size_t max_title_length(2); + size_t max_location_length(9); size_t max_nest(0); long max_count(0); double max_seconds(0); diff --git a/src/atlas/util/Allocate.cc b/src/atlas/util/Allocate.cc index b93446abb..58f2648d6 100644 --- a/src/atlas/util/Allocate.cc +++ b/src/atlas/util/Allocate.cc @@ -16,9 +16,7 @@ #include "atlas/library/config.h" #include "atlas/runtime/Exception.h" -#if ATLAS_HAVE_CUDA -#include -#endif +#include "hic/hic.h" namespace atlas { namespace util { @@ -27,50 +25,66 @@ namespace util { namespace detail { //------------------------------------------------------------------------------ -void allocate_cudamanaged(void** ptr, size_t size) { -#if ATLAS_HAVE_CUDA - cudaError_t err = cudaMallocManaged(ptr, size); - if (err != cudaSuccess) - throw_AssertionFailed("failed to allocate GPU memory", Here()); -#else - *ptr = malloc(size); -#endif -} - -void deallocate_cudamanaged(void* ptr) { -#if ATLAS_HAVE_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw_AssertionFailed("failed to synchronize memory", Here()); - - err = cudaFree(ptr); - // The following throws an invalid device memory - if (err != cudaSuccess) - 
throw_AssertionFailed("failed to free GPU memory", Here()); -#else - free(ptr); -#endif +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + +void allocate_managed(void** ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return allocate_host(ptr, bytes); + } + if (devices() == 0) { + return allocate_host(ptr, bytes); + } + HIC_CALL(hicMallocManaged(ptr, bytes)); +} + +void deallocate_managed(void* ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return deallocate_host(ptr, bytes); + } + if (devices() == 0) { + return deallocate_host(ptr, bytes); + } + HIC_CALL(hicDeviceSynchronize()); + HIC_CALL(hicFree(ptr)); } -void allocate_cuda(void** ptr, size_t size) { -#if ATLAS_HAVE_CUDA - cudaError_t err = cudaMalloc(ptr, size); - if (err != cudaSuccess) - throw_AssertionFailed("failed to allocate GPU memory", Here()); -#else - *ptr = malloc(size); -#endif +void allocate_device(void** ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return allocate_host(ptr, bytes); + } + if (devices() == 0) { + return allocate_host(ptr, bytes); + } + HIC_CALL(hicMalloc(ptr, bytes)); } -void deallocate_cuda(void* ptr) { - deallocate_cudamanaged(ptr); +void deallocate_device(void* ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return deallocate_host(ptr, bytes); + } + if (devices() == 0) { + return deallocate_host(ptr, bytes); + } + HIC_CALL(hicDeviceSynchronize()); + HIC_CALL(hicFree(ptr)); } -void allocate_host(void** ptr, size_t size) { - *ptr = malloc(size); +void allocate_host(void** ptr, size_t bytes) { + *ptr = malloc(bytes); } -void deallocate_host(void* ptr) { +void deallocate_host(void* ptr, size_t /*bytes*/) { free(ptr); } @@ -91,8 +105,17 @@ void atlas__allocate_managedmem_int(int*& a, size_t N) { void atlas__allocate_managedmem_long(long*& a, size_t N) { allocate_managedmem(a, N); } -void atlas__deallocate_managedmem(void*& a) { - delete_managedmem(a); +void atlas__deallocate_managedmem_double(double*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_float(float*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_int(int*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_long(long*& a, size_t N) { + delete_managedmem(a, N); } } diff --git a/src/atlas/util/Allocate.h b/src/atlas/util/Allocate.h index cac37e40a..b8fac7179 100644 --- a/src/atlas/util/Allocate.h +++ b/src/atlas/util/Allocate.h @@ -18,28 +18,28 @@ namespace util { //------------------------------------------------------------------------------ namespace detail { -void allocate_cudamanaged(void** ptr, size_t size); -void deallocate_cudamanaged(void* ptr); +void allocate_managed(void** ptr, size_t bytes); +void deallocate_managed(void* ptr, size_t bytes); -void allocate_cuda(void** ptr, size_t size); -void deallocate_cuda(void* ptr); +void allocate_device(void** ptr, size_t bytes); +void deallocate_device(void* ptr, size_t bytes); -void allocate_host(void** ptr, size_t size); -void deallocate_host(void* ptr); +void allocate_host(void** ptr, size_t bytes); +void deallocate_host(void* ptr, size_t bytes); } // namespace detail template void allocate_managedmem(T*& data, size_t N) { if (N != 0) { - detail::allocate_cudamanaged(reinterpret_cast(&data), N * sizeof(T)); + detail::allocate_managed(reinterpret_cast(&data), N * sizeof(T)); } } 
template -void delete_managedmem(T*& data) { +void delete_managedmem(T*& data, size_t N) { if (data) { - detail::deallocate_cudamanaged(data); + detail::deallocate_managed(data, N * sizeof(T)); data = nullptr; } } @@ -47,14 +47,14 @@ void delete_managedmem(T*& data) { template void allocate_devicemem(T*& data, size_t N) { if (N != 0) { - detail::allocate_cuda(reinterpret_cast(&data), N * sizeof(T)); + detail::allocate_device(reinterpret_cast(&data), N * sizeof(T)); } } template -void delete_devicemem(T*& data) { +void delete_devicemem(T*& data, size_t N) { if (data) { - detail::deallocate_cuda(data); + detail::deallocate_device(data, N * sizeof(T)); data = nullptr; } } @@ -67,9 +67,9 @@ void allocate_hostmem(T*& data, size_t N) { } template -void delete_hostmem(T*& data) { +void delete_hostmem(T*& data, size_t N) { if (data) { - detail::deallocate_host(data); + detail::deallocate_host(data, N * sizeof(T)); data = nullptr; } } @@ -82,7 +82,10 @@ void atlas__allocate_managedmem_double(double*& a, size_t N); void atlas__allocate_managedmem_float(float*& a, size_t N); void atlas__allocate_managedmem_int(int*& a, size_t N); void atlas__allocate_managedmem_long(long*& a, size_t N); -void atlas__deallocate_managedmem(void*& a); +void atlas__deallocate_managedmem_double(double*& a, size_t N); +void atlas__deallocate_managedmem_float(float*& a, size_t N); +void atlas__deallocate_managedmem_int(int*& a, size_t N); +void atlas__deallocate_managedmem_long(long*& a, size_t N); } //------------------------------------------------------------------------------ diff --git a/src/atlas/util/GPUClonable.h b/src/atlas/util/GPUClonable.h index fcee74129..704a94429 100644 --- a/src/atlas/util/GPUClonable.h +++ b/src/atlas/util/GPUClonable.h @@ -12,9 +12,7 @@ #include "atlas/library/config.h" -#if ATLAS_HAVE_CUDA -#include -#endif +#include "hic/hic.h" namespace atlas { namespace util { @@ -22,30 +20,28 @@ namespace util { template struct GPUClonable { GPUClonable(Base* base_ptr): base_ptr_(base_ptr), gpu_object_ptr_(nullptr) { -#if ATLAS_HAVE_CUDA - cudaMalloc(&gpu_object_ptr_, sizeof(Base)); -#endif + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMalloc(&gpu_object_ptr_, sizeof(Base))); + } } ~GPUClonable() { - if (gpu_object_ptr_) { -#if ATLAS_HAVE_CUDA - cudaFree(gpu_object_ptr_); -#endif + if (gpu_object_ptr_ != nullptr) { + HIC_CALL(hicFree(gpu_object_ptr_)); } } Base* gpu_object_ptr() { return static_cast(gpu_object_ptr_); } void updateDevice() { -#if ATLAS_HAVE_CUDA - cudaMemcpy(gpu_object_ptr_, base_ptr_, sizeof(Base), cudaMemcpyHostToDevice); -#endif + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMemcpy(gpu_object_ptr_, base_ptr_, sizeof(Base), hicMemcpyHostToDevice)); + } } void updateHost() { -#if ATLAS_HAVE_CUDA - cudaMemcpy(base_ptr_, gpu_object_ptr_, sizeof(Base), cudaMemcpyDeviceToHost); -#endif + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMemcpy(base_ptr_, gpu_object_ptr_, sizeof(Base), hicMemcpyDeviceToHost)); + } } Base* base_ptr_; diff --git a/src/atlas/util/PackVectorFields.cc b/src/atlas/util/PackVectorFields.cc new file mode 100644 index 000000000..640a1d46b --- /dev/null +++ b/src/atlas/util/PackVectorFields.cc @@ -0,0 +1,229 @@ +/* + * (C) Crown Copyright 2024 Met Office + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
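// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The PackVectorFields utility added below packs component fields that carry a
// "vector_field_name" metadata entry into a single higher-rank vector field,
// and records "component_field_metadata" so the operation can be reversed with
// unpack_vector_fields. The helper below is hypothetical and calls
// pack_vector_fields with the two-argument signature defined in this file.
#include "atlas/field.h"
#include "atlas/util/PackVectorFields.h"

atlas::FieldSet pack_wind(atlas::Field u, atlas::Field v) {
    // Tag both components as belonging to the same packed field "wind".
    u.metadata().set("vector_field_name", "wind");
    v.metadata().set("vector_field_name", "wind");
    atlas::FieldSet components;
    components.add(u);
    components.add(v);
    return atlas::util::pack_vector_fields(components, atlas::FieldSet{});
}
// ---------------------------------------------------------------------------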
+ */ + +#include "atlas/util/PackVectorFields.h" + +#include +#include +#include +#include + +#include "atlas/array.h" +#include "atlas/array/helpers/ArrayForEach.h" +#include "atlas/functionspace.h" +#include "atlas/option.h" +#include "atlas/runtime/Exception.h" +#include "atlas/util/Config.h" +#include "eckit/config/LocalConfiguration.h" + +namespace atlas { +namespace util { + +namespace { + +using eckit::LocalConfiguration; + +using array::DataType; +using array::helpers::arrayForEachDim; + +void addOrReplaceField(FieldSet& fieldSet, const Field& field) { + const auto fieldName = field.name(); + if (fieldSet.has(fieldName)) { + fieldSet[fieldName] = field; + } else { + fieldSet.add(field); + } +} + +Field& getOrCreateField(FieldSet& fieldSet, const FunctionSpace& functionSpace, + const Config& config) { + const auto fieldName = config.getString("name"); + if (!fieldSet.has(fieldName)) { + fieldSet.add(functionSpace.createField(config)); + } + return fieldSet[fieldName]; +} + +void checkFieldCompatibility(const Field& componentField, + const Field& vectorField) { + ATLAS_ASSERT(componentField.functionspace().size() == + vectorField.functionspace().size()); + ATLAS_ASSERT(componentField.levels() == vectorField.levels()); + ATLAS_ASSERT(componentField.variables() == 0); + ATLAS_ASSERT(vectorField.variables() > 0); + + const auto checkStandardShape = [](const Field& field) { + // Check for "standard" Atlas field shape. + auto dim = 0; + const auto rank = field.rank(); + const auto shape = field.shape(); + if (field.functionspace().size() != shape[dim++]) { + return false; + } + if (const auto levels = field.levels(); + levels && (dim >= rank || levels != shape[dim++])) { + return false; + } + if (const auto variables = field.variables(); + variables && (dim >= rank || variables != shape[dim++])) { + return false; + } + if (dim != rank) { + return false; + } + return true; + }; + + ATLAS_ASSERT(checkStandardShape(componentField)); + ATLAS_ASSERT(checkStandardShape(vectorField)); +} + +template +void copyFieldData(ComponentField& componentField, VectorField& vectorField, + const Functor& copier) { + checkFieldCompatibility(componentField, vectorField); + + const auto copyArrayData = [&](auto value, auto rank) { + // Resolve value-type and rank from arguments. + using Value = decltype(value); + constexpr auto Rank = decltype(rank)::value; + + // Iterate over fields. 
+ auto vectorView = array::make_view(vectorField); + auto componentView = array::make_view(componentField); + constexpr auto Dims = std::make_integer_sequence{}; + arrayForEachDim(Dims, execution::par, std::tie(componentView, vectorView), + copier); + }; + + const auto selectRank = [&](auto value) { + switch (vectorField.rank()) { + case 2: + return copyArrayData(value, std::integral_constant{}); + case 3: + return copyArrayData(value, std::integral_constant{}); + default: + ATLAS_THROW_EXCEPTION("Unsupported vector field rank: " + + std::to_string(vectorField.rank())); + } + }; + + const auto selectType = [&]() { + switch (vectorField.datatype().kind()) { + case DataType::kind(): + return selectRank(double{}); + case DataType::kind(): + return selectRank(float{}); + case DataType::kind(): + return selectRank(long{}); + case DataType::kind(): + return selectRank(int{}); + default: + ATLAS_THROW_EXCEPTION("Unknown datatype: " + + std::to_string(vectorField.datatype().kind())); + } + }; + + selectType(); +} + +} // namespace + +FieldSet pack_vector_fields(const FieldSet& fields, FieldSet packedFields) { + // Get the number of variables for each vector field. + auto vectorSizeMap = std::map{}; + for (const auto& field : fields) { + auto vectorFieldName = std::string{}; + if (field.metadata().get("vector_field_name", vectorFieldName)) { + ++vectorSizeMap[vectorFieldName]; + } + } + auto vectorIndexMap = std::map{}; + + // Pack vector fields. + for (const auto& field : fields) { + auto vectorFieldName = std::string{}; + if (!field.metadata().get("vector_field_name", vectorFieldName)) { + // Not a vector component field. + addOrReplaceField(packedFields, field); + continue; + } + + // Field is vector field component. + const auto& componentField = field; + + // Get or create vector field. + const auto vectorFieldConfig = + option::name(vectorFieldName) | + option::levels(componentField.levels()) | + option::vector(vectorSizeMap[vectorFieldName]) | + option::datatype(componentField.datatype()); + auto& vectorField = getOrCreateField( + packedFields, componentField.functionspace(), vectorFieldConfig); + + // Copy field data. + const auto vectorIndex = vectorIndexMap[vectorFieldName]++; + const auto copier = [&](auto&& componentElem, auto&& vectorElem) { + vectorElem(vectorIndex) = componentElem; + }; + copyFieldData(componentField, vectorField, copier); + + // Copy metadata. + const auto componentFieldMetadata = componentField.metadata(); + auto componentFieldMetadataVector = std::vector{}; + vectorField.metadata().get("component_field_metadata", + componentFieldMetadataVector); + componentFieldMetadataVector.push_back(componentFieldMetadata); + vectorField.metadata().set("component_field_metadata", + componentFieldMetadataVector); + } + return packedFields; +} + +FieldSet unpack_vector_fields(const FieldSet& fields, FieldSet unpackedFields) { + for (const auto& field : fields) { + auto componentFieldMetadataVector = std::vector{}; + if (!field.metadata().get("component_field_metadata", + componentFieldMetadataVector)) { + // Not a vector field. + addOrReplaceField(unpackedFields, field); + continue; + } + + // Field is vector. + const auto& vectorField = field; + + auto vectorIndex = 0; + for (const auto& componentFieldMetadata : componentFieldMetadataVector) { + + // Get or create field. 
+ auto componentFieldName = std::string{}; + componentFieldMetadata.get("name", componentFieldName); + const auto componentFieldConfig = + option::name(componentFieldName) | + option::levels(vectorField.levels()) | + option::datatype(vectorField.datatype()); + auto& componentField = getOrCreateField( + unpackedFields, vectorField.functionspace(), componentFieldConfig); + + // Copy field data. + const auto copier = [&](auto&& componentElem, auto&& vectorElem) { + componentElem = vectorElem(vectorIndex); + }; + copyFieldData(componentField, vectorField, copier); + + // Copy metadata. + componentField.metadata() = componentFieldMetadata; + + ++vectorIndex; + } + } + return unpackedFields; +} + +} // namespace util +} // namespace atlas diff --git a/src/atlas/util/PackVectorFields.h b/src/atlas/util/PackVectorFields.h new file mode 100644 index 000000000..fc77da6c5 --- /dev/null +++ b/src/atlas/util/PackVectorFields.h @@ -0,0 +1,40 @@ +/* + * (C) Crown Copyright 2024 Met Office + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + */ + +#pragma once + +#include "atlas/field.h" + +namespace eckit { +class LocalConfiguration; +} + +namespace atlas { +namespace util { + +/// @brief Packs vector field components into vector fields +/// +/// @details Iterates through @param fields and creates vector fields from any +/// component field with the "vector name" string metadata. These, as +/// well as any present scalar fields, are added to the return-value +/// field set. +/// Note, a mutable @param packedFields field set can be supplied if +/// one needs to guarantee the order of the packed fields +FieldSet pack_vector_fields(const FieldSet& fields, + FieldSet packedFields = FieldSet{}); + +/// @brief Unpacks vector field into vector field components. +/// +/// @details Undoes "pack" operation when a set of packed fields are supplied +/// as @param fields. A mutable @param unpackedFields field set can be +/// supplied if one needs to guarantee the order of the unpacked +/// fields. 
+FieldSet unpack_vector_fields(const FieldSet& fields, + FieldSet unpackedFields = FieldSet{}); + +} // namespace util +} // namespace atlas diff --git a/src/atlas_acc_support/CMakeLists.txt b/src/atlas_acc_support/CMakeLists.txt index ffe144a37..7bcf8ac06 100644 --- a/src/atlas_acc_support/CMakeLists.txt +++ b/src/atlas_acc_support/CMakeLists.txt @@ -32,7 +32,6 @@ if( atlas_HAVE_ACC ) ecbuild_add_library( TARGET atlas_acc_support SOURCES atlas_acc_map_data.c ) target_compile_options( atlas_acc_support PRIVATE ${ACC_C_FLAGS} ) target_link_libraries( atlas_acc_support PRIVATE ${ACC_C_FLAGS} ) - endif() endif() diff --git a/src/atlas_acc_support/atlas_acc_map_data.c b/src/atlas_acc_support/atlas_acc_map_data.c index 6b8a148e7..826579686 100644 --- a/src/atlas_acc_support/atlas_acc_map_data.c +++ b/src/atlas_acc_support/atlas_acc_map_data.c @@ -17,12 +17,111 @@ #endif #include +#include -void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long size) { - acc_map_data(cpu_ptr, gpu_ptr, size); +#include "atlas_acc_map_data.h" + +void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long bytes) { + acc_map_data(cpu_ptr, gpu_ptr, bytes); } void atlas_acc_unmap_data(void* cpu_ptr) { acc_unmap_data(cpu_ptr); } + +int atlas_acc_is_present(void* cpu_ptr, unsigned long bytes) { + return acc_is_present(cpu_ptr, bytes); +} + +void* atlas_acc_deviceptr(void* cpu_ptr) { + return acc_deviceptr(cpu_ptr); +} + +atlas_acc_device_t atlas_acc_get_device_type() { + acc_device_t device_type = acc_get_device_type(); + if (device_type == acc_device_host || device_type == acc_device_none) { + return atlas_acc_device_host; + } + return atlas_acc_device_not_host; +} + +#define BUFFER_PRINTF(buffer, ...) snprintf(buffer + strlen(buffer), sizeof(buffer) - strlen(buffer), __VA_ARGS__); + +const char* atlas_acc_version_str() { + static char buffer[16]; + if( strlen(buffer) != 0 ) { + return buffer; + } + + BUFFER_PRINTF(buffer, "%i", _OPENACC); + switch( _OPENACC ) { + case 201111: BUFFER_PRINTF(buffer, " (1.0)"); break; + case 201306: BUFFER_PRINTF(buffer, " (2.0)"); break; + case 201308: BUFFER_PRINTF(buffer, " (2.0)"); break; + case 201510: BUFFER_PRINTF(buffer, " (2.5)"); break; + case 201711: BUFFER_PRINTF(buffer, " (2.6)"); break; + default: break; + } + return buffer; +} +const char* atlas_acc_info_str() { + static char buffer[1024]; + if( strlen(buffer) != 0 ) { + return buffer; + } + // Platform information + acc_device_t devicetype = acc_get_device_type(); + int num_devices = acc_get_num_devices(devicetype); + BUFFER_PRINTF(buffer, " OpenACC version: %s\n", atlas_acc_version_str()); + if (devicetype == acc_device_host ) { + BUFFER_PRINTF(buffer, " No OpenACC GPU devices available\n"); + return buffer; + } + BUFFER_PRINTF(buffer, " Number of OpenACC devices: %i\n", num_devices); + int device_num = acc_get_device_num(devicetype); + BUFFER_PRINTF(buffer, " OpenACC Device number: %i\n", device_num); + BUFFER_PRINTF(buffer, " OpenACC acc_device_t (enum value): %d", devicetype); + switch( devicetype ) { + case acc_device_host: BUFFER_PRINTF(buffer, " (acc_device_host)"); break; + case acc_device_none: BUFFER_PRINTF(buffer, " (acc_device_none)"); break; + case acc_device_not_host: BUFFER_PRINTF(buffer, " (acc_device_not_host)"); break; + case acc_device_nvidia: BUFFER_PRINTF(buffer, " (acc_device_nvidia)"); break; + default: break; + } + BUFFER_PRINTF(buffer, "\n"); + + // acc_get_property, acc_get_property_string introduced with OpenACC 2.6 +#if _OPENACC >= 201711 + long int mem = acc_get_property( 
device_num, devicetype, acc_property_memory); + long int free_mem = acc_get_property( device_num, devicetype, acc_property_free_memory); + const char *property_name = acc_get_property_string(device_num, devicetype, acc_property_name); + const char *property_vendor = acc_get_property_string(device_num, devicetype, acc_property_vendor ); + const char *property_driver = acc_get_property_string(device_num, devicetype, acc_property_driver ); + // if (mem > 0) + { + BUFFER_PRINTF(buffer, " Memory on OpenACC device: %li\n", mem); + } + // if (free_mem > 0) + { + BUFFER_PRINTF(buffer, " Free Memory on OpenACC device: %li\n", free_mem); + } + // if (property_name != NULL) + { + BUFFER_PRINTF(buffer, " OpenACC device name: %s\n", property_name); + } + // if (property_vendor != NULL) + { + BUFFER_PRINTF(buffer, " OpenACC device vendor: %s\n", property_vendor); + } + // if (property_driver != NULL) + { + BUFFER_PRINTF(buffer, " OpenACC device driver: %s\n", property_driver); + } +#endif + return buffer; +} + +int atlas_acc_get_num_devices() { + return acc_get_num_devices(acc_get_device_type()); +} diff --git a/src/atlas_acc_support/atlas_acc_map_data.h b/src/atlas_acc_support/atlas_acc_map_data.h index 4fa1208c2..ba5990968 100644 --- a/src/atlas_acc_support/atlas_acc_map_data.h +++ b/src/atlas_acc_support/atlas_acc_map_data.h @@ -14,8 +14,24 @@ extern "C" { #endif -void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long size); +typedef enum { + atlas_acc_device_host = 0, + atlas_acc_device_not_host = 1 +} atlas_acc_device_t; + +void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long bytes); void atlas_acc_unmap_data(void* cpu_ptr); +int atlas_acc_is_present(void* cpu_ptr, unsigned long bytes); +void* atlas_acc_deviceptr(void* cpu_ptr); +atlas_acc_device_t atlas_acc_get_device_type(); + +int atlas_acc_devices(); + +const char* atlas_acc_version_str(); + +const char* atlas_acc_info_str(); + +int atlas_acc_get_num_devices(); #ifdef __cplusplus } diff --git a/src/atlas_f/CMakeLists.txt b/src/atlas_f/CMakeLists.txt index 737a5b590..2e9a97e31 100644 --- a/src/atlas_f/CMakeLists.txt +++ b/src/atlas_f/CMakeLists.txt @@ -277,6 +277,14 @@ ecbuild_add_library( TARGET atlas_f $ ) +if( HAVE_ACC ) + target_link_options( atlas_f INTERFACE + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> ) +endif() + fckit_target_preprocess_fypp( atlas_f DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/atlas_f.h.in diff --git a/src/atlas_f/atlas_f.h.in b/src/atlas_f/atlas_f.h.in index cea48ee0e..3511bca12 100644 --- a/src/atlas_f/atlas_f.h.in +++ b/src/atlas_f/atlas_f.h.in @@ -18,6 +18,7 @@ #define ATLAS_HAVE_OMP @atlas_HAVE_OMP_Fortran@ #define ATLAS_HAVE_ACC @atlas_HAVE_ACC@ +#define ATLAS_HAVE_GPU @atlas_HAVE_GPU@ #define ATLAS_BITS_GLOBAL @ATLAS_BITS_GLOBAL@ #define ATLAS_BITS_LOCAL @ATLAS_BITS_LOCAL@ diff --git a/src/atlas_f/util/atlas_allocate_module.F90 b/src/atlas_f/util/atlas_allocate_module.F90 index 3cc1d3a7a..1bda17e81 100644 --- a/src/atlas_f/util/atlas_allocate_module.F90 +++ b/src/atlas_f/util/atlas_allocate_module.F90 @@ -231,7 +231,7 @@ subroutine atlas_deallocate_managedmem_real64_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_double), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_real64(A(1)) ) + call atlas__deallocate_managedmem_double( c_loc_real64(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -239,7 +239,7 @@ subroutine atlas_deallocate_managedmem_real32_r1( A ) use, intrinsic :: iso_c_binding use 
atlas_allocate_c_binding real(c_float), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_real32(A(1)) ) + call atlas__deallocate_managedmem_float( c_loc_real32(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -247,7 +247,7 @@ subroutine atlas_deallocate_managedmem_int32_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_int), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_int32(A(1)) ) + call atlas__deallocate_managedmem_int( c_loc_int32(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -255,7 +255,7 @@ subroutine atlas_deallocate_managedmem_int64_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_long), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_int64(A(1)) ) + call atlas__deallocate_managedmem_long( c_loc_int64(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -263,7 +263,7 @@ subroutine atlas_deallocate_managedmem_real64_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_double), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_real64(A(1,1)) ) + call atlas__deallocate_managedmem_double( c_loc_real64(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -271,7 +271,7 @@ subroutine atlas_deallocate_managedmem_real32_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_float), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_real32(A(1,1)) ) + call atlas__deallocate_managedmem_float( c_loc_real32(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -279,7 +279,7 @@ subroutine atlas_deallocate_managedmem_int32_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_int), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_int32(A(1,1)) ) + call atlas__deallocate_managedmem_int( c_loc_int32(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -287,7 +287,7 @@ subroutine atlas_deallocate_managedmem_int64_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_long), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_int64(A(1,1)) ) + call atlas__deallocate_managedmem_long( c_loc_int64(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 33da2af95..cbbe7536a 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -38,29 +38,33 @@ endif() ###################################################### -# Macro atlas_add_cuda_test -# Envisioned to become part of ecbuild_add_test with a CUDA option -# in a future ecbuild release -macro( atlas_add_cuda_test ) +# Macro atlas_add_hic_test +macro( atlas_add_hic_test ) set( options "" ) set( single_value_args TARGET ) set( multi_value_args SOURCES LIBS ENVIRONMENT ) cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${_FIRST_ARG} ${ARGN} ) if(_PAR_UNPARSED_ARGUMENTS) - ecbuild_critical("Unknown keywords given to atlas_add_cuda_test(): \"${_PAR_UNPARSED_ARGUMENTS}\"") + ecbuild_critical("Unknown keywords given to atlas_add_hic_test(): \"${_PAR_UNPARSED_ARGUMENTS}\"") endif() if( HAVE_CUDA AND HAVE_TESTS ) - ecbuild_debug("atlas_add_cuda_test: Adding test ${_PAR_TARGET}") + ecbuild_debug("atlas_add_hic_test: Adding test ${_PAR_TARGET}") list( APPEND _PAR_ENVIRONMENT "ATLAS_RUN_NGPUS=1" ) list( REMOVE_DUPLICATES _PAR_ENVIRONMENT ) list( APPEND _libs ${_PAR_LIBS} ) if( _libs ) - ecbuild_debug("atlas_add_cuda_test: Test ${_PAR_TARGET} 
explicitly links with libraries ${_libs}") + ecbuild_debug("atlas_add_hic_test: Test ${_PAR_TARGET} explicitly links with libraries ${_libs}") + endif() + + if( HAVE_CUDA ) + set_source_files_properties(${_PAR_SOURCES} PROPERTIES LANGUAGE CUDA) + elseif( HAVE_HIP ) + set_source_files_properties(${_PAR_SOURCES} PROPERTIES LANGUAGE HIP) endif() ecbuild_add_test( TARGET ${_PAR_TARGET} @@ -72,10 +76,6 @@ macro( atlas_add_cuda_test ) endif() endmacro() -if( HAVE_CUDA ) - set( ATLAS_TEST_ENVIRONMENT "ATLAS_RUN_NGPUS=1" ) -endif() - add_subdirectory( util ) add_subdirectory( runtime ) diff --git a/src/tests/acc/CMakeLists.txt b/src/tests/acc/CMakeLists.txt index 768805fff..0e323611f 100644 --- a/src/tests/acc/CMakeLists.txt +++ b/src/tests/acc/CMakeLists.txt @@ -20,7 +20,7 @@ if( HAVE_CUDA AND HAVE_TESTS AND HAVE_FCTEST AND HAVE_ACC ) LINKER_LANGUAGE Fortran ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ATLAS_RUN_NGPUS=1 ) - target_link_libraries( atlas_test_unified_memory_with_openacc ${ACC_Fortran_FLAGS} CUDA::cudart ) + target_link_libraries( atlas_test_unified_memory_with_openacc ${ACC_Fortran_FLAGS} hic ) set_tests_properties( atlas_test_unified_memory_with_openacc PROPERTIES LABELS "gpu;acc") diff --git a/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc b/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc index 47c0e7dc5..eb251572b 100644 --- a/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc +++ b/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc @@ -8,10 +8,10 @@ * nor does it submit to any jurisdiction. */ -#include +#include "hic/hic.h" extern "C" { void allocate_unified_impl(double** a, int N) { - cudaMallocManaged(a, N * sizeof(double)); + hicMallocManaged(a, N * sizeof(double)); } } diff --git a/src/tests/array/CMakeLists.txt b/src/tests/array/CMakeLists.txt index 3a5583223..3915caca7 100644 --- a/src/tests/array/CMakeLists.txt +++ b/src/tests/array/CMakeLists.txt @@ -61,22 +61,22 @@ if( CMAKE_BUILD_TYPE MATCHES "DEBUG" ) set ( CMAKE_NVCC_FLAGS "-G" ) endif() -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_array_kernel - SOURCES test_array_kernel.cu + SOURCES test_array_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_vector_kernel - SOURCES test_vector_kernel.cu + SOURCES test_vector_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_svector_kernel - SOURCES test_svector_kernel.cu + SOURCES test_svector_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) diff --git a/src/tests/array/test_array.cc b/src/tests/array/test_array.cc index 9ce8cbec9..290e47e0e 100644 --- a/src/tests/array/test_array.cc +++ b/src/tests/array/test_array.cc @@ -19,6 +19,8 @@ #include "atlas/array/gridtools/GridToolsMakeView.h" #endif +#include "hic/hic.h" + using namespace atlas::array; namespace atlas { @@ -553,10 +555,29 @@ CASE("test_wrap") { EXPECT(view(2) == 19); } +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + CASE("test_acc_map") { Array* ds = Array::create(2, 3, 4); - EXPECT_NO_THROW(ds->accMap()); - EXPECT(ds->accMapped() == ATLAS_HAVE_ACC); + EXPECT_NO_THROW(ds->allocateDevice()); + if( ds->deviceAllocated() ) { + EXPECT_NO_THROW(ds->accMap()); + EXPECT_EQ(ds->accMapped(), std::min(devices(),1)); + } + else { + Log::warning() << "WARNING: Array 
could not be allocated on device, so acc_map could not be tested" << std::endl; + } delete ds; } diff --git a/src/tests/array/test_array_kernel.cu b/src/tests/array/test_array_kernel.hic similarity index 92% rename from src/tests/array/test_array_kernel.cu rename to src/tests/array/test_array_kernel.hic index 8f3ba678a..14935f46c 100644 --- a/src/tests/array/test_array_kernel.cu +++ b/src/tests/array/test_array_kernel.hic @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. */ -#include +#include "hic/hic.h" #include "tests/AtlasTestEnvironment.h" #include "atlas/array.h" #include "atlas/array/MakeView.h" @@ -21,10 +21,10 @@ namespace test { #define REQUIRE_CUDA_SUCCESS(msg) \ do { \ - cudaError_t err = cudaPeekAtLastError(); \ - if (err != cudaSuccess ) { \ + hicError_t err = hicPeekAtLastError(); \ + if (err != hicSuccess ) { \ throw eckit::testing::TestException("REQUIRE_CUDA_SUCCESS ["+std::string(msg)+"] failed:\n"\ - + std::string(cudaGetErrorString(err)) , Here()); \ + + std::string(hicGetErrorString(err)) , Here()); \ } \ } while(false) @@ -70,7 +70,7 @@ CASE( "test_array" ) REQUIRE_CUDA_SUCCESS("kernel_ex"); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); ds->updateHost(); @@ -108,7 +108,7 @@ CASE( "test_array_loop" ) REQUIRE_CUDA_SUCCESS("loop_kernel_ex"); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); ds->updateHost(); diff --git a/src/tests/array/test_svector_kernel.cu b/src/tests/array/test_svector_kernel.hic similarity index 84% rename from src/tests/array/test_svector_kernel.cu rename to src/tests/array/test_svector_kernel.hic index 79631133b..0865e34ff 100644 --- a/src/tests/array/test_svector_kernel.cu +++ b/src/tests/array/test_svector_kernel.hic @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. */ -#include +#include "hic/hic.h" #include "atlas/library/config.h" #include "tests/AtlasTestEnvironment.h" @@ -45,17 +45,17 @@ CASE( "test_svector" ) EXPECT( list_ints.size() == 2); bool *result; - cudaError_t err = cudaMallocManaged(&result, sizeof(bool)); + hicError_t err = hicMallocManaged(&result, sizeof(bool)); - if(err != cudaSuccess) + if(err != hicSuccess) throw_AssertionFailed("failed to allocate GPU memory"); *result=true; kernel_exe<<<1,1>>>(list_ints.data(), list_ints.size(), 0, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); - err = cudaGetLastError(); - if(err != cudaSuccess) + err = hicGetLastError(); + if(err != hicSuccess) throw_AssertionFailed("failed to execute kernel"); EXPECT( *result ); @@ -81,9 +81,9 @@ CASE( "test_svector_resize" ) EXPECT( list_ints.size() == 5); bool *result; - cudaError_t err = cudaMallocManaged(&result, sizeof(bool)); + hicError_t err = hicMallocManaged(&result, sizeof(bool)); - if(err != cudaSuccess) + if(err != hicSuccess) throw_AssertionFailed("failed to allocate GPU memory"); *result=true; @@ -92,10 +92,10 @@ CASE( "test_svector_resize" ) list_ints[4] = 4; kernel_exe<<<1,1>>>(list_ints.data(), list_ints.size(), 3, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); - err = cudaGetLastError(); - if(err != cudaSuccess) + err = hicGetLastError(); + if(err != hicSuccess) throw_AssertionFailed("failed to execute kernel"); EXPECT( *result ); diff --git a/src/tests/array/test_vector_kernel.cu b/src/tests/array/test_vector_kernel.hic similarity index 89% rename from src/tests/array/test_vector_kernel.cu rename to src/tests/array/test_vector_kernel.hic index 18b0eb2b4..dc8442fbf 100644 --- a/src/tests/array/test_vector_kernel.cu +++ b/src/tests/array/test_vector_kernel.hic @@ -8,7 +8,7 @@ * does it 
submit to any jurisdiction. */ -#include +#include "hic/hic.h" #include "tests/AtlasTestEnvironment.h" @@ -85,13 +85,13 @@ CASE( "test_vector_kernel" ) VectorView list_ints_d = make_device_vector_view(list_ints); VectorView* list_ints_dp; - cudaMalloc((void**)(&list_ints_dp), sizeof(VectorView)); + hicMalloc((void**)(&list_ints_dp), sizeof(VectorView)); - cudaMemcpy(list_ints_dp, &list_ints_d, sizeof(VectorView), cudaMemcpyHostToDevice); + hicMemcpy(list_ints_dp, &list_ints_d, sizeof(VectorView), hicMemcpyHostToDevice); kernel_ex<<<1,1>>>(list_ints_dp); - if( cudaPeekAtLastError() != cudaSuccess) std::cout << "ERROR " << std::endl; + if( hicPeekAtLastError() != hicSuccess) std::cout << "ERROR " << std::endl; list_ints.updateHost(); EXPECT( list_ints_h[0]->val_ == 8 ); diff --git a/src/tests/field/fctest_field_host.F90 b/src/tests/field/fctest_field_host.F90 index 955dd159c..fcc2ed690 100644 --- a/src/tests/field/fctest_field_host.F90 +++ b/src/tests/field/fctest_field_host.F90 @@ -10,6 +10,7 @@ ! @author Willem Deconinck #include "fckit/fctest.h" +#include "atlas/atlas_f.h" ! ----------------------------------------------------------------------------- @@ -49,7 +50,9 @@ module fcta_Field_fxt call field%data(view) FCTEST_CHECK( .not. field%host_needs_update() ) +#if ! ATLAS_HAVE_GPU FCTEST_CHECK( .not. field%device_needs_update() ) +#endif call field%update_device() FCTEST_CHECK( .not. field%device_needs_update() ) diff --git a/src/tests/field/test_field_acc.cc b/src/tests/field/test_field_acc.cc index 99e2343b3..e998f5dda 100644 --- a/src/tests/field/test_field_acc.cc +++ b/src/tests/field/test_field_acc.cc @@ -12,7 +12,7 @@ #error This file needs to be compiled with OpenACC support #endif -#include +#include "hic/hic.h" #include #include "atlas/field/Field.h" @@ -35,10 +35,10 @@ CASE("test_acc") { *c_ptr = 5; int* d_ptr; - cudaMalloc(&d_ptr, sizeof(int)); + hicMalloc(&d_ptr, sizeof(int)); acc_map_data(c_ptr, d_ptr, sizeof(int)); - cudaMemcpy(d_ptr, c_ptr, sizeof(int), cudaMemcpyHostToDevice); + hicMemcpy(d_ptr, c_ptr, sizeof(int), hicMemcpyHostToDevice); #pragma acc kernels present(c_ptr) { @@ -47,7 +47,7 @@ CASE("test_acc") { EXPECT_EQ( *c_ptr, 5. ); - cudaMemcpy(c_ptr, d_ptr, sizeof(int), cudaMemcpyDeviceToHost); + hicMemcpy(c_ptr, d_ptr, sizeof(int), hicMemcpyDeviceToHost); EXPECT_EQ( *c_ptr, 2. ); } diff --git a/src/tests/interpolation/CMakeLists.txt b/src/tests/interpolation/CMakeLists.txt index 82005b229..a5ac0e86a 100644 --- a/src/tests/interpolation/CMakeLists.txt +++ b/src/tests/interpolation/CMakeLists.txt @@ -75,6 +75,14 @@ ecbuild_add_executable( TARGET atlas_test_interpolation_structured2D NOINSTALL ) +ecbuild_add_test( TARGET atlas_test_interpolation_structured2D_regional + SOURCES test_interpolation_structured2D_regional.cc + LIBS atlas + MPI 2 + CONDITION eckit_HAVE_MPI + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} +) + ecbuild_add_test( TARGET atlas_test_interpolation_non_linear SOURCES test_interpolation_non_linear.cc LIBS atlas diff --git a/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc b/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc index 0ce3cd190..641005fb6 100644 --- a/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc +++ b/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc @@ -1,9 +1,5 @@ /* -<<<<<<< HEAD - * (C) Copyright 1996- ECMWF. -======= * (C) Copyright 2013 ECMWF. 
->>>>>>> develop * * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. diff --git a/src/tests/interpolation/test_interpolation_spherical_vector.cc b/src/tests/interpolation/test_interpolation_spherical_vector.cc index 95281a880..16dbdabf9 100644 --- a/src/tests/interpolation/test_interpolation_spherical_vector.cc +++ b/src/tests/interpolation/test_interpolation_spherical_vector.cc @@ -95,6 +95,14 @@ struct FunctionSpaceFixtures { {"gaussian_mesh", generateNodeColums("O48", "structured")}, {"structured_columns", functionspace::StructuredColumns(Grid("O48"), option::halo(1))}, + {"structured_columns_classic", + functionspace::StructuredColumns(Grid("F48"), option::halo(1))}, + {"structured_columns_classic_halo2", + functionspace::StructuredColumns(Grid("F48"), option::halo(2))}, + {"structured_columns_classic_highres_halo2", + functionspace::StructuredColumns(Grid("F96"), option::halo(2))}, + {"structured_columns_halo2", + functionspace::StructuredColumns(Grid("O48"), option::halo(2))}, {"structured_columns_lowres", functionspace::StructuredColumns(Grid("O24"), option::halo(1))}, {"structured_columns_hires", @@ -126,7 +134,9 @@ struct InterpSchemeFixtures { static const auto structuredLinear = option::type("structured-linear2D") | option::halo(1) | Config("adjoint", true); - + static const auto structuredCubic = option::type("structured-bicubic") | + option::halo(2) | + Config("adjoint", true); static const auto sphericalVector = option::type("spherical-vector") | Config("adjoint", true); @@ -134,12 +144,15 @@ struct InterpSchemeFixtures { {"cubedsphere_bilinear", cubedsphereBilinear}, {"finite_element", finiteElement}, {"structured_linear", structuredLinear}, + {"structured_cubic", structuredCubic}, {"cubedsphere_bilinear_spherical", sphericalVector | Config("scheme", cubedsphereBilinear)}, {"finite_element_spherical", sphericalVector | Config("scheme", finiteElement)}, {"structured_linear_spherical", - sphericalVector | Config("scheme", structuredLinear)}}; + sphericalVector | Config("scheme", structuredLinear)}, + {"structured_cubic_spherical", + sphericalVector | Config("scheme", structuredCubic)}}; return interpSchemes.at(fixture); } }; @@ -254,9 +267,9 @@ void testInterpolation(const Config& config) { calcError(targetColumn, errorColumn); } else if constexpr (Rank == 3) { - ArrayForEach<0>::apply(std::tie(targetColumn, errorColumn), - calcError); - } + ArrayForEach<0>::apply(std::tie(targetColumn, errorColumn), + calcError); + } }); EXPECT_APPROX_EQ(maxError, 0., config.getDouble("tol")); @@ -296,7 +309,8 @@ void testInterpolation(const Config& config) { } } -CASE("cubed sphere vector interpolation (3d-field, 2-vector)") { + +CASE("cubed sphere CS-LFR-48 vector interpolation (3d-field, 2-vector)") { const auto config = Config("source_fixture", "cubedsphere_mesh") .set("target_fixture", "gaussian_mesh") @@ -308,7 +322,7 @@ CASE("cubed sphere vector interpolation (3d-field, 2-vector)") { testInterpolation((config)); } -CASE("cubed sphere vector interpolation (3d-field, 3-vector)") { +CASE("cubed sphere CS-LFR-48 vector interpolation (3d-field, 3-vector)") { const auto config = Config("source_fixture", "cubedsphere_mesh") .set("target_fixture", "gaussian_mesh") @@ -320,6 +334,36 @@ CASE("cubed sphere vector interpolation (3d-field, 3-vector)") { testInterpolation((config)); } +CASE("cubed sphere CS-LFR-48 (spherical vector) to empty point cloud") { + const auto config = + 
Config("source_fixture", "cubedsphere_mesh") + .set("target_fixture", "empty_point_cloud") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "cubedsphere_bilinear_spherical"); + + testInterpolation((config)); +} + +CASE("cubed sphere CS-LFR-48 to empty point cloud") { + const auto config = + Config("source_fixture", "cubedsphere_mesh") + .set("target_fixture", "empty_point_cloud") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "cubedsphere_bilinear"); + + testInterpolation((config)); +} + + +CASE("finite element to empty point cloud") { + const auto config = Config("source_fixture", "gaussian_mesh") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "finite_element"); + + testInterpolation((config)); +} + CASE("finite element vector interpolation (2d-field, 2-vector)") { const auto config = Config("source_fixture", "gaussian_mesh") .set("target_fixture", "cubedsphere_mesh") @@ -331,60 +375,77 @@ CASE("finite element vector interpolation (2d-field, 2-vector)") { testInterpolation((config)); } -CASE("structured columns vector interpolation (2d-field, 2-vector)") { - const auto config = Config("source_fixture", "structured_columns") - .set("target_fixture", "cubedsphere_mesh") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "structured_linear_spherical") - .set("file_id", "spherical_vector_sc") - .set("tol", 0.00017); +CASE("structured columns F48 cubic vector spherical interpolation (3d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_classic_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_cubic_vector_classic_sc") + .set("tol", 0.0000082); - testInterpolation((config)); + testInterpolation((config)); } -CASE("structured columns vector interpolation (2d-field, 2-vector, low-res)") { - const auto config = Config("source_fixture", "structured_columns_lowres") - .set("target_fixture", "gaussian_mesh") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "structured_linear_spherical") - .set("file_id", "spherical_vector_sc_lr") - .set("tol", 0.00056); +CASE("structured columns F96 cubic vector spherical interpolation (2d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_classic_highres_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_2D_cubic_vector_highres_classic_sc") + .set("tol", 0.000000425); testInterpolation((config)); } -CASE("structured columns vector interpolation (2d-field, 2-vector, hi-res)") { - const auto config = Config("source_fixture", "structured_columns_hires") +CASE("structured columns F96 cubic vector spherical interpolation (3d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_classic_highres_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_3D_cubic_vector_highres_classic_sc") + .set("tol", 0.00000085); + + testInterpolation((config)); +} + +CASE("structured columns O24 linear vector interpolation (2d-field, 2-vector)") { + const auto config = Config("source_fixture", "structured_columns_lowres") .set("target_fixture", "gaussian_mesh") 
.set("field_spec_fixture", "2vector") .set("interp_fixture", "structured_linear_spherical") - .set("file_id", "spherical_vector_sc_hr") - .set("tol", 0.000044); + .set("file_id", "spherical_vector_sc_lr") + .set("tol", 0.00056); testInterpolation((config)); } -CASE("cubed sphere (spherical vector) to empty point cloud") { - const auto config = - Config("source_fixture", "cubedsphere_mesh") - .set("target_fixture", "empty_point_cloud") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "cubedsphere_bilinear_spherical"); +CASE("structured columns O48 cubic vector spherical interpolation (3d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_cubic_vector_sc3") + .set("tol", 0.000007); - testInterpolation((config)); + testInterpolation((config)); } -CASE("cubed sphere to empty point cloud") { - const auto config = - Config("source_fixture", "cubedsphere_mesh") - .set("target_fixture", "empty_point_cloud") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "cubedsphere_bilinear"); +CASE("structured columns O48 linear vector interpolation (2d-field, 2-vector)") { + const auto config = Config("source_fixture", "structured_columns") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_linear_spherical") + .set("file_id", "spherical_vector_sc") + .set("tol", 0.00017); testInterpolation((config)); } -CASE("structured columns to empty point cloud") { +CASE("structured columns O48 to empty point cloud") { const auto config = Config("source_fixture", "structured_columns") .set("target_fixture", "empty_point_cloud") .set("field_spec_fixture", "2vector") @@ -393,11 +454,13 @@ CASE("structured columns to empty point cloud") { testInterpolation((config)); } -CASE("finite element to empty point cloud") { - const auto config = Config("source_fixture", "gaussian_mesh") - .set("target_fixture", "cubedsphere_mesh") +CASE("structured columns O96 vector interpolation (2d-field, 2-vector, hi-res)") { + const auto config = Config("source_fixture", "structured_columns_hires") + .set("target_fixture", "gaussian_mesh") .set("field_spec_fixture", "2vector") - .set("interp_fixture", "finite_element"); + .set("interp_fixture", "structured_linear_spherical") + .set("file_id", "spherical_vector_sc_hr") + .set("tol", 0.000044); testInterpolation((config)); } diff --git a/src/tests/interpolation/test_interpolation_structured2D_regional.cc b/src/tests/interpolation/test_interpolation_structured2D_regional.cc new file mode 100644 index 000000000..8bba8692e --- /dev/null +++ b/src/tests/interpolation/test_interpolation_structured2D_regional.cc @@ -0,0 +1,217 @@ +/* + * (C) Copyright 2024 Meteorologisk Institutt + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ */ + +#include "atlas/array.h" +#include "atlas/field.h" +#include "atlas/functionspace/StructuredColumns.h" +#include "atlas/grid.h" +#include "atlas/interpolation.h" +#include "atlas/option.h" +#include "atlas/util/Config.h" + +#include "tests/AtlasTestEnvironment.h" + +using atlas::functionspace::StructuredColumns; +using atlas::util::Config; + +namespace atlas { +namespace test { + +//----------------------------------------------------------------------------- + +static Config gridConfigs() { + Config gridConfigs; + + Config projectionConfig; + projectionConfig.set("type", "lambert_conformal_conic"); + projectionConfig.set("latitude0", 56.3); + projectionConfig.set("longitude0", 0.0); + + Config gridConfig; + gridConfig.set("type", "regional"); + std::vector lonlat = {9.9, 56.3}; + gridConfig.set("lonlat(centre)", lonlat); + gridConfig.set("projection", projectionConfig); + + const size_t sourceNx = 21; + const size_t sourceNy = 31; + const double sourceDx = 8.0e3; + const double sourceDy = 9.0e3; + + const size_t xFactor = 4; + const size_t yFactor = 3; + + const size_t targetNx = (sourceNx-1)*xFactor+1; + const size_t targetNy = (sourceNy-1)*yFactor+1; + const double targetDx = static_cast(sourceNx-1)/static_cast(targetNx-1)*sourceDx; + const double targetDy = static_cast(sourceNy-1)/static_cast(targetNy-1)*sourceDy; + + gridConfig.set("nx", sourceNx); + gridConfig.set("ny", sourceNy); + gridConfig.set("dx", sourceDx); + gridConfig.set("dy", sourceDy); + gridConfigs.set("source", gridConfig); + gridConfigs.set("source normalization", (sourceNx-1)*(sourceNy-1)); + + gridConfig.set("nx", targetNx); + gridConfig.set("ny", targetNy); + gridConfig.set("dx", targetDx); + gridConfig.set("dy", targetDy); + gridConfigs.set("target", gridConfig); + gridConfigs.set("target normalization", (targetNx-1)*(targetNy-1)); + + return gridConfigs; +} + +// Dot product +double dotProd(const Field& field01, const Field& field02) { + double dprod{}; + + const size_t ndim = field01.levels() > 0 ? 
2 : 1; + if (ndim == 1) { + const auto field01_view = array::make_view(field01); + const auto field02_view = array::make_view(field02); + for (idx_t i=0; i < field01_view.shape(0); ++i) { + dprod += field01_view(i) * field02_view(i); + } + } else if (ndim == 2) { + const auto field01_view = array::make_view(field01); + const auto field02_view = array::make_view(field02); + for (idx_t l=0; l < field01_view.shape(1); ++l) { + for (idx_t i=0; i < field01_view.shape(0); ++i) { + dprod += field01_view(i, l) * field02_view(i, l); + } + } + } + eckit::mpi::comm().allReduceInPlace(dprod, eckit::mpi::Operation::SUM); + + return dprod; +} + + +CASE("test_interpolation_structured2D_regional_1d") { + Grid sourceGrid(gridConfigs().getSubConfiguration("source")); + Grid targetGrid(gridConfigs().getSubConfiguration("target")); + + StructuredColumns sourceFs(sourceGrid, option::halo(1)); + StructuredColumns targetFs(targetGrid, option::halo(1)); + + Interpolation interpolation(Config("type", "regional-linear-2d"), sourceFs, targetFs); + + auto sourceField = sourceFs.createField(Config("name", "source")); + auto targetField = targetFs.createField(Config("name", "target")); + + // Accuracy test + const auto sourceIView = array::make_view(sourceFs.index_i()); + const auto sourceJView = array::make_view(sourceFs.index_j()); + auto sourceView = array::make_view(sourceField); + const auto sourceGhostView = atlas::array::make_view(sourceFs.ghost()); + sourceView.assign(0.0); + for (idx_t i = 0; i < sourceFs.size(); ++i) { + if (sourceGhostView(i) == 0) { + sourceView(i) = static_cast((sourceIView(i)-1)*(sourceJView(i)-1)) + /static_cast(gridConfigs().getInt("source normalization")); + } + } + + interpolation.execute(sourceField, targetField); + + const auto targetIView = array::make_view(targetFs.index_i()); + const auto targetJView = array::make_view(targetFs.index_j()); + const auto targetView = array::make_view(targetField); + const auto targetGhostView = atlas::array::make_view(targetFs.ghost()); + const double tolerance = 1.e-12; + for (idx_t i = 0; i < targetFs.size(); ++i) { + if (targetGhostView(i) == 0) { + const double targetTest = static_cast((targetIView(i)-1)*(targetJView(i)-1)) + /static_cast(gridConfigs().getInt("target normalization")); + EXPECT_APPROX_EQ(targetView(i), targetTest, tolerance); + } + } + + // Adjoint test + auto targetAdjoint = targetFs.createField(); + array::make_view(targetAdjoint).assign(array::make_view(targetField)); + targetAdjoint.adjointHaloExchange(); + + auto sourceAdjoint = sourceFs.createField(); + array::make_view(sourceAdjoint).assign(0.); + interpolation.execute_adjoint(sourceAdjoint, targetAdjoint); + + const auto yDotY = dotProd(targetField, targetField); + const auto xDotXAdj = dotProd(sourceField, sourceAdjoint); + + EXPECT_APPROX_EQ(yDotY / xDotXAdj, 1., 1e-14); +} + + +CASE("test_interpolation_structured2D_regional_2d") { + Grid sourceGrid(gridConfigs().getSubConfiguration("source")); + Grid targetGrid(gridConfigs().getSubConfiguration("target")); + + const idx_t nlevs = 2; + StructuredColumns sourceFs(sourceGrid, option::halo(1) | option::levels(nlevs)); + StructuredColumns targetFs(targetGrid, option::halo(1) | option::levels(nlevs)); + + Interpolation interpolation(Config("type", "regional-linear-2d"), sourceFs, targetFs); + + auto sourceField = sourceFs.createField(Config("name", "source")); + auto targetField = targetFs.createField(Config("name", "target")); + + // Accuracy test + const auto sourceIView = array::make_view(sourceFs.index_i()); + const 
auto sourceJView = array::make_view(sourceFs.index_j()); + auto sourceView = array::make_view(sourceField); + const auto sourceGhostView = atlas::array::make_view(sourceFs.ghost()); + sourceView.assign(0.0); + for (idx_t i = 0; i < sourceFs.size(); ++i) { + if (sourceGhostView(i) == 0) { + for (idx_t k = 0; k < nlevs; ++k) { + sourceView(i, k) = static_cast((sourceIView(i)-1)*(sourceJView(i)-1)) + /static_cast(gridConfigs().getInt("source normalization")); + } + } + } + + interpolation.execute(sourceField, targetField); + + const auto targetIView = array::make_view(targetFs.index_i()); + const auto targetJView = array::make_view(targetFs.index_j()); + const auto targetView = array::make_view(targetField); + const auto targetGhostView = atlas::array::make_view(targetFs.ghost()); + const double tolerance = 1.e-12; + for (idx_t i = 0; i < targetFs.size(); ++i) { + if (targetGhostView(i) == 0) { + const double targetTest = static_cast((targetIView(i)-1)*(targetJView(i)-1)) + /static_cast(gridConfigs().getInt("target normalization")); + for (idx_t k = 0; k < nlevs; ++k) { + EXPECT_APPROX_EQ(targetView(i, k), targetTest, tolerance); + } + } + } + + // Adjoint test + auto targetAdjoint = targetFs.createField(); + array::make_view(targetAdjoint).assign(array::make_view(targetField)); + targetAdjoint.adjointHaloExchange(); + + auto sourceAdjoint = sourceFs.createField(); + array::make_view(sourceAdjoint).assign(0.); + interpolation.execute_adjoint(sourceAdjoint, targetAdjoint); + + const auto yDotY = dotProd(targetField, targetField); + const auto xDotXAdj = dotProd(sourceField, sourceAdjoint); + + EXPECT_APPROX_EQ(yDotY / xDotXAdj, 1., 1e-14); +} + +} // namespace test +} // namespace atlas + +int main(int argc, char** argv) { + return atlas::test::run(argc, argv); +} diff --git a/src/tests/mesh/CMakeLists.txt b/src/tests/mesh/CMakeLists.txt index f9d7825e3..95aa8916b 100644 --- a/src/tests/mesh/CMakeLists.txt +++ b/src/tests/mesh/CMakeLists.txt @@ -175,9 +175,9 @@ endforeach() -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_connectivity_kernel - SOURCES test_connectivity_kernel.cu + SOURCES test_connectivity_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) diff --git a/src/tests/mesh/test_connectivity_kernel.cu b/src/tests/mesh/test_connectivity_kernel.hic similarity index 94% rename from src/tests/mesh/test_connectivity_kernel.cu rename to src/tests/mesh/test_connectivity_kernel.hic index f6e08e301..d897da28b 100644 --- a/src/tests/mesh/test_connectivity_kernel.cu +++ b/src/tests/mesh/test_connectivity_kernel.hic @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. 
*/ -#include +#include "hic/hic.h" #include "atlas/mesh/Connectivity.h" #include "tests/AtlasTestEnvironment.h" @@ -83,7 +83,7 @@ CASE( "test_block_connectivity" ) BlockConnectivity conn; bool* result; - cudaMallocManaged(&result, sizeof(bool)); + hicMallocManaged(&result, sizeof(bool)); *result = true; @@ -104,7 +104,7 @@ CASE( "test_block_connectivity" ) kernel_block<<<1,1>>>(conn, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); EXPECT( *result == true ); @@ -127,12 +127,12 @@ CASE( "test_irregular_connectivity" ) EXPECT(conn(0,0) == 1 IN_FORTRAN); bool* result; - cudaMallocManaged(&result, sizeof(bool)); + hicMallocManaged(&result, sizeof(bool)); *result = true; kernel_irr<<<1,1>>>(conn, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); EXPECT( *result == true ); @@ -155,12 +155,12 @@ CASE( "test_multiblock_connectivity" ) EXPECT(conn.block(0)(0,0) == 1 IN_FORTRAN); bool* result; - cudaMallocManaged(&result, sizeof(bool)); + hicMallocManaged(&result, sizeof(bool)); *result = true; kernel_multiblock<<<1,1>>>(conn, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); EXPECT( *result == true ); diff --git a/src/tests/parallel/CMakeLists.txt b/src/tests/parallel/CMakeLists.txt index 7922b7af5..4824b9ce1 100644 --- a/src/tests/parallel/CMakeLists.txt +++ b/src/tests/parallel/CMakeLists.txt @@ -30,6 +30,17 @@ ecbuild_add_test( TARGET atlas_test_haloexchange ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) +ecbuild_add_test( TARGET atlas_test_haloexchange_on_device + MPI 3 + CONDITION eckit_HAVE_MPI AND atlas_HAVE_GPU + SOURCES test_haloexchange.cc + LIBS atlas + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ATLAS_RUN_NGPUS=1 +) +if( TEST atlas_test_haloexchange_on_device ) + set_tests_properties ( atlas_test_haloexchange_on_device PROPERTIES LABELS "gpu") +endif() + ecbuild_add_test( TARGET atlas_test_gather MPI 3 CONDITION eckit_HAVE_MPI diff --git a/src/tests/parallel/test_haloexchange.cc b/src/tests/parallel/test_haloexchange.cc index 3bf2ccdd2..5009c4054 100644 --- a/src/tests/parallel/test_haloexchange.cc +++ b/src/tests/parallel/test_haloexchange.cc @@ -13,6 +13,8 @@ #include #include +#include "hic/hic.h" + #include "atlas/array.h" #include "atlas/array/ArrayView.h" #include "atlas/array/MakeView.h" @@ -105,7 +107,7 @@ struct validate { }; struct Fixture { - Fixture(bool on_device): on_device_(on_device) { + Fixture(bool _on_device = false): on_device(_on_device) { int nnodes_c[] = {5, 6, 7}; nb_nodes = vec(nnodes_c); N = nb_nodes[mpi::comm().rank()]; @@ -147,7 +149,7 @@ struct Fixture { std::vector gidx; int N; - bool on_device_; + bool on_device; }; //----------------------------------------------------------------------------- @@ -159,11 +161,15 @@ void test_rank0_arrview(Fixture& f) { arrv(j) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j]); } - arr.syncHostDevice(); + if (f.on_device) { + arr.updateDevice(); + } - f.halo_exchange.execute(arr, f.on_device_); + f.halo_exchange.execute(arr, f.on_device); - arr.syncHostDevice(); + if( f.on_device ) { + arr.updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -192,11 +198,15 @@ void test_rank1(Fixture& f) { arrv(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 
0 : f.gidx[j] * 100); } - arr.syncHostDevice(); + if (f.on_device) { + arr.updateDevice(); + } - f.halo_exchange.execute(arr, f.on_device_); + f.halo_exchange.execute(arr, f.on_device); - arr.syncHostDevice(); + if (f.on_device) { + arr.updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -226,8 +236,6 @@ void test_rank1_strided_v1(Fixture& f) { arrv_t(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j] * 100); } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -240,19 +248,21 @@ void test_rank1_strided_v1(Fixture& f) { array::ArraySpec { array::make_shape(f.N, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(32, 1) - } + array::make_strides(32, 1) #else - array::make_strides(2, 1) - } + array::make_strides(2, 1) #endif - )); + })); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateDevice(); + } - f.halo_exchange.execute(*arr, f.on_device_); + f.halo_exchange.execute(*arr, f.on_device); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -282,8 +292,6 @@ void test_rank1_strided_v2(Fixture& f) { arrv_t(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j] * 100); } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -295,9 +303,9 @@ void test_rank1_strided_v2(Fixture& f) { &(arrv_t(0, 1)), array::ArraySpec { array::make_shape(f.N, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(32, 1) + array::make_strides(32, 1) #else - array::make_strides(2, 1) + array::make_strides(2, 1) #endif })); @@ -333,11 +341,15 @@ void test_rank2(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device) { + arr.updateDevice(); + } - f.halo_exchange.execute(arr, f.on_device_); + f.halo_exchange.execute(arr, f.on_device); - arr.syncHostDevice(); + if (f.on_device) { + arr.updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -373,23 +385,26 @@ void test_rank2_l1(Fixture& f) { (size_t(f.part[p]) != mpi::comm().rank() ? 
0 : f.gidx[p] * std::pow(10, i)); } } - arr_t.syncHostDevice(); std::unique_ptr arr(array::Array::wrap( arrv_t.data(), array::ArraySpec { array::make_shape(f.N, 1, 2), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(96, 32, 1) + array::make_strides(96, 32, 1) #else - array::make_strides(6, 2, 1) + array::make_strides(6, 2, 1) #endif })); - arr_t.syncHostDevice(); + if (f.on_device) { + arr->updateDevice(); + } - f.halo_exchange.execute(*arr, false); + f.halo_exchange.execute(*arr, f.on_device); - arr_t.syncHostDevice(); + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -426,7 +441,6 @@ void test_rank2_l1(Fixture& f) { } void test_rank2_l2_v2(Fixture& f) { -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST // Test rank 2 halo-exchange array::ArrayT arr_t(f.N, 3, 2); array::ArrayView arrv_t = array::make_host_view(arr_t); @@ -443,13 +457,22 @@ void test_rank2_l2_v2(Fixture& f) { &arrv_t(0, 1, 1), array::ArraySpec { array::make_shape(f.N, 1, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(192, 32, 1) + array::make_strides(192, 32, 1) #else - array::make_strides(6, 2, 1) + array::make_strides(6, 2, 1) #endif })); - f.halo_exchange.execute(*arr, f.on_device_); + + if (f.on_device) { + arr->updateDevice(); + } + + f.halo_exchange.execute(*arr, f.on_device); + + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -483,11 +506,9 @@ void test_rank2_l2_v2(Fixture& f) { break; } } -#endif } void test_rank2_v2(Fixture& f) { -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST array::ArrayT arr_t(f.N, 3, 2); array::ArrayView arrv_t = array::make_view(arr_t); for (int p = 0; p < f.N; ++p) { @@ -503,13 +524,21 @@ void test_rank2_v2(Fixture& f) { &arrv_t(0, 0, 1), array::ArraySpec { array::make_shape(f.N, 3, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(192, 32, 2) + array::make_strides(192, 32, 2) #else - array::make_strides(6, 2, 2) + array::make_strides(6, 2, 2) #endif })); - f.halo_exchange.execute(*arr, f.on_device_); + if (f.on_device) { + arr->updateDevice(); + } + + f.halo_exchange.execute(*arr, f.on_device); + + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -543,18 +572,21 @@ void test_rank2_v2(Fixture& f) { break; } } -#endif } void test_rank0_wrap(Fixture& f) { std::unique_ptr arr(array::Array::wrap(f.gidx.data(), array::make_shape(f.N))); array::ArrayView arrv = array::make_view(*arr); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateDevice(); + } - f.halo_exchange.execute(*arr, f.on_device_); + f.halo_exchange.execute(*arr, f.on_device); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -642,7 +674,6 @@ void test_rank2_paralleldim2(Fixture& f) { } void test_rank1_cinterface(Fixture& f) { -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST array::ArrayT arr(f.N, 2); array::ArrayView arrv = array::make_host_view(arr); for (int j = 0; j < f.N; ++j) { @@ -650,8 +681,6 @@ void test_rank1_cinterface(Fixture& f) { arrv(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 
0 : f.gidx[j] * 100); } - arr.syncHostDevice(); - int shapes[2] = {(int)arrv.shape(0), (int)arrv.shape(1)}; int strides[2] = {(int)arrv.stride(0), (int)arrv.stride(1)}; @@ -674,11 +703,10 @@ void test_rank1_cinterface(Fixture& f) { break; } } -#endif } CASE("test_haloexchange") { - Fixture f(false); + Fixture f; SECTION("test_rank0_arrview") { test_rank0_arrview(f); } @@ -701,20 +729,46 @@ CASE("test_haloexchange") { SECTION("test_rank1_paralleldim_1") { test_rank1_paralleldim1(f); } SECTION("test_rank2_paralleldim_2") { test_rank2_paralleldim2(f); } + SECTION("test_rank1_cinterface") { test_rank1_cinterface(f); } +} -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - f.on_device_ = true; +#if ATLAS_HAVE_GPU + +//----------------------------------------------------------------------------- + +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + +CASE("test_haloexchange on device") { + if (devices() == 0) { + Log::warning() << "\"test_haloexchange on device skipped\": No devices available" << std::endl; + return; + } + + bool on_device = true; + Fixture f(on_device); SECTION("test_rank0_arrview") { test_rank0_arrview(f); } SECTION("test_rank1") { test_rank1(f); } SECTION("test_rank2") { test_rank2(f); } - SECTION("test_rank0_wrap") { test_rank0_wrap(f); } -#endif + SECTION("test_rank0_wrap") { test_rank0_wrap(f); } } +#endif + //----------------------------------------------------------------------------- diff --git a/src/tests/parallel/test_haloexchange_adjoint.cc b/src/tests/parallel/test_haloexchange_adjoint.cc index 816b68441..392bc3575 100644 --- a/src/tests/parallel/test_haloexchange_adjoint.cc +++ b/src/tests/parallel/test_haloexchange_adjoint.cc @@ -183,11 +183,15 @@ void test_rank0_arrview(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -240,11 +244,15 @@ void test_rank0_arrview_adj_test(Fixture& f) { arrv(j) = arrv_init(j); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum1 POD sum1(0); @@ -252,11 +260,15 @@ void test_rank0_arrview_adj_test(Fixture& f) { sum1 += arrv(j) * arrv(j); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum2 POD sum2(0); @@ -299,11 +311,15 @@ void test_rank1(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -336,11 +352,15 @@ void test_rank1_adj_test(Fixture& f) { arrv(j, 1ul) = arrv_init(j, 1ul); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum1 POD sum1(0); @@ -350,11 +370,15 @@ void test_rank1_adj_test(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } 
 
     f.halo_exchange_std->execute_adjoint(arr, f.on_device_);
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -399,8 +423,6 @@ void test_rank1_strided_v1(Fixture& f) {
         }
     }
 
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -460,9 +482,6 @@ void test_rank1_strided_v1_adj_test(Fixture& f) {
         arrv_t(j, 1ul) = arrv_init_t(j, 1ul);
     }
 
-    arr_init_t.syncHostDevice();
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -483,11 +502,15 @@ void test_rank1_strided_v1_adj_test(Fixture& f) {
 #endif
         ));
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum1
     POD sum1(0);
@@ -497,11 +520,15 @@
         }
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -546,8 +573,6 @@ void test_rank1_strided_v2(Fixture& f) {
         }
     }
 
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -565,6 +590,10 @@ void test_rank1_strided_v2(Fixture& f) {
 #endif
         }));
 
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
+
     f.halo_exchange_std->execute_adjoint(*arr, false);
 
     switch (mpi::comm().rank()) {
@@ -599,9 +628,6 @@ void test_rank1_strided_v2_adj_test(Fixture& f) {
         arrv_t(j, 1ul) = arrv_init_t(j, 1ul);
     }
 
-    arr_init_t.syncHostDevice();
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -619,11 +645,15 @@ void test_rank1_strided_v2_adj_test(Fixture& f) {
 #endif
         }));
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum1
     POD sum1(0);
@@ -633,11 +663,15 @@
         }
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -692,11 +726,15 @@ void test_rank2(Fixture& f) {
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(arr, f.on_device_);
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     switch (mpi::comm().rank()) {
         case 0: {
@@ -738,7 +776,9 @@ void test_rank2_adj_test(Fixture& f) {
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(arr, f.on_device_);
 
@@ -752,11 +792,15 @@
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(arr, f.on_device_);
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -810,8 +854,6 @@ void test_rank2_l1(Fixture& f) {
         }
     }
 
-    arr_t.syncHostDevice();
-
     std::unique_ptr arr(array::Array::wrap(
         arrv_t.data(), array::ArraySpec {
             array::make_shape(f.N, 1, 2),
@@ -822,11 +864,8 @@ void test_rank2_l1(Fixture& f) {
 #endif
         }));
 
-    arr_t.syncHostDevice();
-
     f.halo_exchange_std->execute_adjoint(*arr, false);
 
-    arr_t.syncHostDevice();
 
     switch (mpi::comm().rank()) {
         case 0: {
@@ -877,7 +916,6 @@ void test_rank2_l1_adj_test(Fixture& f) {
             arrv_t(p, i, static_cast(1)) = arrv_init_t(p, i, static_cast(1));
         }
     }
-    arr_t.syncHostDevice();
 
     std::unique_ptr arr(array::Array::wrap(
         arrv_t.data(), array::ArraySpec {
@@ -889,12 +927,8 @@ void test_rank2_l1_adj_test(Fixture& f) {
 #endif
         }));
 
-    arr_t.syncHostDevice();
-
     f.halo_exchange_std->execute(*arr, false);
 
-    arr_t.syncHostDevice();
-
     // sum1
     POD sum1(0);
     for (std::size_t p = 0; p < static_cast(f.N); ++p) {
@@ -905,12 +939,8 @@
         }
     }
 
-    arr->syncHostDevice();
-
     f.halo_exchange_std->execute_adjoint(*arr, false);
 
-    arr->syncHostDevice();
-
     // sum2
     POD sum2(0);
     for (std::size_t p = 0; p < static_cast(f.N); ++p) {
@@ -1153,7 +1183,8 @@ void test_rank2_v2(Fixture& f) {
 }
 
 void test_rank0_wrap(Fixture& f) {
-    std::unique_ptr arr(array::Array::wrap(f.gidx.data(), array::make_shape(f.N)));
+    std::vector existing = f.gidx;
+    std::unique_ptr arr(array::Array::wrap(existing.data(), array::make_shape(f.N)));
     array::ArrayView arrv = array::make_view(*arr);
     switch (mpi::comm().rank()) {
         case 0: {
@@ -1179,11 +1210,15 @@
         }
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     switch (mpi::comm().rank()) {
         case 0: {
@@ -1202,6 +1237,7 @@
             break;
         }
     }
+    arr->deallocateDevice();
 }
 
 void test_rank0_wrap_adj_test(Fixture& f) {
@@ -1226,7 +1262,9 @@
         arrv_init(j) = arrv(j);
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(*arr, f.on_device_);
 
@@ -1236,11 +1274,15 @@
         sum1 += arrv(j) * arrv(j);
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -1485,7 +1527,9 @@ void test_rank1_cinterface(Fixture& f) {
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     int shapes[2]  = {(int)arrv.shape(0), (int)arrv.shape(1)};
     int strides[2] = {(int)arrv.stride(0), (int)arrv.stride(1)};
diff --git a/src/tests/util/CMakeLists.txt b/src/tests/util/CMakeLists.txt
index 47d8b6def..7c7262ccf 100644
--- a/src/tests/util/CMakeLists.txt
+++ b/src/tests/util/CMakeLists.txt
@@ -94,3 +94,12 @@ ecbuild_add_test( TARGET atlas_test_unitsphere
   ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT}
 )
 
+ecbuild_add_test( TARGET atlas_test_pack_vector_fields
+  SOURCES test_pack_vector_fields.cc
+  LIBS atlas
+  ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT}
+)
+
+
+
+
diff --git a/src/tests/util/test_pack_vector_fields.cc b/src/tests/util/test_pack_vector_fields.cc
new file mode 100644
index 000000000..3c6ea89df
--- /dev/null
+++ b/src/tests/util/test_pack_vector_fields.cc
@@ -0,0 +1,235 @@
+/*
+ * (C) Crown Copyright 2024 Met Office
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ */
+
+#include
+#include
+
+#include "atlas/array.h"
+#include "atlas/field.h"
+#include "atlas/functionspace.h"
+#include "atlas/grid.h"
+#include "atlas/option.h"
+#include "atlas/util/Config.h"
+#include "atlas/util/PackVectorFields.h"
+#include "tests/AtlasTestEnvironment.h"
+
+namespace atlas {
+namespace test {
+
+FieldSet setFields(const FunctionSpace& functionSpace,
+                   const std::vector& fieldConfigs) {
+  auto fields = FieldSet{};
+
+  // Set unique values to all field elements.
+  auto value = 0;
+  for (const auto& fieldConfig : fieldConfigs) {
+    auto field = fields.add(functionSpace.createField(fieldConfig));
+    for (auto arrayIdx = size_t{0}; arrayIdx < field.size(); arrayIdx++) {
+      field->data()[arrayIdx] = value++;
+    }
+    field.metadata().set("comment", "This field is made with love.");
+    auto vectorFieldName = std::string{};
+    if (fieldConfig.get("vector_field_name", vectorFieldName)) {
+      field.metadata().set("vector_field_name", vectorFieldName);
+    }
+  }
+  return fields;
+}
+
+FieldSet createOrderedTestFields() {
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 are contiguous in field set.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("scalar") | option::levels(1) |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::levels(1) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::levels(1) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createUnorderedTestFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 are not contiguous in field set.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentRankFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different ranks.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::levels(10) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentDatatypeFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different datatypes.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentLevelsFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different number of levels.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::levels(10) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::levels(20) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentVariablesFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different number of variables.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::datatype(DataType::kind()) |
+                         option::variables(2) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+void checkTestFields(const FieldSet& fields) {
+  auto value = 0;
+  for (const auto& field : fields) {
+    for (auto arrayIdx = size_t{0}; arrayIdx < field.size(); arrayIdx++) {
+      EXPECT(field->data()[arrayIdx] == value++);
+    }
+    EXPECT(field.metadata().get("comment") ==
+           "This field is made with love.");
+  }
+}
+
+CASE("Basic pack and unpack") {
+  const auto fields = createOrderedTestFields();
+
+  const auto packedFields = util::pack_vector_fields(fields);
+
+  EXPECT(!packedFields.has("vector_component_0"));
+  EXPECT(!packedFields.has("vector_component_1"));
+  EXPECT(packedFields.has("vector"));
+  EXPECT(packedFields.has("scalar"));
+
+  const auto unpackedFields = util::unpack_vector_fields(packedFields);
+
+  EXPECT(unpackedFields.has("vector_component_0"));
+  EXPECT(unpackedFields.has("vector_component_1"));
+  EXPECT(!unpackedFields.has("vector"));
+  EXPECT(unpackedFields.has("scalar"));
+
+  checkTestFields(unpackedFields);
+}
+
+CASE("unpack into existing field set") {
+  auto fields = createUnorderedTestFields();
+
+  const auto packedFields = util::pack_vector_fields(fields);
+
+  EXPECT(!packedFields.has("vector_component_0"));
+  EXPECT(!packedFields.has("vector_component_1"));
+  EXPECT(packedFields.has("vector"));
+  EXPECT(packedFields.has("scalar"));
+
+  // Need to unpack into existing field to guarantee field order is preserved.
+  array::make_view(fields["vector_component_0"]).assign(0.);
+  array::make_view(fields["vector_component_1"]).assign(0.);
+  util::unpack_vector_fields(packedFields, fields);
+
+  EXPECT(fields.has("vector_component_0"));
+  EXPECT(fields.has("vector_component_1"));
+  EXPECT(!fields.has("vector"));
+  EXPECT(fields.has("scalar"));
+
+  checkTestFields(fields);
+}
+
+CASE("check that bad inputs throw") {
+  // Try to apply pack to inconsistent field sets.
+  EXPECT_THROWS(util::pack_vector_fields(createInconsistentRankFields()));
+  EXPECT_THROWS(
+      util::pack_vector_fields(createInconsistentDatatypeFields()));
+  EXPECT_THROWS(
+      util::pack_vector_fields(createInconsistentLevelsFields()));
+  EXPECT_THROWS(
+      util::pack_vector_fields(createInconsistentVariablesFields()));
+}
+
+}  // namespace test
+}  // namespace atlas
+
+int main(int argc, char** argv) { return atlas::test::run(argc, argv); }
diff --git a/tools/atlas-run b/tools/atlas-run
index cd5d91252..39a062bd5 100755
--- a/tools/atlas-run
+++ b/tools/atlas-run
@@ -55,10 +55,13 @@ then
 elif command_exists srun ; then
   LAUNCH="srun ${ATLAS_RUN_MPI_ARGS}"
   if [ -n "${ATLAS_RUN_NGPUS}" ]; then
-    LAUNCH="${LAUNCH} --gres=gpu:${ATLAS_RUN_NGPUS}"
+    LAUNCH="${LAUNCH} --gpus-per-task=${ATLAS_RUN_NGPUS}"
   fi
   if [ -z "${ATLAS_RUN_NPROCS}" ]; then
     LAUNCH="${LAUNCH} -n 1"
+    if [ -n "${SLURM_GPUS}" ]; then
+      LAUNCH="${LAUNCH} --gpus-per-task=1"
+    fi
   else
     LAUNCH="${LAUNCH} -n ${ATLAS_RUN_NPROCS}"
   fi
diff --git a/tools/c2f.py b/tools/c2f.py
index cb30c421c..652b65935 100755
--- a/tools/c2f.py
+++ b/tools/c2f.py
@@ -7,9 +7,8 @@
 # granted to it by virtue of its status as an intergovernmental organisation nor
 # does it submit to any jurisdiction.
 
-#! /usr/bin/env python
+#! /usr/bin/env python3
 
-from __future__ import print_function
 from argparse import ArgumentParser
 import re
 import time
@@ -50,7 +49,7 @@
     'logical(c_bool)':'c_bool'
 }
 
-function_signature = re.compile('''^
+function_signature = re.compile(r'''^
 ( #1 Type
 (const\s+)? #2 Leading const
 ([^\*\&\s\[]+) #3
@@ -72,7 +71,7 @@ def __init__(self,msg):
 
 
 class Argument:
-    arg_signature = re.compile('''^
+    arg_signature = re.compile(r'''^
 ( #1 Type
 (const\s+)? #2 Leading const
 ([^\*\&\s\[]+) #3
@@ -180,9 +179,6 @@ def __init__(self,line):
         except KeyError:
             raise ParsingFailed("Could not parse return type for statement "+line)
 
-    def __nonzero__(self): # python 2
-        return self.type != "void"
-
     def __bool__(self): # python 3
         return self.type != "void"
@@ -304,9 +300,9 @@ def statements(self):
 with open(input,'r') as file:
     content = file.read()
 
-externC = re.compile('^extern\s+"C"(\s*{)?')
+externC = re.compile(r'^extern\s+"C"(\s*{)?')
 
-regex_externC = [re.compile('^extern\s+"C"(\s*{)?'),re.compile('^{')]
+regex_externC = [re.compile(r'^extern\s+"C"(\s*{)?'),re.compile('^{')]
 in_externC = [False,False]
 
 for line in content.splitlines():
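Note on the halo-exchange test changes above: every on-device section follows the same probe-then-round-trip pattern (check that a GPU is visible, copy host data to the device, exchange on the device, copy halos back for the host-side checks). The sketch below restates that pattern outside the test fixture, assuming the HIC runtime functions shown in the diff are declared in a header such as "hic/hic.h"; the helper names `visible_devices` and `exchange_with_device_roundtrip` are illustrative only, and the templated `HaloExchange::execute` call is abstracted behind a generic callable rather than reproduced exactly.

```cpp
#include <utility>

#include "atlas/array.h"  // atlas::array::Array with updateDevice()/updateHost()
#include "hic/hic.h"      // assumed header exposing hicGetDeviceCount, hicSuccess, hicGetLastError

// Illustrative helper mirroring the devices() guard in test_haloexchange.cc:
// report how many GPUs are visible, treating any hic error as "no devices".
inline int visible_devices() {
    int n = 0;
    if (hicGetDeviceCount(&n) != hicSuccess) {
        (void)hicGetLastError();  // clear the error state before falling back to host
        n = 0;
    }
    return n;
}

// Illustrative wrapper for the updateDevice / execute / updateHost round trip
// used by the on-device test sections; `exchange` stands in for the call to
// HaloExchange::execute, whose template arguments are omitted here.
template <typename Exchange>
void exchange_with_device_roundtrip(atlas::array::Array& arr, bool on_device, Exchange&& exchange) {
    if (on_device) {
        arr.updateDevice();  // stage the host data on the GPU
    }
    std::forward<Exchange>(exchange)(arr, on_device);
    if (on_device) {
        arr.updateHost();    // copy the exchanged halos back for host-side checks
    }
}
```

Running with `on_device = true` only makes sense when `visible_devices() > 0`, which is exactly the condition the new `CASE("test_haloexchange on device")` checks before constructing its fixture.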
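Note on test_pack_vector_fields.cc: the test exercises the new PackVectorFields utility, which combines component fields tagged with the same "vector_field_name" metadata key into one higher-rank field and can restore them afterwards. The sketch below shows the intended usage under the same assumptions the test makes; the field names, the "O16" grid and the wrapper function are illustrative choices and not part of the utility's API.

```cpp
#include "atlas/field.h"
#include "atlas/functionspace.h"
#include "atlas/grid.h"
#include "atlas/option.h"
#include "atlas/util/PackVectorFields.h"

// Illustrative usage of util::pack_vector_fields / util::unpack_vector_fields.
void pack_unpack_example() {
    using namespace atlas;

    const auto functionSpace = functionspace::StructuredColumns(Grid("O16"));

    auto fields = FieldSet{};
    for (const auto* name : {"eastward_wind", "northward_wind"}) {
        auto field = fields.add(functionSpace.createField<double>(option::name(name)));
        // Components sharing the same "vector_field_name" are packed into one field.
        field.metadata().set("vector_field_name", "wind");
    }

    const auto packed = util::pack_vector_fields(fields);  // holds "wind" instead of the two components
    util::unpack_vector_fields(packed, fields);            // writes the components back into `fields`
}
```

Unpacking into the existing `fields` set (the two-argument overload) preserves the original field order, which is why the "unpack into existing field set" test case uses it.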