diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fbb41bf93..b7037788f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -121,11 +121,11 @@ jobs: - name: macos # Xcode compiler requires empty environment variables, so we pass null (~) here - os: macos-12 - compiler: clang-14 + os: macos-14 + compiler: clang-15 compiler_cc: ~ compiler_cxx: ~ - compiler_fc: gfortran-11 + compiler_fc: gfortran-13 caching: true coverage: false cmake_options: -DMPI_SLOTS=4 diff --git a/CHANGELOG.md b/CHANGELOG.md index e79acf4aa..6417ccd00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html ## [Unreleased] +## [0.39.0] - 2024-09-18 + +### Added +- Add HIC abstraction layer for HIP and CUDA by @wdeconinck in https://github.com/ecmwf/atlas/pull/219 +- Support for HIP via HIC +- Add regional interpolation by @benjaminmenetrier in https://github.com/ecmwf/atlas/pull/215 +- Pack vector components into higher-rank vector fields. by @odlomax in https://github.com/ecmwf/atlas/pull/218 + +### Fixed +- Fix missing header in FieldImpl.h by @tehrengruber in https://github.com/ecmwf/atlas/pull/214 +- Bug fixes to vector interpolation with StructuredColumns and spherical vector interpolation by @MarekWlasak in https://github.com/ecmwf/atlas/pull/222 + + ## [0.38.1] - 2024-07-15 ### Fixed @@ -551,6 +564,7 @@ Fix StructuredInterpolation2D with retry for failed stencils ## 0.13.0 - 2018-02-16 [Unreleased]: https://github.com/ecmwf/atlas/compare/master...develop +[0.39.0]: https://github.com/ecmwf/atlas/compare/0.38.1...0.39.0 [0.38.1]: https://github.com/ecmwf/atlas/compare/0.38.0...0.38.1 [0.38.0]: https://github.com/ecmwf/atlas/compare/0.37.0...0.38.0 [0.37.0]: https://github.com/ecmwf/atlas/compare/0.36.0...0.37.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index df9963eaa..8f89d8fa6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,9 @@ ecbuild_add_option( FEATURE ECKIT_DEVELOP DESCRIPTION "Used to enable new features or API depending on eckit develop branch, not yet in a tagged release" DEFAULT OFF ) +add_subdirectory( hic ) +find_package(hic) + include( features/BOUNDSCHECKING ) include( features/FORTRAN ) diff --git a/VERSION b/VERSION index bb22182d4..4ef2eb086 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.38.1 +0.39.0 diff --git a/cmake/atlas-import.cmake.in b/cmake/atlas-import.cmake.in index 98b7c9bb2..1c812e4de 100644 --- a/cmake/atlas-import.cmake.in +++ b/cmake/atlas-import.cmake.in @@ -23,6 +23,7 @@ if( atlas_HAVE_FORTRAN ) endif() find_dependency( atlas_io HINTS ${CMAKE_CURRENT_LIST_DIR}/../atlas_io @atlas_io_DIR@ @atlas_io_BINARY_DIR@ ) +find_dependency( hic HINTS ${CMAKE_CURRENT_LIST_DIR}/../hic @hic_DIR@ @hic_BINARY_DIR@ ) ## Eigen3 set( Eigen3_HINT @Eigen3_DIR@ ) diff --git a/cmake/atlas_compile_flags.cmake b/cmake/atlas_compile_flags.cmake index 8b334f149..8261a407d 100644 --- a/cmake/atlas_compile_flags.cmake +++ b/cmake/atlas_compile_flags.cmake @@ -11,8 +11,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) if( CMAKE_CXX_COMPILER_ID MATCHES Cray ) - - ecbuild_add_cxx_flags("-hnomessage=3140" NAME atlas_cxx_disable_warnings ) # colon separated numbers + if( NOT CMAKE_CXX_COMPILER_ID MATCHES CrayClang ) + ecbuild_add_cxx_flags("-hnomessage=3140" NAME atlas_cxx_disable_warnings ) # colon separated numbers + endif() ecbuild_add_fortran_flags("-hnomessage=3140" NAME atlas_fortran_disable_warnings ) # colon separated numbers # CC-3140 crayc++: WARNING File = 
atlas/functionspace/NodeColumns.cc, Line = 1, Column = 1 diff --git a/cmake/atlas_host_device.cmake b/cmake/atlas_host_device.cmake index 3a404a8e4..d4e9a5d67 100644 --- a/cmake/atlas_host_device.cmake +++ b/cmake/atlas_host_device.cmake @@ -6,7 +6,7 @@ # granted to it by virtue of its status as an intergovernmental organisation nor # does it submit to any jurisdiction. -function( create_cuda_wrapper variable ) +function( create_hic_wrapper variable ) set( options "" ) set( single_value_args SOURCE ) set( multi_value_args "" ) @@ -17,18 +17,24 @@ function( create_cuda_wrapper variable ) get_filename_component(name ${_PAR_SOURCE} NAME) get_filename_component(abspath ${_PAR_SOURCE} ABSOLUTE) + if( HAVE_CUDA ) + set( extension "cu" ) + elseif( HAVE_HIP ) + set( extension "hip" ) + endif() + if( directory ) - set(cuda_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${directory}/${base}.cu) + set(hic_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${directory}/${base}.${extension}) else() - set(cuda_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${base}.cu) + set(hic_wrapper ${CMAKE_CURRENT_BINARY_DIR}/${base}.${extension}) endif() - set(${variable} ${cuda_wrapper} PARENT_SCOPE) + set(${variable} ${hic_wrapper} PARENT_SCOPE) set(content " #include \"atlas/${directory}/${name}\" ") - if( ${abspath} IS_NEWER_THAN ${cuda_wrapper} ) - file(WRITE ${cuda_wrapper} "${content}") + if( ${abspath} IS_NEWER_THAN ${hic_wrapper} ) + file(WRITE ${hic_wrapper} "${content}") endif() endfunction() @@ -40,12 +46,12 @@ function( atlas_host_device srclist ) set( multi_value_args SOURCES ) cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${ARGN} ) - if( ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA ) - set( use_cuda_srclist ${_PAR_SOURCES} ) + if( HAVE_GPU ) + set( use_hic_srclist ${_PAR_SOURCES} ) - foreach( src ${use_cuda_srclist} ) - create_cuda_wrapper( cuda_wrapper SOURCE ${src} ) - list( APPEND ${srclist} ${cuda_wrapper} ) + foreach( src ${use_hic_srclist} ) + create_hic_wrapper( hic_wrapper SOURCE ${src} ) + list( APPEND ${srclist} ${hic_wrapper} ) ecbuild_list_exclude_pattern( LIST ${srclist} REGEX ${src} ) endforeach() diff --git a/cmake/features/ACC.cmake b/cmake/features/ACC.cmake index 1464d9bc0..056b2c812 100644 --- a/cmake/features/ACC.cmake +++ b/cmake/features/ACC.cmake @@ -3,9 +3,14 @@ if( atlas_HAVE_ATLAS_FIELD ) set( ATLAS_ACC_CAPABLE FALSE ) -if( HAVE_CUDA ) +if( HAVE_GPU ) if( CMAKE_Fortran_COMPILER_ID MATCHES "PGI|NVHPC" ) set( ATLAS_ACC_CAPABLE TRUE ) + else() + find_package(OpenACC COMPONENTS C Fortran) + if(OpenACC_Fortran_FOUND AND OpenACC_C_FOUND) + set( ATLAS_ACC_CAPABLE TRUE ) + endif() endif() endif() @@ -22,6 +27,9 @@ if( atlas_HAVE_ACC ) if( NOT ACC_C_COMPILER ) ecbuild_error( "Could not find OpenACC capable C compiler" ) endif() + else() + set( ACC_Fortran_FLAGS ${OpenACC_Fortran_FLAGS} ) + set( ACC_C_FLAGS ${OpenACC_C_FLAGS} ) endif() endif() diff --git a/cmake/features/CUDA.cmake b/cmake/features/CUDA.cmake index 1bd57058a..80d5723d8 100644 --- a/cmake/features/CUDA.cmake +++ b/cmake/features/CUDA.cmake @@ -1,15 +1,37 @@ -ecbuild_add_option( FEATURE CUDA - DESCRIPTION "Enable CUDA support" - DEFAULT OFF - ) +# ecbuild_add_option( FEATURE CUDA +# DESCRIPTION "Enable CUDA support" +# DEFAULT OFF +# ) +# ecbuild_add_option( FEATURE HIP +# DESCRIPTION "Enable CUDA support" +# DEFAULT OFF +# ) -if( HAVE_CUDA ) - - enable_language( CUDA ) - ecbuild_info( "CUDA language enabled" ) - - find_package( CUDAToolkit REQUIRED ) +set( atlas_HAVE_CUDA 0 ) +set( atlas_HAVE_HIP 0 ) +set( atlas_HAVE_GPU 0 
) +if( hic_HAVE_CUDA ) + enable_language( CUDA ) + ecbuild_info( "CUDA language enabled" ) + find_package(CUDAToolkit REQUIRED) + set( atlas_HAVE_CUDA 1 ) + set( atlas_HAVE_GPU 1 ) +elseif( hic_HAVE_HIP ) + enable_language( HIP ) + ecbuild_info( "HIP language enabled" ) + find_package(hip CONFIG REQUIRED) + set( atlas_HAVE_HIP 1 ) + set( atlas_HAVE_GPU 1 ) endif() +set( HAVE_CUDA ${atlas_HAVE_CUDA} ) +set( HAVE_HIP ${atlas_HAVE_HIP} ) +set( HAVE_GPU ${atlas_HAVE_GPU} ) + +if( HAVE_GPU ) + ecbuild_info("GPU support enabled") +else() + ecbuild_info("GPU support not enabled") +endif() \ No newline at end of file diff --git a/cmake/project_summary.cmake b/cmake/project_summary.cmake index eb4face8b..62ead6584 100644 --- a/cmake/project_summary.cmake +++ b/cmake/project_summary.cmake @@ -60,10 +60,10 @@ if( atlas_HAVE_GRIDTOOLS_STORAGE ) else() - if( NOT atlas_HAVE_CUDA ) + if( NOT atlas_HAVE_GPU ) ecbuild_info( "Array storage backend: Native [HOST]" ) else() - ecbuild_info( "Array storage backend: Native [CUDA]" ) + ecbuild_info( "Array storage backend: Native [GPU]" ) endif() endif() diff --git a/hic/AUTHORS b/hic/AUTHORS new file mode 100644 index 000000000..e4c77d93c --- /dev/null +++ b/hic/AUTHORS @@ -0,0 +1,8 @@ +Authors +======= + +- Willem Deconinck + +Thanks for contributions from +============================= + diff --git a/hic/CHANGELOG.md b/hic/CHANGELOG.md new file mode 100644 index 000000000..e69de29bb diff --git a/hic/CMakeLists.txt b/hic/CMakeLists.txt new file mode 100644 index 000000000..904f7a077 --- /dev/null +++ b/hic/CMakeLists.txt @@ -0,0 +1,56 @@ +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. 
+ +############################################################################################ +# hic + +cmake_minimum_required( VERSION 3.18 FATAL_ERROR ) + +find_package( ecbuild 3.4 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../ecbuild ${CMAKE_CURRENT_SOURCE_DIR}/../../ecbuild ) + +################################################################################ +# Initialise project + +if( NOT DEFINED atlas_VERSION ) + set( atlas_VERSION 0.0.0 ) +endif() + +project( hic VERSION ${atlas_VERSION} LANGUAGES CXX ) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +################################################################################ +# Features that can be enabled / disabled with -DENABLE_ + +ecbuild_add_option( FEATURE CUDA DESCRIPTION "Use CUDA as backend to HIC" DEFAULT OFF ) +ecbuild_add_option( FEATURE HIP DESCRIPTION "Use HIP as backend to HIC" DEFAULT OFF ) + +if( HAVE_CUDA ) + find_package(CUDAToolkit REQUIRED) +elseif( HAVE_HIP ) + find_package(hip CONFIG REQUIRED) +endif() + +add_subdirectory( src ) +add_subdirectory( tests ) + +################################################################################ +# Export and summarize + +ecbuild_add_resources( + TARGET hic-others + SOURCES_PACK + README.md + CHANGELOG.md + LICENSE +) + +ecbuild_install_project( NAME hic ) +ecbuild_print_summary() + diff --git a/hic/LICENSE b/hic/LICENSE new file mode 100644 index 000000000..af6ca0287 --- /dev/null +++ b/hic/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 1996-2018 ECMWF + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/hic/README.md b/hic/README.md new file mode 100644 index 000000000..1b756fb40 --- /dev/null +++ b/hic/README.md @@ -0,0 +1,5 @@ +hic +=== + +HIC: An abstraction to HIP and CUDA + diff --git a/hic/cmake/hic-import.cmake.in b/hic/cmake/hic-import.cmake.in new file mode 100644 index 000000000..23ac59885 --- /dev/null +++ b/hic/cmake/hic-import.cmake.in @@ -0,0 +1,13 @@ + +include( CMakeFindDependencyMacro ) + +set( hic_HAVE_CUDA @hic_HAVE_CUDA@ ) +set( hic_HAVE_HIP @hic_HAVE_HIP@ ) + +if( hic_HAVE_CUDA ) + find_dependency( CUDAToolkit ) +endif() +if( hic_HAVE_HIP ) + find_dependency( hip CONFIG ) +endif() + diff --git a/hic/src/CMakeLists.txt b/hic/src/CMakeLists.txt new file mode 100644 index 000000000..5048de357 --- /dev/null +++ b/hic/src/CMakeLists.txt @@ -0,0 +1,39 @@ +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. 
+ +set(HIC_BACKEND_CUDA 0) +set(HIC_BACKEND_HIP 0) +set(HIC_BACKEND_DUMMY 0) +if( HAVE_CUDA ) + set(HIC_BACKEND_CUDA 1) +elseif( HAVE_HIP ) + set(HIC_BACKEND_HIP 1) +else() + set(HIC_BACKEND_DUMMY 1) +endif() + +configure_file(${PROJECT_SOURCE_DIR}/src/hic/hic_config.h.in ${PROJECT_BINARY_DIR}/src/hic/hic_config.h @ONLY ) + +ecbuild_add_library( TARGET hic + TYPE INTERFACE + PUBLIC_INCLUDES + $ + $ + $ +) + +install( FILES ${PROJECT_BINARY_DIR}/src/hic/hic_config.h DESTINATION include/hic ) +install( FILES hic/hic.h DESTINATION include/hic ) +install( FILES hic/hic_runtime.h DESTINATION include/hic ) +install( FILES hic/hic_dummy/hic_dummy_runtime.h DESTINATION include/hic/hic_dummy ) + +if( HAVE_CUDA ) + target_link_libraries( hic INTERFACE CUDA::cudart ) +elseif( HAVE_HIP ) + target_link_libraries( hic INTERFACE hip::host ) +endif() diff --git a/hic/src/hic/hic.h b/hic/src/hic/hic.h new file mode 100644 index 000000000..66a2530bf --- /dev/null +++ b/hic/src/hic/hic.h @@ -0,0 +1,38 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#pragma once + +#include "hic/hic_config.h" +#include "hic/hic_runtime.h" + +#include +#include + +#if HIC_BACKEND_DUMMY +#define HIC_CALL(val) +#else +#define HIC_CALL(val) hic_assert((val), #val, __FILE__, __LINE__) +#endif + +inline void hic_assert(hicError_t err, const char* const func, const char* const file, const int line) { + // Ignore errors when HIP/CUDA runtime is unloaded or deinitialized. + // This happens when calling HIP/CUDA after main has ended, e.g. in teardown of static variables calling `hicFree` + // --> ignore hicErrorDeinitialized (a.k.a. cudaErrorCudartUnloading / hipErrorDeinitialized) + if (err != hicSuccess && err != hicErrorDeinitialized) { + std::ostringstream msg; + msg << "HIC Runtime Error [code="< +#include + +#define DUMMY_SHOULD_NOT_BE_CALLED(SYMBOL) dummyShouldNotBeCalled( #SYMBOL ) +#define DUMMY_FUNCTION(SYMBOL) \ + template inline \ + dummyError_t dummy##SYMBOL(Args&&... 
args) { \ + DUMMY_SHOULD_NOT_BE_CALLED( hic##SYMBOL ); \ + return dummyError_t{0}; \ + } +#define DUMMY_VALUE(SYMBOL) \ + constexpr int dummy##SYMBOL = 0; + +namespace { + +[[noreturn]] void dummyShouldNotBeCalled(const char* symbol) { + throw std::runtime_error(std::string(symbol)+" is using the dummy backend and should not be called"); +} + +using dummyError_t = int; +using dummyEvent_t = void*; +using dummyStream_t = void*; + +const char* dummyGetErrorString(dummyError_t) { + DUMMY_SHOULD_NOT_BE_CALLED( hicGetErrorString ); +} + +dummyError_t dummyGetLastError( void ) { + DUMMY_SHOULD_NOT_BE_CALLED( hicGetLastError ); +} + +dummyError_t dummyPeekAtLastError( void ) { + DUMMY_SHOULD_NOT_BE_CALLED( hicPeekAtLastError ); +} + +struct dummyPointerAttributes { + int type{0}; + int device{-2}; + void* hostPointer{nullptr}; + void* devicePointer{nullptr}; +}; + +DUMMY_FUNCTION(DeviceSynchronize) +DUMMY_FUNCTION(Free) +DUMMY_FUNCTION(FreeAsync) +DUMMY_FUNCTION(GetDeviceCount); +DUMMY_FUNCTION(GetErrorString) +DUMMY_FUNCTION(GetLastError) +DUMMY_FUNCTION(HostGetDevicePointer) +DUMMY_FUNCTION(HostRegister) +DUMMY_FUNCTION(HostUnregister) +DUMMY_FUNCTION(Malloc) +DUMMY_FUNCTION(MallocAsync) +DUMMY_FUNCTION(MallocManaged) +DUMMY_FUNCTION(Memcpy) +DUMMY_FUNCTION(Memcpy2D) +DUMMY_FUNCTION(MemcpyAsync) +DUMMY_FUNCTION(Memcpy2DAsync) +DUMMY_FUNCTION(MemPrefetchAsync) +DUMMY_FUNCTION(StreamCreate) +DUMMY_FUNCTION(StreamDestroy) +DUMMY_FUNCTION(StreamSynchronize) +DUMMY_FUNCTION(PointerGetAttributes) + +DUMMY_VALUE(CpuDeviceId) +DUMMY_VALUE(HostRegisterMapped) +DUMMY_VALUE(MemoryTypeDevice) +DUMMY_VALUE(MemoryTypeHost) +DUMMY_VALUE(MemoryTypeUnregistered) +DUMMY_VALUE(MemoryTypeManaged) +DUMMY_VALUE(MemcpyDeviceToHost) +DUMMY_VALUE(MemcpyHostToDevice) +DUMMY_VALUE(Success) + +} + +#undef DUMMY_FUNCTION +#undef DUMMY_VALUE +#undef DUMMY_SHOULD_NOT_BE_CALLED \ No newline at end of file diff --git a/hic/src/hic/hic_runtime.h b/hic/src/hic/hic_runtime.h new file mode 100644 index 000000000..25af23cb0 --- /dev/null +++ b/hic/src/hic/hic_runtime.h @@ -0,0 +1,180 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ +#pragma once + +#if !defined(HIC_NAMESPACE) + #define HIC_NAMESPACE + #define HIC_NAMESPACE_BEGIN + #define HIC_NAMESPACE_END +#else + #define HIC_NAMESPACE_BEGIN namespace HIC_NAMESPACE { + #define HIC_NAMESPACE_END } +#endif + +#if HIC_BACKEND_CUDA + #define HIC_BACKEND cuda + #include +#elif HIC_BACKEND_HIP + #define HIC_BACKEND hip + #if defined(DEPRECATED) + #define DEFINED_OUTERSCOPE DEPRECATED + #undef DEPRECATED + #endif + #include + #if defined(DEPRECATED) + #undef DEPRECATED + #endif + #if defined(DEFINED_OUTERSCOPE) + #define DEPRECATED DEFINED_OUTERSCOPE + #endif + +#if HIP_VERSION_MAJOR < 6 + enum hicMemoryType { + hicMemoryTypeUnregistered = 0 , + hicMemoryTypeHost = 1 , + hicMemoryTypeDevice = 2 , + hicMemoryTypeManaged = 3 + }; + struct hicPointerAttributes { + hicMemoryType type; + int device; + void* devicePointer; + void* hostPointer; + }; + + [[nodiscard]] inline hipError_t hicPointerGetAttributes(hicPointerAttributes* attributes, const void* ptr) { + hipPointerAttribute_t attr_; + auto err = hipPointerGetAttributes(&attr_, ptr); + if( err != hipSuccess ) { + attributes->device = 0; + attributes->devicePointer = nullptr; + attributes->hostPointer = nullptr; + return err; + } + const auto& type = attr_.memoryType; + if(attr_.isManaged) { + attributes->type = hicMemoryTypeManaged; + } + else if(type == hipMemoryTypeHost) { + attributes->type = hicMemoryTypeHost; + } + else if(type == hipMemoryTypeDevice) { + attributes->type = hicMemoryTypeDevice; + } + else if(type == hipMemoryTypeUnified) { + attributes->type = hicMemoryTypeManaged; + } + else { + attributes->type = hicMemoryTypeUnregistered; + } + attributes->device = attr_.device; + attributes->devicePointer = attr_.devicePointer; + attributes->hostPointer = attr_.hostPointer; + return err; + }; +#endif + +#elif HIC_BACKEND_DUMMY + #define HIC_BACKEND dummy + #include "hic/hic_dummy/hic_dummy_runtime.h" +#else + #error Unsupported hic backend. Please define HIC_BACKEND_CUDA or HIC_BACKEND_HIP or HIC_BACKEND_DUMMY +#endif + +#define HIC_PREFIX hic +#define HIC_CONCAT_(A, B) A ## B +#define HIC_CONCAT(A, B) HIC_CONCAT_(A, B) +#define HIC_SYMBOL(API) HIC_CONCAT(HIC_PREFIX, API) +#define HIC_BACKEND_SYMBOL(API) HIC_CONCAT(HIC_BACKEND, API) + +#define HIC_TYPE(TYPE) \ + using HIC_SYMBOL(TYPE) = HIC_BACKEND_SYMBOL(TYPE); + +#define HIC_FUNCTION(FUNCTION) \ + template inline \ + auto HIC_SYMBOL(FUNCTION)(Args&&... 
args) -> decltype(HIC_BACKEND_SYMBOL(FUNCTION)(std::forward(args)...)) { \ + return HIC_BACKEND_SYMBOL(FUNCTION)(std::forward(args)...); \ + } + +#define HIC_VALUE(VALUE) \ + constexpr decltype(HIC_BACKEND_SYMBOL(VALUE)) HIC_SYMBOL(VALUE) = HIC_BACKEND_SYMBOL(VALUE); + +//------------------------------------------------ +HIC_NAMESPACE_BEGIN +//------------------------------------------------ + +HIC_FUNCTION(DeviceSynchronize) +HIC_FUNCTION(Free) +HIC_FUNCTION(FreeAsync) +HIC_FUNCTION(GetDeviceCount) +HIC_FUNCTION(GetErrorString) +HIC_FUNCTION(GetLastError) +HIC_FUNCTION(HostGetDevicePointer) +HIC_FUNCTION(HostRegister) +HIC_FUNCTION(HostUnregister) +HIC_FUNCTION(Malloc) +HIC_FUNCTION(MallocAsync) +HIC_FUNCTION(MallocManaged) +HIC_FUNCTION(Memcpy) +HIC_FUNCTION(Memcpy2D) +HIC_FUNCTION(MemcpyAsync) +HIC_FUNCTION(Memcpy2DAsync) +HIC_FUNCTION(MemPrefetchAsync) +HIC_FUNCTION(PeekAtLastError) +#if !HIC_BACKEND_HIP +HIC_FUNCTION(PointerGetAttributes) +#elif HIP_VERSION_MAJOR >= 6 +HIC_FUNCTION(PointerGetAttributes) +#endif +HIC_FUNCTION(StreamCreate) +HIC_FUNCTION(StreamDestroy) +HIC_FUNCTION(StreamSynchronize) + +HIC_TYPE(Error_t) +HIC_TYPE(Event_t) +HIC_TYPE(Stream_t) +#if !HIC_BACKEND_HIP +HIC_TYPE(PointerAttributes) +#elif HIP_VERSION_MAJOR >= 6 +using HIC_SYMBOL(PointerAttributes) = HIC_BACKEND_SYMBOL(PointerAttribute_t); +#endif + +HIC_VALUE(CpuDeviceId) +HIC_VALUE(HostRegisterMapped) +#if !HIC_BACKEND_HIP +HIC_VALUE(MemoryTypeDevice) +HIC_VALUE(MemoryTypeHost) +HIC_VALUE(MemoryTypeUnregistered) +HIC_VALUE(MemoryTypeManaged) +#endif +HIC_VALUE(MemcpyDeviceToHost) +HIC_VALUE(MemcpyHostToDevice) +HIC_VALUE(Success) + +#if HIC_BACKEND_CUDA + constexpr hicError_t hicErrorDeinitialized = cudaErrorCudartUnloading; +#elif HIC_BACKEND_HIP + constexpr hicError_t hicErrorDeinitialized = hipErrorDeinitialized; +#else + constexpr hicError_t hicErrorDeinitialized = 4; +#endif + +//------------------------------------------------ +HIC_NAMESPACE_END +//------------------------------------------------ + +#undef HIC_FUNCTION +#undef HIC_TYPE +#undef HIC_VALUE +#undef HIC_CONCAT +#undef HIC_CONCAT_ +#undef HIC_PREFIX +#undef HIC_SYMBOL +#undef HIC_BACKEND_SYMBOL diff --git a/hic/tests/CMakeLists.txt b/hic/tests/CMakeLists.txt new file mode 100644 index 000000000..ecd23347d --- /dev/null +++ b/hic/tests/CMakeLists.txt @@ -0,0 +1,22 @@ +# (C) Copyright 2024 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. + +ecbuild_add_test( TARGET hic_test_dummy + SOURCES test_hic_dummy.cc + LIBS hic + DEFINITIONS HIC_BACKEND_DUMMY=1 +) + +if( HAVE_HIP OR HAVE_CUDA ) + ecbuild_add_test( TARGET hic_test_hic + SOURCES test_hic.cc + LIBS hic + ) +endif() + +add_subdirectory(test_find_hic) diff --git a/hic/tests/test_find_hic/CMakeLists.txt b/hic/tests/test_find_hic/CMakeLists.txt new file mode 100644 index 000000000..e47613247 --- /dev/null +++ b/hic/tests/test_find_hic/CMakeLists.txt @@ -0,0 +1,31 @@ + +# This test builds a package that requires fypp processing +# It uses the overriding of compile flags, like IFS is using. +# +# Test created to avoid regression after fixing issue FCKIT-19, +# where compile flags were not propagated to fypp-generated files. 
+ +if( HAVE_TESTS ) + + configure_file( test.sh.in ${CMAKE_CURRENT_BINARY_DIR}/test.sh @ONLY ) + + unset( _test_args ) + if( CMAKE_TOOLCHAIN_FILE ) + if( NOT IS_ABSOLUTE ${CMAKE_TOOLCHAIN_FILE}) + set( CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/${CMAKE_TOOLCHAIN_FILE}" ) + endif() + list( APPEND _test_args "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" ) + endif() + foreach( lang CXX ) + if( CMAKE_${lang}_COMPILER ) + list( APPEND _test_args "-DCMAKE_${lang}_COMPILER=${CMAKE_${lang}_COMPILER}" ) + endif() + if( CMAKE_${lang}_FLAGS ) + list( APPEND _test_args "-DCMAKE_${lang}_FLAGS=${CMAKE_${lang}_FLAGS}" ) + endif() + endforeach() + + add_test( NAME hic_test_find_hic + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test.sh ${_test_args} ) + +endif() diff --git a/hic/tests/test_find_hic/find_hic/CMakeLists.txt b/hic/tests/test_find_hic/find_hic/CMakeLists.txt new file mode 100644 index 000000000..4900f462c --- /dev/null +++ b/hic/tests/test_find_hic/find_hic/CMakeLists.txt @@ -0,0 +1,6 @@ + +cmake_minimum_required( VERSION 3.18 FATAL_ERROR ) + +project( find_hic LANGUAGES CXX ) + +find_package( hic REQUIRED ) diff --git a/hic/tests/test_find_hic/test.sh.in b/hic/tests/test_find_hic/test.sh.in new file mode 100755 index 000000000..81b37eda1 --- /dev/null +++ b/hic/tests/test_find_hic/test.sh.in @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Description: +# Build downstream example projects +# each individually with make/install +# +# Usage: +# test-individual.sh [CMAKE_ARGUMENTS] + +SOURCE=@PROJECT_SOURCE_DIR@/tests/test_find_hic/find_hic +BUILD=@CMAKE_CURRENT_BINARY_DIR@/build + +# Error handling +function test_failed { + EXIT_CODE=$? + { set +ex; } 2>/dev/null + if [ $EXIT_CODE -ne 0 ]; then + echo "+++++++++++++++++" + echo "Test failed" + echo "+++++++++++++++++" + fi + exit $EXIT_CODE +} +trap test_failed EXIT +set -e -o pipefail +set -x + +# Start with clean build +rm -rf $BUILD + +export hic_ROOT=@PROJECT_BINARY_DIR@ +export ecbuild_DIR=@ecbuild_DIR@ + +# Build +mkdir -p $BUILD && cd $BUILD +cmake $SOURCE \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + "$@" + +# make VERBOSE=1 + +# ctest -VV + +{ set +ex; } 2>/dev/null +echo "+++++++++++++++++" +echo "Test passed" +echo "+++++++++++++++++" diff --git a/hic/tests/test_hic.cc b/hic/tests/test_hic.cc new file mode 100644 index 000000000..7355ec807 --- /dev/null +++ b/hic/tests/test_hic.cc @@ -0,0 +1,66 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ +#include +#include +#include +#include + +#include "hic/hic.h" + +// ----------------------------------------------------------------------------- + +int test_hicMalloc() { + std::cout << "--- " << __FUNCTION__ << std::endl; + void* p; + HIC_CALL( hicMalloc(&p, 8) ); + HIC_CALL( hicFree(p) ); + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +int test_hicMallocManaged() { + std::cout << "--- " << __FUNCTION__ << std::endl; + void* p; + HIC_CALL( hicMallocManaged(&p, 8) ); + HIC_CALL( hicFree(p) ); + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +std::vector> tests = { + test_hicMalloc, + test_hicMallocManaged, +}; + +int main(int argc, char* argv[]) { + int num_devices = 0; + hicGetDeviceCount(&num_devices); + if( num_devices == 0 ) { + std::ignore = hicGetLastError(); + std::cout << "TEST IGNORED, hicGetDeviceCount -> 0" << std::endl; + return 0; + } + std::cout << "hicGetDeviceCount -> " << num_devices << std::endl; + int error = 0; + for( auto& test: tests) { + try { + error += test(); + } + catch( std::exception& e ) { + error += 1; + std::cout << e.what() << std::endl; + } + } + return error; +} diff --git a/hic/tests/test_hic_dummy.cc b/hic/tests/test_hic_dummy.cc new file mode 100644 index 000000000..cb78b8186 --- /dev/null +++ b/hic/tests/test_hic_dummy.cc @@ -0,0 +1,43 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ +#include +#include +#include +#include + +#include "hic/hic.h" + +int test_throw() { + std::cout << "--- " << __FUNCTION__ << std::endl; + // For the dummy backend we expect to have an error thrown when used + void* p; + try { + std::ignore = hicMalloc(&p, 8); + std::ignore = hicFree(p); + } catch(std::runtime_error& e) { + std::string expected_message = "hicMalloc is using the dummy backend and should not be called"; + if (e.what() == expected_message) { + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success + } + } + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 1; // fail +} + +std::vector> tests = { test_throw }; + +int main(int argc, char* argv[]) { + int error = 0; + for( auto& test: tests) { + error += test(); + } + return error; +} diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index d3f0c6b5c..47c82b031 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -60,6 +60,8 @@ library/detail/BlackMagic.h library/detail/Debug.h +parallel/acc/acc.cc +parallel/acc/acc.h parallel/mpi/mpi.cc parallel/mpi/mpi.h parallel/omp/omp.cc @@ -655,6 +657,8 @@ interpolation/method/structured/QuasiCubic2D.cc interpolation/method/structured/QuasiCubic2D.h interpolation/method/structured/QuasiCubic3D.cc interpolation/method/structured/QuasiCubic3D.h +interpolation/method/structured/RegionalLinear2D.cc +interpolation/method/structured/RegionalLinear2D.h interpolation/method/structured/StructuredInterpolation2D.h interpolation/method/structured/StructuredInterpolation2D.tcc interpolation/method/structured/StructuredInterpolation3D.h @@ -824,6 +828,8 @@ util/PolygonXY.cc util/PolygonXY.h util/Metadata.cc util/Metadata.h +util/PackVectorFields.cc +util/PackVectorFields.h util/Point.cc util/Point.h util/Polygon.cc @@ -923,14 +929,15 @@ list( APPEND source_list ) -if( atlas_HAVE_CUDA ) +if( atlas_HAVE_GPU ) include( atlas_host_device ) list( APPEND source_list - parallel/HaloExchangeCUDA.h - parallel/HaloExchangeCUDA.cu + parallel/HaloExchangeGPU.h + parallel/HaloExchangeGPU.hic ) atlas_host_device( source_list SOURCES + parallel/HaloExchangeGPU.hic mesh/Connectivity.cc ) @@ -944,7 +951,7 @@ if( atlas_HAVE_CUDA ) atlas_host_device( source_list SOURCES array/native/NativeArrayView.cc - array/native/NativeDataStore.h + array/native/NativeIndexView.cc ) endif() endif() @@ -977,10 +984,10 @@ ecbuild_add_library( TARGET atlas eckit_mpi eckit_option atlas_io + hic $<${atlas_HAVE_EIGEN}:Eigen3::Eigen> $<${atlas_HAVE_OMP_CXX}:OpenMP::OpenMP_CXX> $<${atlas_HAVE_GRIDTOOLS_STORAGE}:GridTools::gridtools> - $<${atlas_HAVE_CUDA}:CUDA::cudart> PRIVATE_INCLUDES ${CGAL_INCLUDE_DIRS} @@ -994,4 +1001,12 @@ ecbuild_add_library( TARGET atlas ) +if( HAVE_ACC ) + target_link_options( atlas INTERFACE + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> ) +endif() + target_compile_features( atlas PUBLIC cxx_std_17 ) diff --git a/src/atlas/array/SVector.h b/src/atlas/array/SVector.h index af6c81853..b275dc09d 100644 --- a/src/atlas/array/SVector.h +++ b/src/atlas/array/SVector.h @@ -16,7 +16,7 @@ #include "atlas/library/config.h" #include "atlas/util/Allocate.h" -#ifndef __CUDA_ARCH__ +#if ATLAS_HOST_COMPILE #include "atlas/runtime/Exception.h" #endif @@ -34,7 +34,7 @@ class SVector { ATLAS_HOST_DEVICE SVector(const T* data, const idx_t size): data_(data), size_(size), externally_allocated_(true) {} -#ifdef __CUDACC__ +#if ATLAS_HIC_COMPILER // Note that this does not copy!!! 
It is mainly intended to be passed to a CUDA kernel which requires value semantics for this class ATLAS_HOST_DEVICE SVector(SVector const& other): data_(other.data_), size_(other.size_), externally_allocated_(true) {} @@ -70,7 +70,7 @@ class SVector { ATLAS_HOST_DEVICE void clear() { if (data_ && !externally_allocated_) { -#ifndef __CUDA_ARCH__ +#if ATLAS_HOST_COMPILE deallocate(data_, size_); #endif } @@ -103,23 +103,23 @@ class SVector { ATLAS_HOST_DEVICE T& operator()(const idx_t idx) { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } ATLAS_HOST_DEVICE T const& operator()(const idx_t idx) const { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } ATLAS_HOST_DEVICE T& operator[](const idx_t idx) { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } ATLAS_HOST_DEVICE T const& operator[](const idx_t idx) const { - assert(data_ && idx < size_); + //assert(data_ && idx < size_); return data_[idx]; } @@ -140,7 +140,7 @@ class SVector { } void resize(idx_t N) { -#ifndef __CUDA_ARCH__ +#if ATLAS_HOST_COMPILE ATLAS_ASSERT(not externally_allocated_, "Cannot resize externally allocated (or wrapped) data"); #endif resize_impl(N); @@ -161,7 +161,7 @@ class SVector { for (idx_t c = 0; c < size; ++c) { ptr[c].~T(); } - util::delete_managedmem(ptr); + util::delete_managedmem(ptr, size); } } diff --git a/src/atlas/array/Vector.h b/src/atlas/array/Vector.h index aeadd9bfc..ecef21769 100644 --- a/src/atlas/array/Vector.h +++ b/src/atlas/array/Vector.h @@ -16,9 +16,7 @@ #include "atlas/library/config.h" #include "atlas/runtime/Exception.h" -#if ATLAS_HAVE_CUDA -#include -#endif +#include "hic/hic.h" namespace atlas { namespace array { @@ -59,10 +57,9 @@ class Vector { size_ = N; } - void updateDevice() { - if (!data_gpu_) { -#if ATLAS_HAVE_CUDA - ::cudaMalloc((void**)(&data_gpu_), sizeof(T*) * size_); + void allocateDevice() { + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMalloc((void**)(&data_gpu_), sizeof(T*) * size_)); T* buff = new T[size_]; @@ -70,32 +67,36 @@ class Vector { data_[i]->updateDevice(); buff[i] = data_[i]->gpu_object_ptr(); } - ::cudaMemcpy(data_gpu_, buff, sizeof(T*) * size_, cudaMemcpyHostToDevice); - delete buff; -#else + HIC_CALL(hicMemcpy(data_gpu_, buff, sizeof(T*) * size_, hicMemcpyHostToDevice)); + delete[] buff; + } + else { data_gpu_ = data_; -#endif - size_gpu_ = size_; + } + size_gpu_ = size_; + } + + void updateDevice() { + if (!data_gpu_) { + allocateDevice(); } else { ATLAS_ASSERT(size_gpu_ == size_); -#if ATLAS_HAVE_CUDA - for (idx_t i = 0; i < size(); ++i) { - data_[i]->updateDevice(); - assert(data_gpu_[i] == data_[i]->gpu_object_ptr()); + if constexpr (ATLAS_HAVE_GPU) { + for (idx_t i = 0; i < size(); ++i) { + data_[i]->updateDevice(); + assert(data_gpu_[i] == data_[i]->gpu_object_ptr()); + } } -#endif } } void updateHost() { ATLAS_ASSERT(data_gpu_ != nullptr); - -#if ATLAS_HAVE_CUDA - - for (idx_t i = 0; i < size(); ++i) { - data_[i]->updateHost(); + if constexpr (ATLAS_HAVE_GPU) { + for (idx_t i = 0; i < size(); ++i) { + data_[i]->updateHost(); + } } -#endif } T* gpu_object_ptr() { return data_gpu_; } diff --git a/src/atlas/array/gridtools/GridToolsDataStore.h b/src/atlas/array/gridtools/GridToolsDataStore.h index d2017f14e..8fb604640 100644 --- a/src/atlas/array/gridtools/GridToolsDataStore.h +++ b/src/atlas/array/gridtools/GridToolsDataStore.h @@ -10,6 +10,7 @@ #pragma once +#include "atlas/library/config.h" #include "atlas/array/ArrayDataStore.h" #include 
"atlas/array/gridtools/GridToolsTraits.h" @@ -54,7 +55,7 @@ struct GridToolsDataStore : ArrayDataStore { accUnmap(); } - bool deviceAllocated() const override { return ATLAS_HAVE_CUDA; } + bool deviceAllocated() const override { return ATLAS_HAVE_GPU; } bool hostNeedsUpdate() const override { return data_store_->host_needs_update(); } @@ -112,7 +113,7 @@ struct GridToolsDataStore : ArrayDataStore { return ::gridtools::make_host_view<::gridtools::access_mode::read_only>(*data_store_).data(); } void* device_data() const { -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU return ::gridtools::make_device_view<::gridtools::access_mode::read_only>(*data_store_).data(); #else return ::gridtools::make_host_view<::gridtools::access_mode::read_only>(*data_store_).data(); diff --git a/src/atlas/array/gridtools/GridToolsIndexView.cu b/src/atlas/array/gridtools/GridToolsIndexView.cu deleted file mode 100644 index a4a8427b2..000000000 --- a/src/atlas/array/gridtools/GridToolsIndexView.cu +++ /dev/null @@ -1,11 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include "GridToolsIndexView.cc" diff --git a/src/atlas/array/native/NativeArrayView.h b/src/atlas/array/native/NativeArrayView.h index 5c85f115f..e9e90a47e 100644 --- a/src/atlas/array/native/NativeArrayView.h +++ b/src/atlas/array/native/NativeArrayView.h @@ -352,7 +352,9 @@ class ArrayView { ATLAS_HOST_DEVICE void check_bounds(Ints... idx) const { static_assert(sizeof...(idx) == Rank, "Expected number of indices is different from rank of array"); +#if ATLAS_HOST_COMPILE return check_bounds_part<0>(idx...); +#endif } #else template @@ -366,11 +368,13 @@ class ArrayView { ATLAS_HOST_DEVICE void check_bounds_force(Ints... idx) const { static_assert(sizeof...(idx) == Rank, "Expected number of indices is different from rank of array"); +#if ATLAS_HOST_COMPILE return check_bounds_part<0>(idx...); +#endif } template - ATLAS_HOST_DEVICE + ATLAS_HOST void check_bounds_part(Int idx, Ints... 
next_idx) const { if (idx_t(idx) >= shape_[Dim]) { throw_OutOfRange("ArrayView", array_dim(), idx, shape_[Dim]); @@ -379,7 +383,7 @@ class ArrayView { } template - ATLAS_HOST_DEVICE + ATLAS_HOST void check_bounds_part(Int last_idx) const { if (idx_t(last_idx) >= shape_[Dim]) { throw_OutOfRange("ArrayView", array_dim(), last_idx, shape_[Dim]); diff --git a/src/atlas/array/native/NativeDataStore.h b/src/atlas/array/native/NativeDataStore.h index d0ce0baef..60d35dd50 100644 --- a/src/atlas/array/native/NativeDataStore.h +++ b/src/atlas/array/native/NativeDataStore.h @@ -16,20 +16,18 @@ #include // std::numeric_limits::signaling_NaN #include -#if ATLAS_HAVE_CUDA -#include -#endif - #include "atlas/array/ArrayDataStore.h" #include "atlas/library/Library.h" #include "atlas/library/config.h" +#include "atlas/parallel/acc/acc.h" #include "atlas/runtime/Exception.h" #include "atlas/runtime/Log.h" #include "eckit/log/Bytes.h" -#if ATLAS_HAVE_ACC -#include "atlas_acc_support/atlas_acc_map_data.h" -#endif +#include "hic/hic.h" + + +#define ATLAS_ACC_DEBUG 0 //------------------------------------------------------------------------------ @@ -96,17 +94,31 @@ template void initialise(Value[], size_t) {} #endif +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + template class DataStore : public ArrayDataStore { public: DataStore(size_t size): size_(size) { allocateHost(); initialise(host_data_, size_); -#if ATLAS_HAVE_CUDA - device_updated_ = false; -#else - device_data_ = host_data_; -#endif + if (ATLAS_HAVE_GPU && devices()) { + device_updated_ = false; + } + else { + device_data_ = host_data_; + } } ~DataStore() { @@ -115,28 +127,28 @@ class DataStore : public ArrayDataStore { } void updateDevice() const override { -#if ATLAS_HAVE_CUDA - if (not device_allocated_) { - allocateDevice(); - } - cudaError_t err = cudaMemcpy(device_data_, host_data_, size_*sizeof(Value), cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateDevice: "+std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (not device_allocated_) { + allocateDevice(); + } + hicError_t err = hicMemcpy(device_data_, host_data_, size_*sizeof(Value), hicMemcpyHostToDevice); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here()); + } + device_updated_ = true; } - device_updated_ = true; -#endif } void updateHost() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - cudaError_t err = cudaMemcpy(host_data_, device_data_, size_*sizeof(Value), cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateHost: "+std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + hicError_t err = hicMemcpy(host_data_, device_data_, size_*sizeof(Value), hicMemcpyDeviceToHost); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateHost: "+std::string(hicGetErrorString(err)), Here()); + } + host_updated_ = true; } - host_updated_ = true; } -#endif } bool valid() const override { return true; } @@ -162,33 +174,33 @@ class DataStore : public ArrayDataStore { bool deviceAllocated() const override { return device_allocated_; } void allocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - return; - } - if (size_) { - 
cudaError_t err = cudaMalloc((void**)&device_data_, sizeof(Value)*size_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (device_allocated_) { + return; + } + if (size_) { + hicError_t err = hicMalloc((void**)&device_data_, sizeof(Value)*size_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_allocated_ = true; + accMap(); } - device_allocated_ = true; - accMap(); } -#endif } void deallocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - accUnmap(); - cudaError_t err = cudaFree(device_data_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + accUnmap(); + hicError_t err = hicFree(device_data_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_data_ = nullptr; + device_allocated_ = false; } - device_data_ = nullptr; - device_allocated_ = false; } -#endif } bool hostNeedsUpdate() const override { return (not host_updated_); } @@ -210,13 +222,17 @@ class DataStore : public ArrayDataStore { void* voidDeviceData() override { return static_cast(device_data_); } void accMap() const override { -#if ATLAS_HAVE_ACC - if (not acc_mapped_) { + if (not acc_mapped_ && acc::devices()) { ATLAS_ASSERT(deviceAllocated(),"Could not accMap as device data is not allocated"); - atlas_acc_map_data((void*)host_data_, (void*)device_data_, size_ * sizeof(Value)); + ATLAS_ASSERT(!atlas::acc::is_present(host_data_, size_ * sizeof(Value))); + if constexpr(ATLAS_ACC_DEBUG) { + std::cout << " + acc_map_data(hostptr:"< class WrappedDataStore : public ArrayDataStore { public: WrappedDataStore(Value* host_data, size_t size): host_data_(host_data), size_(size) { -#if ATLAS_HAVE_CUDA - device_updated_ = false; -#else - device_data_ = host_data_; -#endif + if (ATLAS_HAVE_GPU && devices()) { + device_updated_ = false; + } + else { + device_data_ = host_data_; + } } void updateDevice() const override { -#if ATLAS_HAVE_CUDA - if (not device_allocated_) { - allocateDevice(); - } - cudaError_t err = cudaMemcpy(device_data_, host_data_, size_*sizeof(Value), cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateDevice: "+std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (not device_allocated_) { + allocateDevice(); + } + hicError_t err = hicMemcpy(device_data_, host_data_, size_*sizeof(Value), hicMemcpyHostToDevice); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here()); + } + device_updated_ = true; } - device_updated_ = true; -#endif } void updateHost() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - cudaError_t err = cudaMemcpy(host_data_, device_data_, size_*sizeof(Value), cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to updateHost: "+std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + hicError_t err = hicMemcpy(host_data_, device_data_, size_*sizeof(Value), hicMemcpyDeviceToHost); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to updateHost: 
"+std::string(hicGetErrorString(err)), Here()); + } + host_updated_ = true; } - host_updated_ = true; } -#endif } bool valid() const override { return true; } @@ -346,33 +366,33 @@ class WrappedDataStore : public ArrayDataStore { bool deviceAllocated() const override { return device_allocated_; } void allocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - return; - } - if (size_) { - cudaError_t err = cudaMalloc((void**)&device_data_, sizeof(Value)*size_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if (ATLAS_HAVE_GPU && devices()) { + if (device_allocated_) { + return; + } + if (size_) { + hicError_t err = hicMalloc((void**)&device_data_, sizeof(Value)*size_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_allocated_ = true; + accMap(); } - device_allocated_ = true; - accMap(); } -#endif } void deallocateDevice() const override { -#if ATLAS_HAVE_CUDA - if (device_allocated_) { - accUnmap(); - cudaError_t err = cudaFree(device_data_); - if (err != cudaSuccess) { - throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(cudaGetErrorString(err)), Here()); + if constexpr (ATLAS_HAVE_GPU) { + if (device_allocated_) { + accUnmap(); + hicError_t err = hicFree(device_data_); + if (err != hicSuccess) { + throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(hicGetErrorString(err)), Here()); + } + device_data_ = nullptr; + device_allocated_ = false; } - device_data_ = nullptr; - device_allocated_ = false; } -#endif } bool hostNeedsUpdate() const override { return (not host_updated_); } @@ -394,13 +414,17 @@ class WrappedDataStore : public ArrayDataStore { void* voidDeviceData() override { return static_cast(device_data_); } void accMap() const override { -#if ATLAS_HAVE_ACC - if (not acc_mapped_) { + if (not acc_mapped_ && acc::devices()) { ATLAS_ASSERT(deviceAllocated(),"Could not accMap as device data is not allocated"); - atlas_acc_map_data((void*)host_data_, (void*)device_data_, size_ * sizeof(Value)); + ATLAS_ASSERT(!atlas::acc::is_present(host_data_, size_ * sizeof(Value))); + if constexpr(ATLAS_ACC_DEBUG) { + std::cout << " + acc_map_data(hostptr:"< make_host_view(const Array& array) { template ArrayView make_device_view(Array& array) { -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU ATLAS_ASSERT(array.deviceAllocated(),"make_device_view: Array not allocated on device"); return ArrayView((array.device_data()), array.shape(), array.strides()); #else @@ -59,7 +59,7 @@ ArrayView make_device_view(Array& array) { template ArrayView make_device_view(const Array& array) { -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU ATLAS_ASSERT(array.deviceAllocated(),"make_device_view: Array not allocated on device"); return ArrayView(array.device_data(), array.shape(), array.strides()); #else diff --git a/src/atlas/field/detail/FieldImpl.h b/src/atlas/field/detail/FieldImpl.h index 066f2cba7..cdd9c308f 100644 --- a/src/atlas/field/detail/FieldImpl.h +++ b/src/atlas/field/detail/FieldImpl.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "atlas/util/Object.h" diff --git a/src/atlas/functionspace/Spectral.h b/src/atlas/functionspace/Spectral.h index dbb32b0fd..7e5fe7c43 100644 --- a/src/atlas/functionspace/Spectral.h +++ b/src/atlas/functionspace/Spectral.h @@ -47,9 +47,10 @@ class Spectral : public functionspace::FunctionSpaceImpl { n = total wavenumber 
const auto zonal_wavenumbers = Spectral::zonal_wavenumbers(); + const int truncation = Spectral::truncation(); idx_t jc=0; for( int jm=0; jm this->halo()) { - throw_Exception("StructuredColumsn does not contain a halo of size " + std::to_string(halo) + ".", Here()); + throw_Exception("StructuredColumns does not contain a halo of size " + std::to_string(halo) + ".", Here()); } polygons_[halo].reset(new grid::StructuredPartitionPolygon(*this, halo)); } @@ -726,7 +726,7 @@ struct FixupHaloForVectors { template void apply(Field& field) { std::string type = field.metadata().getString("type", "scalar"); - if (type == "vector ") { + if (type == "vector") { ATLAS_NOTIMPLEMENTED; } } @@ -770,12 +770,16 @@ struct FixupHaloForVectors<3> { template void apply(Field& field) { std::string type = field.metadata().getString("type", "scalar"); + + // if levels not setup in functionspace use levels from field + idx_t k_end = (fs.k_begin() == 0) && (fs.k_end() == 0) ? field.levels() : fs.k_end(); + if (type == "vector") { auto array = array::make_view(field); for (idx_t j = fs.j_begin_halo(); j < 0; ++j) { for (idx_t i = fs.i_begin_halo(j); i < fs.i_end_halo(j); ++i) { idx_t n = fs.index(i, j); - for (idx_t k = fs.k_begin(); k < fs.k_end(); ++k) { + for (idx_t k = fs.k_begin(); k < k_end; ++k) { array(n, k, XX) = -array(n, k, XX); array(n, k, YY) = -array(n, k, YY); } @@ -784,7 +788,7 @@ struct FixupHaloForVectors<3> { for (idx_t j = fs.grid().ny(); j < fs.j_end_halo(); ++j) { for (idx_t i = fs.i_begin_halo(j); i < fs.i_end_halo(j); ++i) { idx_t n = fs.index(i, j); - for (idx_t k = fs.k_begin(); k < fs.k_end(); ++k) { + for (idx_t k = fs.k_begin(); k < k_end; ++k) { array(n, k, XX) = -array(n, k, XX); array(n, k, YY) = -array(n, k, YY); } diff --git a/src/atlas/functionspace/detail/StructuredColumns_setup.cc b/src/atlas/functionspace/detail/StructuredColumns_setup.cc index 4bdc0a4e9..094b47849 100644 --- a/src/atlas/functionspace/detail/StructuredColumns_setup.cc +++ b/src/atlas/functionspace/detail/StructuredColumns_setup.cc @@ -588,13 +588,20 @@ void StructuredColumns::setup(const grid::Distribution& distribution, const ecki atlas_omp_parallel_for(idx_t n = 0; n < gridpoints.size(); ++n) { const GridPoint& gp = gridpoints[n]; - if (gp.j >= 0 && gp.j < grid_->ny()) { - xy(gp.r, XX) = grid_->x(gp.i, gp.j); - xy(gp.r, YY) = grid_->y(gp.j); - } - else { - xy(gp.r, XX) = compute_x(gp.i, gp.j); - xy(gp.r, YY) = compute_y(gp.j); + if (regional) { + std::array lonlatVec; + grid_->lonlat(gp.i, gp.j, lonlatVec.data()); + xy(gp.r, XX) = lonlatVec[0]; + xy(gp.r, YY) = lonlatVec[1]; + } else { + if (gp.j >= 0 && gp.j < grid_->ny()) { + xy(gp.r, XX) = grid_->x(gp.i, gp.j); + xy(gp.r, YY) = grid_->y(gp.j); + } + else { + xy(gp.r, XX) = compute_x(gp.i, gp.j); + xy(gp.r, YY) = compute_y(gp.j); + } } bool in_domain(false); diff --git a/src/atlas/interpolation/method/MethodFactory.cc b/src/atlas/interpolation/method/MethodFactory.cc index a34e29961..f7a25c72f 100644 --- a/src/atlas/interpolation/method/MethodFactory.cc +++ b/src/atlas/interpolation/method/MethodFactory.cc @@ -24,6 +24,7 @@ #include "structured/Linear3D.h" #include "structured/QuasiCubic2D.h" #include "structured/QuasiCubic3D.h" +#include "structured/RegionalLinear2D.h" #include "unstructured/FiniteElement.h" #include "unstructured/UnstructuredBilinearLonLat.h" @@ -46,6 +47,7 @@ void force_link() { MethodBuilder(); MethodBuilder(); MethodBuilder(); + MethodBuilder(); MethodBuilder(); MethodBuilder(); MethodBuilder(); diff --git 
a/src/atlas/interpolation/method/binning/Binning.cc b/src/atlas/interpolation/method/binning/Binning.cc index cbbe4d7aa..f5a6b0743 100644 --- a/src/atlas/interpolation/method/binning/Binning.cc +++ b/src/atlas/interpolation/method/binning/Binning.cc @@ -102,8 +102,9 @@ void Binning::do_setup(const FunctionSpace& source, // start of the indexes associated with the row 'i+1' size_t ubound = ptr_tamx_o[idx_row_next]; - if (lbound == ubound) + if (lbound == ubound) { continue; + } double sum_row = 0; for (size_t i = lbound; i < ubound; ++i) { diff --git a/src/atlas/interpolation/method/cubedsphere/CellFinder.cc b/src/atlas/interpolation/method/cubedsphere/CellFinder.cc index 9912d16f1..4a77231a0 100644 --- a/src/atlas/interpolation/method/cubedsphere/CellFinder.cc +++ b/src/atlas/interpolation/method/cubedsphere/CellFinder.cc @@ -37,7 +37,7 @@ CellFinder::CellFinder(const Mesh& mesh, const util::Config& config): mesh_{mesh auto halo = config.getInt("halo", 0); for (idx_t i = 0; i < mesh_.cells().size(); ++i) { if (haloView(i) <= halo) { - points.emplace_back(PointLonLat(lonlatView(i, LON), lonlatView(i, LAT))); + points.emplace_back(lonlatView(i, LON), lonlatView(i, LAT)); payloads.emplace_back(i); } } diff --git a/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h b/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h index 34cfdacb2..8d15d035a 100644 --- a/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h +++ b/src/atlas/interpolation/method/sphericalvector/ComplexMatrixMultiply.h @@ -70,7 +70,7 @@ class ComplexMatrixMultiply { // tinyNum ~= 2.3e-13 for double. constexpr auto tinyNum = 1024 * std::numeric_limits::epsilon(); const auto complexMagnitude = std::abs(complexRowIter.value()); - const auto realValue = realRowIter.value(); + const auto realValue = std::abs(realRowIter.value()); const auto error = std::abs(complexMagnitude - realValue); const auto printError = [&]() { diff --git a/src/atlas/interpolation/method/structured/RegionalLinear2D.cc b/src/atlas/interpolation/method/structured/RegionalLinear2D.cc new file mode 100644 index 000000000..e8588b617 --- /dev/null +++ b/src/atlas/interpolation/method/structured/RegionalLinear2D.cc @@ -0,0 +1,572 @@ +/* + * (C) Copyright 2024 Meteorologisk Institutt + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ */ + +#include "atlas/interpolation/method/structured/RegionalLinear2D.h" + +#include + +#include "atlas/array.h" +#include "atlas/interpolation/method/MethodFactory.h" +#include "atlas/util/KDTree.h" +#include "atlas/util/Point.h" + +namespace atlas { +namespace interpolation { +namespace method { + +namespace { +MethodBuilder __builder("regional-linear-2d"); +} + +void RegionalLinear2D::print(std::ostream&) const { ATLAS_NOTIMPLEMENTED; } + +void RegionalLinear2D::do_setup(const Grid& source, const Grid& target, + const Cache&) { + ATLAS_NOTIMPLEMENTED; +} + +void RegionalLinear2D::do_setup(const FunctionSpace& source, + const FunctionSpace& target) { + ATLAS_TRACE("interpolation::method::RegionalLinear2D::do_setup"); + source_ = source; + target_ = target; + + if (target_.size() == 0) { + return; + } + ASSERT(source_.type() == "StructuredColumns"); + + // Get grid parameters + const functionspace::StructuredColumns sourceFs(source_); + const RegularGrid sourceGrid(sourceFs.grid()); + const Projection & sourceProj = sourceGrid.projection(); + const size_t sourceNx = sourceGrid.nx(); + const size_t sourceNy = sourceGrid.ny(); + const double sourceDx = sourceGrid.dx(); + const double sourceDy = std::abs(sourceGrid.y(1)-sourceGrid.y(0)); + const bool reversedY = sourceGrid.y(1) < sourceGrid.y(0); + + // Check grid regularity in y direction + for (size_t sourceJ = 0; sourceJ < sourceNy-1; ++sourceJ) { + if (reversedY) { + ASSERT(std::abs(sourceGrid.y(sourceJ)-sourceGrid.y(sourceJ+1)-sourceDy) < 1.0e-12*sourceDy); + } else { + ASSERT(std::abs(sourceGrid.y(sourceJ+1)-sourceGrid.y(sourceJ)-sourceDy) < 1.0e-12*sourceDy); + } + } + + // Source grid indices + const Field sourceFieldIndexI = sourceFs.index_i(); + const Field sourceFieldIndexJ = sourceFs.index_j(); + const auto sourceIndexIView = array::make_view(sourceFieldIndexI); + const auto sourceIndexJView = array::make_view(sourceFieldIndexJ); + sourceSize_ = sourceFs.size(); + + // Destination grid size + targetSize_ = target_.size(); + + // Ghost points + const auto sourceGhostView = array::make_view(sourceFs.ghost()); + const auto targetGhostView = array::make_view(target_.ghost()); + + // Define reduced grid horizontal distribution + std::vector mpiTask(sourceNx*sourceNy, 0); + for (size_t sourceJnode = 0; sourceJnode < sourceSize_; ++sourceJnode) { + if (sourceGhostView(sourceJnode) == 0) { + mpiTask[(sourceIndexIView(sourceJnode)-1)*sourceNy+sourceIndexJView(sourceJnode)-1] = comm_.rank(); + } + } + comm_.allReduceInPlace(mpiTask.begin(), mpiTask.end(), eckit::mpi::sum()); + + // Define local tree on destination grid + std::vector targetPoints; + std::vector targetIndices; + targetPoints.reserve(targetSize_); + targetIndices.reserve(targetSize_); + const auto targetLonLatView = array::make_view(target_.lonlat()); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + PointLonLat p({targetLonLatView(targetJnode, 0), targetLonLatView(targetJnode, 1)}); + sourceProj.lonlat2xy(p); + targetPoints.emplace_back(p[0], p[1], 0.0); + targetIndices.emplace_back(targetJnode); + } + util::IndexKDTree targetTree; + if (targetSize_ > 0) { + targetTree.build(targetPoints, targetIndices); + } + const double radius = std::sqrt(sourceDx*sourceDx+sourceDy*sourceDy); + + // Delta for colocation + const double eps = 1.0e-8; + + // RecvCounts and received points list + targetRecvCounts_.resize(comm_.size()); + std::fill(targetRecvCounts_.begin(), targetRecvCounts_.end(), 0); + std::vector targetRecvPointsList; + for (size_t sourceJ = 
0; sourceJ < sourceNy; ++sourceJ) { + double yMin, yMax; + if (reversedY) { + yMin = sourceJ < sourceNy-1 ? sourceGrid.y(sourceJ+1)-eps : -std::numeric_limits::max(); + yMax = sourceJ > 0 ? sourceGrid.y(sourceJ-1)+eps : std::numeric_limits::max(); + } else { + yMin = sourceJ > 0 ? sourceGrid.y(sourceJ-1)-eps : -std::numeric_limits::max(); + yMax = sourceJ < sourceNy-1 ? sourceGrid.y(sourceJ+1)+eps : std::numeric_limits::max(); + } + for (size_t sourceI = 0; sourceI < sourceNx; ++sourceI) { + const double xMin = sourceI > 0 ? sourceGrid.x(sourceI-1)-eps : -std::numeric_limits::max(); + const double xMax = sourceI < sourceNx-1 ? sourceGrid.x(sourceI+1)+eps : + std::numeric_limits::max(); + + bool pointsNeeded = false; + if (targetSize_ > 0) { + const Point3 p(sourceGrid.x(sourceI), sourceGrid.y(sourceJ), 0.0); + const auto list = targetTree.closestPointsWithinRadius(p, radius); + for (const auto & item : list) { + const PointXYZ targetPoint = item.point(); + const size_t targetJnode = item.payload(); + if (targetGhostView(targetJnode) == 0) { + const bool inX = (xMin <= targetPoint[0] && targetPoint[0] <= xMax); + const bool inY = (yMin <= targetPoint[1] && targetPoint[1] <= yMax); + if (inX && inY) { + pointsNeeded = true; + break; + } + } + } + } + if (pointsNeeded) { + ++targetRecvCounts_[mpiTask[sourceI*sourceNy+sourceJ]]; + targetRecvPointsList.push_back(sourceI*sourceNy+sourceJ); + } + } + } + + // Buffer size + targetRecvSize_ = targetRecvPointsList.size(); + + if (targetRecvSize_ > 0) { + // RecvDispls + targetRecvDispls_.push_back(0); + for (size_t jt = 0; jt < comm_.size()-1; ++jt) { + targetRecvDispls_.push_back(targetRecvDispls_[jt]+targetRecvCounts_[jt]); + } + + // Allgather RecvCounts + eckit::mpi::Buffer targetRecvCountsBuffer(comm_.size()); + comm_.allGatherv(targetRecvCounts_.begin(), targetRecvCounts_.end(), targetRecvCountsBuffer); + std::vector targetRecvCountsGlb_ = std::move(targetRecvCountsBuffer.buffer); + + // SendCounts + for (size_t jt = 0; jt < comm_.size(); ++jt) { + sourceSendCounts_.push_back(targetRecvCountsGlb_[jt*comm_.size()+comm_.rank()]); + } + + // Buffer size + sourceSendSize_ = 0; + for (const auto & n : sourceSendCounts_) { + sourceSendSize_ += n; + } + + // SendDispls + sourceSendDispls_.push_back(0); + for (size_t jt = 0; jt < comm_.size()-1; ++jt) { + sourceSendDispls_.push_back(sourceSendDispls_[jt]+sourceSendCounts_[jt]); + } + + // Ordered received points list + std::vector targetRecvOffset(comm_.size(), 0); + std::vector targetRecvPointsListOrdered(targetRecvSize_); + for (size_t jr = 0; jr < targetRecvSize_; ++jr) { + const size_t sourceI = targetRecvPointsList[jr]/sourceNy; + const size_t sourceJ = targetRecvPointsList[jr]-sourceI*sourceNy; + size_t jt = mpiTask[sourceI*sourceNy+sourceJ]; + size_t jro = targetRecvDispls_[jt]+targetRecvOffset[jt]; + targetRecvPointsListOrdered[jro] = targetRecvPointsList[jr]; + ++targetRecvOffset[jt]; + } + std::vector sourceSentPointsList(sourceSendSize_); + comm_.allToAllv(targetRecvPointsListOrdered.data(), targetRecvCounts_.data(), targetRecvDispls_.data(), + sourceSentPointsList.data(), sourceSendCounts_.data(), sourceSendDispls_.data()); + + // Sort indices + std::vector gij; + for (size_t sourceJnode = 0; sourceJnode < sourceSize_; ++sourceJnode) { + if (sourceGhostView(sourceJnode) == 0) { + gij.push_back((sourceIndexIView(sourceJnode)-1)*sourceNy+sourceIndexJView(sourceJnode)-1); + } else { + gij.push_back(-1); + } + } + std::vector gidx(sourceSize_); + std::iota(gidx.begin(), gidx.end(), 0); + 
std::stable_sort(gidx.begin(), gidx.end(), [&gij](size_t i1, size_t i2) + {return gij[i1] < gij[i2];}); + std::vector ridx(sourceSendSize_); + std::iota(ridx.begin(), ridx.end(), 0); + std::stable_sort(ridx.begin(), ridx.end(), [&sourceSentPointsList](size_t i1, size_t i2) + {return sourceSentPointsList[i1] < sourceSentPointsList[i2];}); + + // Mapping for sent points + sourceSendMapping_.resize(sourceSendSize_); + size_t sourceJnode = 0; + for (size_t js = 0; js < sourceSendSize_; ++js) { + while (gij[gidx[sourceJnode]] < sourceSentPointsList[ridx[js]]) { + ++sourceJnode; + ASSERT(sourceJnode < sourceSize_); + } + sourceSendMapping_[ridx[js]] = gidx[sourceJnode]; + } + + // Sort indices + std::vector idx(targetRecvPointsListOrdered.size()); + std::iota(idx.begin(), idx.end(), 0); + std::stable_sort(idx.begin(), idx.end(), [&targetRecvPointsListOrdered](size_t i1, size_t i2) + {return targetRecvPointsListOrdered[i1] < targetRecvPointsListOrdered[i2];}); + + // Compute horizontal interpolation + stencil_.resize(targetSize_); + weights_.resize(targetSize_); + stencilSize_.resize(targetSize_); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + // Interpolation element default values + if (targetGhostView(targetJnode) == 0) { + // Destination grid indices + const double targetX = targetPoints[targetJnode][0]; + bool colocatedX = false; + int indexI = -1; + for (size_t sourceI = 0; sourceI < sourceNx-1; ++sourceI) { + if (std::abs(targetX-sourceGrid.x(sourceI)) < eps) { + indexI = sourceI; + colocatedX = true; + } + if (sourceGrid.x(sourceI)+eps < targetX && targetX < sourceGrid.x(sourceI+1)-eps) { + indexI = sourceI; + colocatedX = false; + } + } + if (std::abs(targetX-sourceGrid.x(sourceNx-1)) < eps) { + indexI = sourceNx-1; + colocatedX = true; + } + const double targetY = targetPoints[targetJnode][1]; + bool colocatedY = false; + int indexJ = -1; + for (size_t sourceJ = 0; sourceJ < sourceNy-1; ++sourceJ) { + if (std::abs(targetY-sourceGrid.y(sourceJ)) < eps) { + indexJ = sourceJ; + colocatedY = true; + } + if (reversedY) { + if (sourceGrid.y(sourceJ+1)+eps < targetY && targetY < sourceGrid.y(sourceJ)-eps) { + indexJ = sourceJ; + colocatedY = false; + } + } else { + if (sourceGrid.y(sourceJ)+eps < targetY && targetY < sourceGrid.y(sourceJ+1)-eps) { + indexJ = sourceJ; + colocatedY = false; + } + } + } + if (std::abs(targetY-sourceGrid.y(sourceNy-1)) < eps) { + indexJ = sourceNy-1; + colocatedY = true; + } + + if (indexI == -1 || indexJ == -1) { + // Point outside of the domain, using nearest neighbor + if (indexI > -1) { + if (!colocatedX && + (std::abs(targetX-sourceGrid.x(indexI+1)) < std::abs(targetX-sourceGrid.x(indexI)))) { + indexI += 1; + } + } else { + if (std::abs(targetX-sourceGrid.x(0)) < std::abs(targetX-sourceGrid.x(sourceNx-1))) { + indexI = 0; + } else { + indexI = sourceNx-1; + } + } + if (indexJ > -1) { + if (!colocatedY && + (std::abs(targetY-sourceGrid.y(indexJ+1)) < std::abs(targetY-sourceGrid.y(indexJ)))) { + indexJ += 1; + } + } else { + if (std::abs(targetY-sourceGrid.y(0)) < std::abs(targetY-sourceGrid.y(sourceNy-1))) { + indexJ = 0; + } else { + indexJ = sourceNy-1; + } + Log::info() << "WARNING: point outside of the domain" << std::endl; + } + + // Colocated point (actually nearest neighbor) + colocatedX = true; + colocatedY = true; + } + + // Bilinear interpolation factor + const double alphaX = 1.0-(sourceGrid.x(indexI)+sourceDx-targetX)/sourceDx; + const double alphaY = reversedY ? 
(sourceGrid.y(indexJ)-targetY)/sourceDy + : 1.0-(sourceGrid.y(indexJ)+sourceDy-targetY)/sourceDy; + + // Points to find + std::vector toFind = {true, !colocatedX, !colocatedY, !colocatedX && !colocatedY}; + std::vector valueToFind = {indexI*sourceNy+indexJ, (indexI+1)*sourceNy+indexJ, + indexI*sourceNy+(indexJ+1), (indexI+1)*sourceNy+(indexJ+1)}; + std::array foundIndex; + foundIndex.fill(-1); + + // Binary search for each point + for (size_t jj = 0; jj < 4; ++jj) { + if (toFind[jj]) { + size_t low = 0; + size_t high = targetRecvPointsListOrdered.size()-1; + while (low <= high) { + size_t mid = low+(high-low)/2; + if (valueToFind[jj] == static_cast(targetRecvPointsListOrdered[idx[mid]])) { + foundIndex[jj] = idx[mid]; + break; + } + if (valueToFind[jj] > static_cast(targetRecvPointsListOrdered[idx[mid]])) { + low = mid+1; + } + if (valueToFind[jj] < static_cast(targetRecvPointsListOrdered[idx[mid]])) { + high = mid-1; + } + } + ASSERT(foundIndex[jj] > -1); + ASSERT(static_cast(targetRecvPointsListOrdered[foundIndex[jj]]) == + valueToFind[jj]); + } + } + + // Create interpolation operations + if (colocatedX && colocatedY) { + // Colocated point + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = 1.0; + stencilSize_[targetJnode] = 1; + } else if (colocatedY) { + // Linear interpolation along x + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = 1.0-alphaX; + stencil_[targetJnode][1] = foundIndex[1]; + weights_[targetJnode][1] = alphaX; + stencilSize_[targetJnode] = 2; + } else if (colocatedX) { + // Linear interpolation along y + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = 1.0-alphaY; + stencil_[targetJnode][1] = foundIndex[2]; + weights_[targetJnode][1] = alphaY; + stencilSize_[targetJnode] = 2; + } else { + // Bilinear interpolation + stencil_[targetJnode][0] = foundIndex[0]; + weights_[targetJnode][0] = (1.0-alphaX)*(1.0-alphaY); + stencil_[targetJnode][1] = foundIndex[1]; + weights_[targetJnode][1] = alphaX*(1.0-alphaY); + stencil_[targetJnode][2] = foundIndex[2]; + weights_[targetJnode][2] = (1.0-alphaX)*alphaY; + stencil_[targetJnode][3] = foundIndex[3]; + weights_[targetJnode][3] = alphaX*alphaY; + stencilSize_[targetJnode] = 4; + } + } else { + // Ghost point + stencilSize_[targetJnode] = 0; + } + } + } +} + +void RegionalLinear2D::do_execute(const FieldSet& sourceFieldSet, + FieldSet& targetFieldSet, + Metadata& metadata) const { + ATLAS_TRACE("atlas::interpolation::method::RegionalLinear2D::do_execute()"); + ATLAS_ASSERT(sourceFieldSet.size() == targetFieldSet.size()); + + for (auto i = 0; i < sourceFieldSet.size(); ++i) { + do_execute(sourceFieldSet[i], targetFieldSet[i], metadata); + } +} + +void RegionalLinear2D::do_execute(const Field& sourceField, Field& targetField, + Metadata&) const { + ATLAS_TRACE("atlas::interpolation::method::RegionalLinear2D::do_execute()"); + + if (targetField.size() == 0) { + return; + } + + // Check number of levels + ASSERT(sourceField.levels() == targetField.levels()); + const size_t nz = sourceField.levels() > 0 ? sourceField.levels() : 1; + const size_t ndim = sourceField.levels() > 0 ? 
2 : 1; + + // Scale counts and displs for all levels + std::vector sourceSendCounts3D(comm_.size()); + std::vector sourceSendDispls3D(comm_.size()); + std::vector targetRecvCounts3D(comm_.size()); + std::vector targetRecvDispls3D(comm_.size()); + for (size_t jt = 0; jt < comm_.size(); ++jt) { + sourceSendCounts3D[jt] = sourceSendCounts_[jt]*nz; + sourceSendDispls3D[jt] = sourceSendDispls_[jt]*nz; + targetRecvCounts3D[jt] = targetRecvCounts_[jt]*nz; + targetRecvDispls3D[jt] = targetRecvDispls_[jt]*nz; + } + + // Halo exchange + haloExchange(sourceField); + + // Serialize + std::vector sourceSendVec(sourceSendSize_*nz); + if (ndim == 1) { + const auto sourceView = array::make_view(sourceField); + for (size_t js = 0; js < sourceSendSize_; ++js) { + size_t sourceJnode = sourceSendMapping_[js]; + sourceSendVec[js] = sourceView(sourceJnode); + } + } else if (ndim == 2) { + const auto sourceView = array::make_view(sourceField); + for (size_t js = 0; js < sourceSendSize_; ++js) { + for (size_t k = 0; k < nz; ++k) { + size_t sourceJnode = sourceSendMapping_[js]; + sourceSendVec[js*nz+k] = sourceView(sourceJnode, k); + } + } + } + + // Communication + std::vector targetRecvVec(targetRecvSize_*nz); + comm_.allToAllv(sourceSendVec.data(), sourceSendCounts3D.data(), sourceSendDispls3D.data(), + targetRecvVec.data(), targetRecvCounts3D.data(), targetRecvDispls3D.data()); + + // Interpolation + if (ndim == 1) { + auto targetView = array::make_view(targetField); + targetView.assign(0.0); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + targetView(targetJnode) += weights_[targetJnode][jj] + *targetRecvVec[stencil_[targetJnode][jj]]; + } + } + } else if (ndim == 2) { + auto targetView = array::make_view(targetField); + targetView.assign(0.0); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + for (size_t k = 0; k < nz; ++k) { + targetView(targetJnode, k) += weights_[targetJnode][jj] + *targetRecvVec[stencil_[targetJnode][jj]*nz+k]; + } + } + } + } + + // Set target field dirty + targetField.set_dirty(); +} + +void RegionalLinear2D::do_execute_adjoint(FieldSet& sourceFieldSet, + const FieldSet& targetFieldSet, + Metadata& metadata) const { + ATLAS_TRACE( + "atlas::interpolation::method::RegionalLinear2D::do_execute_adjoint()"); + ATLAS_ASSERT(sourceFieldSet.size() == targetFieldSet.size()); + + for (auto i = 0; i < sourceFieldSet.size(); ++i) { + do_execute_adjoint(sourceFieldSet[i], targetFieldSet[i], metadata); + } +} + +void RegionalLinear2D::do_execute_adjoint(Field& sourceField, + const Field& targetField, + Metadata& metadata) const { + ATLAS_TRACE( + "atlas::interpolation::method::RegionalLinear2D::do_execute_adjoint()"); + + if (targetField.size() == 0) { + return; + } + + // Check number of levels + ASSERT(sourceField.levels() == targetField.levels()); + const size_t nz = sourceField.levels() > 0 ? sourceField.levels() : 1; + const size_t ndim = sourceField.levels() > 0 ? 
2 : 1; + + // Scale counts and displs for all levels + std::vector sourceSendCounts3D(comm_.size()); + std::vector sourceSendDispls3D(comm_.size()); + std::vector targetRecvCounts3D(comm_.size()); + std::vector targetRecvDispls3D(comm_.size()); + for (size_t jt = 0; jt < comm_.size(); ++jt) { + sourceSendCounts3D[jt] = sourceSendCounts_[jt]*nz; + sourceSendDispls3D[jt] = sourceSendDispls_[jt]*nz; + targetRecvCounts3D[jt] = targetRecvCounts_[jt]*nz; + targetRecvDispls3D[jt] = targetRecvDispls_[jt]*nz; + } + + // Copy destination field + Field targetTmpField = targetField.clone(); + + // Interpolation adjoint + std::vector targetRecvVec(targetRecvSize_*nz, 0.0); + if (ndim == 1) { + const auto targetView = array::make_view(targetTmpField); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + targetRecvVec[stencil_[targetJnode][jj]] += weights_[targetJnode][jj] + *targetView(targetJnode); + } + } + } else if (ndim == 2) { + const auto targetView = array::make_view(targetTmpField); + for (size_t targetJnode = 0; targetJnode < targetSize_; ++targetJnode) { + for (size_t jj = 0; jj < stencilSize_[targetJnode]; ++jj) { + for (size_t k = 0; k < nz; ++k) { + targetRecvVec[stencil_[targetJnode][jj]*nz+k] += weights_[targetJnode][jj] + *targetView(targetJnode, k); + } + } + } + } + + // Communication + std::vector sourceSendVec(sourceSendSize_*nz); + comm_.allToAllv(targetRecvVec.data(), targetRecvCounts3D.data(), targetRecvDispls3D.data(), + sourceSendVec.data(), sourceSendCounts3D.data(), sourceSendDispls3D.data()); + + // Deserialize + if (ndim == 1) { + auto sourceView = array::make_view(sourceField); + sourceView.assign(0.0); + for (size_t js = 0; js < sourceSendSize_; ++js) { + size_t sourceJnode = sourceSendMapping_[js]; + sourceView(sourceJnode) += sourceSendVec[js]; + } + } else if (ndim == 2) { + auto sourceView = array::make_view(sourceField); + sourceView.assign(0.0); + for (size_t js = 0; js < sourceSendSize_; ++js) { + size_t sourceJnode = sourceSendMapping_[js]; + for (size_t k = 0; k < nz; ++k) { + sourceView(sourceJnode, k) += sourceSendVec[js*nz+k]; + } + } + } + + // Adjoint halo exchange + adjointHaloExchange(sourceField); +} + +} // namespace method +} // namespace interpolation +} // namespace atlas diff --git a/src/atlas/interpolation/method/structured/RegionalLinear2D.h b/src/atlas/interpolation/method/structured/RegionalLinear2D.h new file mode 100644 index 000000000..462f88662 --- /dev/null +++ b/src/atlas/interpolation/method/structured/RegionalLinear2D.h @@ -0,0 +1,77 @@ +/* + * (C) Copyright 2024 Meteorologisk Institutt + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
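// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The method implemented above is registered with the MethodFactory under the
// name "regional-linear-2d", so it can be reached through the generic
// atlas::Interpolation front end. The sketch below assumes pre-built function
// spaces "sourceFs" (a functionspace::StructuredColumns on a regional, regular
// projected grid, as required by RegionalLinear2D::do_setup above) and
// "targetFs"; everything else is the standard Atlas Field/Interpolation API.
#include "atlas/field.h"
#include "atlas/functionspace.h"
#include "atlas/interpolation.h"
#include "atlas/option.h"

atlas::Field interpolate_regional(const atlas::FunctionSpace& sourceFs,
                                  const atlas::FunctionSpace& targetFs,
                                  const atlas::Field& sourceField) {
    // Select the method registered as "regional-linear-2d".
    atlas::Interpolation interpolation(atlas::option::type("regional-linear-2d"),
                                       sourceFs, targetFs);
    atlas::Field targetField = targetFs.createField<double>(atlas::option::name("target"));
    interpolation.execute(sourceField, targetField);  // forward interpolation
    return targetField;
}
// ---------------------------------------------------------------------------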
+ */ + +#pragma once + +#include "atlas/interpolation/method/Method.h" + +#include + +#include "atlas/field.h" +#include "atlas/functionspace.h" +#include "atlas/grid/Grid.h" + +#include "eckit/config/Configuration.h" +#include "eckit/mpi/Comm.h" + +namespace atlas { +namespace interpolation { +namespace method { + + +class RegionalLinear2D : public Method { + public: + /// @brief Regional linear interpolation + /// + /// @details + /// + RegionalLinear2D(const Config& config) : Method(config), comm_(eckit::mpi::comm()) {} + ~RegionalLinear2D() override {} + + void print(std::ostream&) const override; + const FunctionSpace& source() const override { return source_; } + const FunctionSpace& target() const override { return target_; } + + void do_execute(const FieldSet& sourceFieldSet, FieldSet& targetFieldSet, + Metadata& metadata) const override; + void do_execute(const Field& sourceField, Field& targetField, + Metadata& metadata) const override; + + void do_execute_adjoint(FieldSet& sourceFieldSet, + const FieldSet& targetFieldSet, + Metadata& metadata) const override; + void do_execute_adjoint(Field& sourceField, const Field& targetField, + Metadata& metadata) const override; + + private: + using Method::do_setup; + void do_setup(const FunctionSpace& source, const FunctionSpace& target) override; + void do_setup(const Grid& source, const Grid& target, const Cache&) override; + + FunctionSpace source_{}; + FunctionSpace target_{}; + + const eckit::mpi::Comm & comm_; + size_t sourceSize_; + std::vector mpiTask_; + size_t targetSize_; + size_t sourceSendSize_; + size_t targetRecvSize_; + std::vector sourceSendCounts_; + std::vector sourceSendDispls_; + std::vector targetRecvCounts_; + std::vector targetRecvDispls_; + std::vector sourceSendMapping_; + std::vector> stencil_; + std::vector> weights_; + std::vector stencilSize_; +}; + + +} // namespace method +} // namespace interpolation +} // namespace atlas diff --git a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h index 3dd79099b..6d4db4dba 100644 --- a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h +++ b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.h @@ -110,9 +110,10 @@ class ConservativeSphericalPolygonInterpolation : public Method { SRCTGT_INTERSECTPLG_DIFF, // index, 1/(unit_sphere.area) ( \sum_{scell} scell.area - \sum{tcell} tcell.area ) REMAP_CONS, // index, error in mass conservation REMAP_L2, // index, error accuracy for given analytical function - REMAP_LINF // index, like REMAP_L2 but in L_infinity norm + REMAP_LINF, // index, like REMAP_L2 but in L_infinity norm + ERRORS_ENUM_SIZE }; - std::array errors; + std::array errors; double tgt_area_sum; double src_area_sum; diff --git a/src/atlas/library/Library.cc b/src/atlas/library/Library.cc index 6ed6b227a..99bebfe96 100644 --- a/src/atlas/library/Library.cc +++ b/src/atlas/library/Library.cc @@ -485,8 +485,8 @@ void Library::Information::print(std::ostream& out) const { feature_MPI = true; #endif std::string array_data_store = "Native-host"; -#if ATLAS_HAVE_CUDA - array_data_store = "Native-CUDA"; +#if ATLAS_HAVE_GPU + array_data_store = "Native-GPU"; #endif #if ATLAS_HAVE_GRIDTOOLS_STORAGE array_data_store = "Gridtools-host"; diff --git a/src/atlas/library/config.h b/src/atlas/library/config.h index d684c3716..82c2b2e7e 100644 --- a/src/atlas/library/config.h +++ 
b/src/atlas/library/config.h @@ -16,6 +16,7 @@ #include "atlas/atlas_ecbuild_config.h" #include "atlas/library/defines.h" +#include "hic/hic_config.h" #ifndef DOXYGEN_SHOULD_SKIP_THIS #define DOXYGEN_HIDE(X) X @@ -24,6 +25,21 @@ #define ATLAS_HAVE_TRACE 1 #define ATLAS_HAVE_TRACE_BARRIERS 1 +#define ATLAS_HIC_COMPILER HIC_COMPILER +#define ATLAS_HOST_COMPILE HIC_HOST_COMPILE +#define ATLAS_DEVICE_COMPILE HIC_DEVICE_COMPILE +#define ATLAS_HOST_DEVICE HIC_HOST_DEVICE +#define ATLAS_DEVICE HIC_DEVICE +#define ATLAS_HOST HIC_HOST +#define ATLAS_GLOBAL HIC_GLOBAL + +#if HIC_BACKEND_CUDA || HIC_BACKEND_HIP +#define ATLAS_HAVE_GPU 1 +#else +#define ATLAS_HAVE_GPU 0 +#endif + + namespace atlas { /// @typedef gidx_t diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in index cf329bfd5..de1672dcf 100644 --- a/src/atlas/library/defines.h.in +++ b/src/atlas/library/defines.h.in @@ -17,7 +17,7 @@ #define ATLAS_HAVE_OMP @atlas_HAVE_OMP_CXX@ #define ATLAS_OMP_TASK_SUPPORTED @ATLAS_OMP_TASK_SUPPORTED@ #define ATLAS_OMP_TASK_UNTIED_SUPPORTED @ATLAS_OMP_TASK_UNTIED_SUPPORTED@ -#define ATLAS_HAVE_CUDA @atlas_HAVE_CUDA@ +#define ATLAS_HAVE_GPU @atlas_HAVE_GPU@ #define ATLAS_HAVE_ACC @atlas_HAVE_ACC@ #define ATLAS_HAVE_QHULL @atlas_HAVE_QHULL@ #define ATLAS_HAVE_CGAL @atlas_HAVE_CGAL@ @@ -45,16 +45,6 @@ #define ATLAS_ECKIT_DEVELOP @ATLAS_ECKIT_DEVELOP@ #define ATLAS_HAVE_FUNCTIONSPACE @atlas_HAVE_ATLAS_FUNCTIONSPACE@ -#ifdef __CUDACC__ -#define ATLAS_HOST_DEVICE __host__ __device__ -#define ATLAS_DEVICE __device__ -#define ATLAS_HOST __host__ -#else -#define ATLAS_HOST_DEVICE -#define ATLAS_DEVICE -#define ATLAS_HOST -#endif - #define ATLAS_BITS_LOCAL @ATLAS_BITS_LOCAL@ #if defined( __GNUC__ ) || defined( __clang__ ) diff --git a/src/atlas/mesh/Connectivity.h b/src/atlas/mesh/Connectivity.h index a77eafdfe..1631384d7 100644 --- a/src/atlas/mesh/Connectivity.h +++ b/src/atlas/mesh/Connectivity.h @@ -183,7 +183,7 @@ class IrregularConnectivityImpl { /// No resizing can be performed as data is not owned. IrregularConnectivityImpl(idx_t values[], idx_t rows, idx_t displs[], idx_t counts[]); -#ifdef __CUDACC__ +#if ATLAS_HIC_COMPILER /// @brief Copy ctr (only to be used when calling a cuda kernel) // This ctr has to be defined in the header, since __CUDACC__ will identify // whether @@ -529,7 +529,7 @@ class BlockConnectivityImpl { /// No resizing can be performed as data is not owned. 
BlockConnectivityImpl(idx_t rows, idx_t cols, idx_t values[]); -#ifdef __CUDACC__ +#if ATLAS_HIC_COMPILER /// @brief Copy ctr (only to be used when calling a cuda kernel) // This ctr has to be defined in the header, since __CUDACC__ will identify // whether it is compiled it for a GPU kernel @@ -645,6 +645,7 @@ using Connectivity = IrregularConnectivity; // ----------------------------------------------------------------------------------------------------- +ATLAS_HOST_DEVICE inline idx_t IrregularConnectivityImpl::operator()(idx_t row_idx, idx_t col_idx) const { assert(counts_[row_idx] > (col_idx)); return values_[displs_[row_idx] + col_idx] FROM_FORTRAN; @@ -662,36 +663,43 @@ inline void IrregularConnectivityImpl::set(idx_t row_idx, idx_t col_idx, const i values_[displs_[row_idx] + col_idx] = value TO_FORTRAN; } +ATLAS_HOST_DEVICE inline IrregularConnectivityImpl::Row IrregularConnectivityImpl::row(idx_t row_idx) const { return IrregularConnectivityImpl::Row(const_cast(values_.data()) + displs_(row_idx), counts_(row_idx)); } // ----------------------------------------------------------------------------------------------------- +ATLAS_HOST_DEVICE inline idx_t MultiBlockConnectivityImpl::operator()(idx_t row_idx, idx_t col_idx) const { return IrregularConnectivityImpl::operator()(row_idx, col_idx); } +ATLAS_HOST_DEVICE inline idx_t MultiBlockConnectivityImpl::operator()(idx_t block_idx, idx_t block_row_idx, idx_t block_col_idx) const { return block(block_idx)(block_row_idx, block_col_idx); } // ----------------------------------------------------------------------------------------------------- +ATLAS_HOST_DEVICE inline idx_t BlockConnectivityImpl::operator()(idx_t row_idx, idx_t col_idx) const { return values_[index(row_idx, col_idx)] FROM_FORTRAN; } +ATLAS_HOST_DEVICE inline void BlockConnectivityImpl::set(idx_t row_idx, const idx_t column_values[]) { for (idx_t n = 0; n < cols_; ++n) { values_[index(row_idx, n)] = column_values[n] TO_FORTRAN; } } +ATLAS_HOST_DEVICE inline void BlockConnectivityImpl::set(idx_t row_idx, idx_t col_idx, const idx_t value) { values_[index(row_idx, col_idx)] = value TO_FORTRAN; } +ATLAS_HOST_DEVICE inline idx_t BlockConnectivityImpl::index(idx_t i, idx_t j) const { return i * cols_ + j; } diff --git a/src/atlas/mesh/actions/BuildEdges.cc b/src/atlas/mesh/actions/BuildEdges.cc index 08a568a17..4ab0f453a 100644 --- a/src/atlas/mesh/actions/BuildEdges.cc +++ b/src/atlas/mesh/actions/BuildEdges.cc @@ -93,7 +93,7 @@ void build_element_to_edge_connectivity(Mesh& mesh) { UniqueLonLat compute_uid(mesh); for (idx_t jedge = 0; jedge < nb_edges; ++jedge) { - edge_sort.emplace_back(Sort(compute_uid(edge_node_connectivity.row(jedge)), jedge)); + edge_sort.emplace_back(compute_uid(edge_node_connectivity.row(jedge)), jedge); } std::sort(edge_sort.data(), edge_sort.data() + nb_edges); diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc index 17f617d18..bd403a29b 100644 --- a/src/atlas/mesh/actions/BuildParallelFields.cc +++ b/src/atlas/mesh/actions/BuildParallelFields.cc @@ -984,7 +984,7 @@ Field& build_edges_global_idx(Mesh& mesh) { std::vector edge_sort; edge_sort.reserve(glb_nb_edges); for (idx_t jedge = 0; jedge < glb_edge_id.shape(0); ++jedge) { - edge_sort.emplace_back(Node(glb_edge_id(jedge), jedge)); + edge_sort.emplace_back(glb_edge_id(jedge), jedge); } std::sort(edge_sort.begin(), edge_sort.end()); diff --git a/src/atlas/parallel/HaloExchange.h b/src/atlas/parallel/HaloExchange.h index 0d6b0e229..2c4931007 
100644 --- a/src/atlas/parallel/HaloExchange.h +++ b/src/atlas/parallel/HaloExchange.h @@ -33,8 +33,8 @@ #include "atlas/runtime/Exception.h" #include "atlas/util/Object.h" -#if ATLAS_HAVE_CUDA -#include "atlas/parallel/HaloExchangeCUDA.h" +#if ATLAS_HAVE_GPU +#include "atlas/parallel/HaloExchangeGPU.h" #endif namespace atlas { @@ -90,7 +90,7 @@ class HaloExchange : public util::Object { DATA_TYPE* allocate_buffer(const int buffer_size, const bool on_device) const; template - void deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const; + void deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const; template void pack_send_buffer(const array::ArrayView& hfield, @@ -174,6 +174,14 @@ void HaloExchange::execute(array::Array& field, bool on_device) const { DATA_TYPE* inner_buffer = allocate_buffer(inner_size, on_device); DATA_TYPE* halo_buffer = allocate_buffer(halo_size, on_device); +#if ATLAS_HAVE_GPU + if (on_device) { + ATLAS_ASSERT( is_device_accessible(inner_buffer) ); + ATLAS_ASSERT( is_device_accessible(halo_buffer) ); + ATLAS_ASSERT( is_device_accessible(field_dv.data()) ); + } +#endif + counts_displs_setup(var_size, inner_counts_init, halo_counts_init, inner_counts, halo_counts, inner_displs, halo_displs); @@ -190,8 +198,8 @@ void HaloExchange::execute(array::Array& field, bool on_device) const { wait_for_send(inner_counts_init, inner_req); - deallocate_buffer(inner_buffer, on_device); - deallocate_buffer(halo_buffer, on_device); + deallocate_buffer(inner_buffer, inner_size, on_device); + deallocate_buffer(halo_buffer, halo_size, on_device); } template @@ -241,8 +249,8 @@ void HaloExchange::execute_adjoint(array::Array& field, bool on_device) const { zero_halos(field_hv, field_dv, halo_buffer, halo_size, on_device); - deallocate_buffer(halo_buffer, on_device); - deallocate_buffer(inner_buffer, on_device); + deallocate_buffer(halo_buffer, halo_size, on_device); + deallocate_buffer(inner_buffer, inner_size, on_device); } template @@ -261,12 +269,12 @@ DATA_TYPE* HaloExchange::allocate_buffer(const int buffer_size, const bool on_de template -void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const { +void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const { if (on_device) { - util::delete_devicemem(buffer); + util::delete_devicemem(buffer, buffer_size); } else { - util::delete_hostmem(buffer); + util::delete_hostmem(buffer, buffer_size); } } @@ -294,7 +302,7 @@ void HaloExchange::ireceive(int tag, std::vector& recv_displs, std::vector< for (size_t jproc = 0; jproc < static_cast(nproc); ++jproc) { if (recv_counts[jproc] > 0) { recv_req[jproc] = - comm().iReceive(&recv_buffer[recv_displs[jproc]], recv_counts[jproc], jproc, tag); + comm().iReceive(recv_buffer+recv_displs[jproc], recv_counts[jproc], jproc, tag); } } } @@ -309,7 +317,7 @@ void HaloExchange::isend_and_wait_for_receive(int tag, std::vector& recv_co ATLAS_TRACE_MPI(ISEND) { for (size_t jproc = 0; jproc < static_cast(nproc); ++jproc) { if (send_counts[jproc] > 0) { - send_req[jproc] = comm().iSend(&send_buffer[send_displs[jproc]], send_counts[jproc], jproc, tag); + send_req[jproc] = comm().iSend(send_buffer+send_displs[jproc], send_counts[jproc], jproc, tag); } } } @@ -380,7 +388,7 @@ void HaloExchange::zero_halos(ATLAS_MAYBE_UNUSED const array::ArrayView& dfield, DATA_TYPE* recv_buffer, int recv_size, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if 
(on_device) { ATLAS_NOTIMPLEMENTED; } @@ -394,9 +402,9 @@ void HaloExchange::pack_send_buffer(ATLAS_MAYBE_UNUSED const array::ArrayView& dfield, DATA_TYPE* send_buffer, int send_size, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::pack(sendcnt_, sendmap_, hfield, dfield, send_buffer, + halo_packer_hic::pack(sendcnt_, sendmap_, hfield, dfield, send_buffer, send_size); } else @@ -410,9 +418,9 @@ void HaloExchange::unpack_recv_buffer(const DATA_TYPE* recv_buffer, int recv_siz array::ArrayView& dfield, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::unpack(recvcnt_, recvmap_, recv_buffer, recv_size, hfield, + halo_packer_hic::unpack(recvcnt_, recvmap_, recv_buffer, recv_size, hfield, dfield); } else @@ -425,9 +433,9 @@ void HaloExchange::pack_recv_adjoint_buffer(ATLAS_MAYBE_UNUSED const array::Arra const array::ArrayView& dfield, DATA_TYPE* recv_buffer, int recv_size, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::pack(recvcnt_, recvmap_, hfield, dfield, recv_buffer, + halo_packer_hic::pack(recvcnt_, recvmap_, hfield, dfield, recv_buffer, recv_size); } else @@ -441,9 +449,9 @@ void HaloExchange::unpack_send_adjoint_buffer(const DATA_TYPE* send_buffer, int array::ArrayView& dfield, ATLAS_MAYBE_UNUSED const bool on_device) const { ATLAS_TRACE(); -#if ATLAS_HAVE_CUDA +#if ATLAS_HAVE_GPU if (on_device) { - halo_packer_cuda::unpack(sendcnt_, sendmap_, send_buffer, send_size, hfield, + halo_packer_hic::unpack(sendcnt_, sendmap_, send_buffer, send_size, hfield, dfield); } else diff --git a/src/atlas/parallel/HaloExchangeCUDA.h b/src/atlas/parallel/HaloExchangeGPU.h similarity index 94% rename from src/atlas/parallel/HaloExchangeCUDA.h rename to src/atlas/parallel/HaloExchangeGPU.h index ada9641e6..f3b9a46c5 100644 --- a/src/atlas/parallel/HaloExchangeCUDA.h +++ b/src/atlas/parallel/HaloExchangeGPU.h @@ -18,7 +18,7 @@ namespace atlas { namespace parallel { template -struct halo_packer_cuda { +struct halo_packer_hic { static void pack(const int sendcnt, array::SVector const& sendmap, const array::ArrayView& hfield, const array::ArrayView& dfield, DATA_TYPE* send_buffer, int send_buffer_size); @@ -28,5 +28,8 @@ struct halo_packer_cuda { array::ArrayView& dfield); }; +bool is_device_accessible(const void* data); + + } // namespace parallel } // namespace atlas diff --git a/src/atlas/parallel/HaloExchangeCUDA.cu b/src/atlas/parallel/HaloExchangeGPU.hic similarity index 80% rename from src/atlas/parallel/HaloExchangeCUDA.cu rename to src/atlas/parallel/HaloExchangeGPU.hic index a9242515a..c96893d1c 100644 --- a/src/atlas/parallel/HaloExchangeCUDA.cu +++ b/src/atlas/parallel/HaloExchangeGPU.hic @@ -9,14 +9,26 @@ */ -#include "atlas/parallel/HaloExchangeCUDA.h" +#include "atlas/parallel/HaloExchangeGPU.h" #include "atlas/parallel/HaloExchangeImpl.h" #include "atlas/array/SVector.h" #include "atlas/runtime/Exception.h" +#include "hic/hic.h" + namespace atlas { namespace parallel { +bool is_device_accessible(const void* ptr) { + hicPointerAttributes attr; + hicError_t code = hicPointerGetAttributes(&attr, ptr); + if( code != hicSuccess ) { + static_cast(hicGetLastError()); + return false; + } + return ptr == attr.devicePointer; +} + template struct get_buffer_index{ template @@ -120,7 +132,7 @@ struct get_first_non_parallel_dim }; 
template -struct get_n_cuda_blocks +struct get_n_hic_blocks { template static unsigned int apply(const array::ArrayView& hfield, const unsigned int block_size_y) { @@ -130,7 +142,7 @@ struct get_n_cuda_blocks }; template<> -struct get_n_cuda_blocks<0, 1> { +struct get_n_hic_blocks<0, 1> { template static unsigned int apply(const array::ArrayView& hfield, const unsigned int block_size_y) { return 1; @@ -138,70 +150,71 @@ struct get_n_cuda_blocks<0, 1> { }; template -void halo_packer_cuda::pack( const int sendcnt, array::SVector const & sendmap, +void halo_packer_hic::pack( const int sendcnt, array::SVector const & sendmap, const array::ArrayView& hfield, const array::ArrayView& dfield, DATA_TYPE* send_buffer, int send_buffer_size ) { const unsigned int block_size_x = 32; const unsigned int block_size_y = (RANK==1) ? 1 : 4; - unsigned int nblocks_y = get_n_cuda_blocks::apply(hfield, block_size_y); + const unsigned int nblocks_x = (sendcnt+block_size_x-1)/block_size_x; + const unsigned int nblocks_y = get_n_hic_blocks::apply(hfield, block_size_y); dim3 threads(block_size_x, block_size_y); - dim3 blocks((sendcnt+block_size_x-1)/block_size_x, nblocks_y); - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + dim3 blocks(nblocks_x, nblocks_y); + HIC_CALL(hicDeviceSynchronize()); + hicError_t err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } pack_kernel<<>>(sendcnt, sendmap.data(), sendmap.size(), dfield, send_buffer, send_buffer_size); - err = cudaGetLastError(); - if (err != cudaSuccess) + err = hicGetLastError(); + if (err != hicSuccess) throw_Exception("Error launching GPU packing kernel"); - cudaDeviceSynchronize(); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + HIC_CALL(hicDeviceSynchronize()); + err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } } template -void halo_packer_cuda::unpack(const int recvcnt, array::SVector const & recvmap, +void halo_packer_hic::unpack(const int recvcnt, array::SVector const & recvmap, const DATA_TYPE* recv_buffer , int recv_buffer_size, array::ArrayView &hfield, array::ArrayView &dfield) { const unsigned int block_size_x = 32; const unsigned int block_size_y = (RANK==1) ? 
1 : 4; - unsigned int nblocks_y = get_n_cuda_blocks::apply(hfield, block_size_y); + unsigned int nblocks_y = get_n_hic_blocks::apply(hfield, block_size_y); dim3 threads(block_size_x, block_size_y); dim3 blocks((recvcnt+block_size_x-1)/block_size_x, nblocks_y); - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + HIC_CALL(hicDeviceSynchronize()); + hicError_t err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } unpack_kernel<<>>(recvcnt, recvmap.data(), recvmap.size(), recv_buffer, recv_buffer_size, dfield); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error launching GPU packing kernel")+ cudaGetErrorString(err); + err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error launching GPU packing kernel")+ hicGetErrorString(err); throw_Exception(msg); } - cudaDeviceSynchronize(); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::string msg = std::string("Error synchronizing device")+ cudaGetErrorString(err); + HIC_CALL(hicDeviceSynchronize()); + err = hicGetLastError(); + if (err != hicSuccess) { + std::string msg = std::string("Error synchronizing device")+ hicGetErrorString(err); throw_Exception(msg); } } @@ -220,11 +233,11 @@ void halo_packer_cuda::unpack(const int recvcnt, a #define EXPLICIT_TEMPLATE_INSTANTIATION( ParallelDim, RANK ) \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; \ - template class halo_packer_cuda; + template class halo_packer_hic; \ + template class halo_packer_hic; \ + template class halo_packer_hic; \ + template class halo_packer_hic; \ + template class halo_packer_hic; #define EXPLICIT_TEMPLATE_INSTANTIATION_RANK(RANK) \ ATLAS_REPEAT_MACRO(RANK, EXPLICIT_TEMPLATE_INSTANTIATION, RANK) diff --git a/src/atlas/parallel/acc/acc.cc b/src/atlas/parallel/acc/acc.cc new file mode 100644 index 000000000..8a600aea5 --- /dev/null +++ b/src/atlas/parallel/acc/acc.cc @@ -0,0 +1,65 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
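// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The packer above now goes through the HIC layer ("hic/hic.h"), whose hic*
// symbols resolve to the CUDA or HIP runtime depending on the configured
// backend; HIC_CALL is the error-checking wrapper used throughout this patch.
// The helper below is hypothetical and only exercises calls that appear in the
// patch (hicMalloc, hicMemcpy, hicFree).
#include <vector>
#include "hic/hic.h"

void roundtrip_through_device(std::vector<double>& host) {
    const auto bytes = host.size() * sizeof(double);
    double* device = nullptr;
    HIC_CALL(hicMalloc((void**)&device, bytes));                             // device allocation
    HIC_CALL(hicMemcpy(device, host.data(), bytes, hicMemcpyHostToDevice));  // host -> device
    HIC_CALL(hicMemcpy(host.data(), device, bytes, hicMemcpyDeviceToHost));  // device -> host
    HIC_CALL(hicFree(device));                                               // release
}
// ---------------------------------------------------------------------------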
+ */ + +#include "acc.h" + +#include "atlas/library/defines.h" + +#if ATLAS_HAVE_ACC +#include "atlas_acc_support/atlas_acc_map_data.h" +#endif + +namespace atlas::acc { + +int devices() { +#if ATLAS_HAVE_ACC + static int num_devices = [](){ + auto devicetype = atlas_acc_get_device_type(); + int _num_devices = atlas_acc_get_num_devices(); + if (_num_devices == 1 && devicetype == atlas_acc_device_host) { + --_num_devices; + } + return _num_devices; + }(); + return num_devices; +#else + return 0; +#endif +} + +void map(void* host_data, void* device_data, std::size_t bytes) { +#if ATLAS_HAVE_ACC + atlas_acc_map_data(host_data, device_data, bytes); +#endif +} +void unmap(void* host_data) { +#if ATLAS_HAVE_ACC + atlas_acc_unmap_data(host_data); +#endif +} + +bool is_present(void* host_data, std::size_t bytes) { +#if ATLAS_HAVE_ACC + return atlas_acc_is_present(host_data, bytes); +#else + return false; +#endif +} + +void* deviceptr(void* host_data) { +#if ATLAS_HAVE_ACC + return atlas_acc_deviceptr(host_data); +#else + return nullptr; +#endif +} + +} + diff --git a/src/atlas/array/gridtools/GridToolsArrayView.cu b/src/atlas/parallel/acc/acc.h similarity index 55% rename from src/atlas/array/gridtools/GridToolsArrayView.cu rename to src/atlas/parallel/acc/acc.h index fe607a93b..3a10f1e25 100644 --- a/src/atlas/array/gridtools/GridToolsArrayView.cu +++ b/src/atlas/parallel/acc/acc.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2013 ECMWF. + * (C) Copyright 2024- ECMWF. * * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -8,4 +8,16 @@ * nor does it submit to any jurisdiction. */ -#include "GridToolsArrayView.cc" +#pragma once +#include + +namespace atlas::acc { + +int devices(); +void map(void* host_data, void* device_data, std::size_t bytes); +void unmap(void* host_data); +bool is_present(void* host_data, std::size_t bytes); +void* deviceptr(void* host_data); + +} + diff --git a/src/atlas/runtime/trace/Timings.cc b/src/atlas/runtime/trace/Timings.cc index a477969d6..cbc847213 100644 --- a/src/atlas/runtime/trace/Timings.cc +++ b/src/atlas/runtime/trace/Timings.cc @@ -274,8 +274,8 @@ void TimingsRegistry::report(std::ostream& out, const eckit::Configuration& conf } } - size_t max_title_length(0); - size_t max_location_length(0); + size_t max_title_length(2); + size_t max_location_length(9); size_t max_nest(0); long max_count(0); double max_seconds(0); diff --git a/src/atlas/util/Allocate.cc b/src/atlas/util/Allocate.cc index b93446abb..58f2648d6 100644 --- a/src/atlas/util/Allocate.cc +++ b/src/atlas/util/Allocate.cc @@ -16,9 +16,7 @@ #include "atlas/library/config.h" #include "atlas/runtime/Exception.h" -#if ATLAS_HAVE_CUDA -#include -#endif +#include "hic/hic.h" namespace atlas { namespace util { @@ -27,50 +25,66 @@ namespace util { namespace detail { //------------------------------------------------------------------------------ -void allocate_cudamanaged(void** ptr, size_t size) { -#if ATLAS_HAVE_CUDA - cudaError_t err = cudaMallocManaged(ptr, size); - if (err != cudaSuccess) - throw_AssertionFailed("failed to allocate GPU memory", Here()); -#else - *ptr = malloc(size); -#endif -} - -void deallocate_cudamanaged(void* ptr) { -#if ATLAS_HAVE_CUDA - cudaError_t err = cudaDeviceSynchronize(); - if (err != cudaSuccess) - throw_AssertionFailed("failed to synchronize memory", Here()); - - err = cudaFree(ptr); - // The following throws an invalid device memory - if (err != cudaSuccess) - 
throw_AssertionFailed("failed to free GPU memory", Here()); -#else - free(ptr); -#endif +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + +void allocate_managed(void** ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return allocate_host(ptr, bytes); + } + if (devices() == 0) { + return allocate_host(ptr, bytes); + } + HIC_CALL(hicMallocManaged(ptr, bytes)); +} + +void deallocate_managed(void* ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return deallocate_host(ptr, bytes); + } + if (devices() == 0) { + return deallocate_host(ptr, bytes); + } + HIC_CALL(hicDeviceSynchronize()); + HIC_CALL(hicFree(ptr)); } -void allocate_cuda(void** ptr, size_t size) { -#if ATLAS_HAVE_CUDA - cudaError_t err = cudaMalloc(ptr, size); - if (err != cudaSuccess) - throw_AssertionFailed("failed to allocate GPU memory", Here()); -#else - *ptr = malloc(size); -#endif +void allocate_device(void** ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return allocate_host(ptr, bytes); + } + if (devices() == 0) { + return allocate_host(ptr, bytes); + } + HIC_CALL(hicMalloc(ptr, bytes)); } -void deallocate_cuda(void* ptr) { - deallocate_cudamanaged(ptr); +void deallocate_device(void* ptr, size_t bytes) { + if constexpr (not ATLAS_HAVE_GPU) { + return deallocate_host(ptr, bytes); + } + if (devices() == 0) { + return deallocate_host(ptr, bytes); + } + HIC_CALL(hicDeviceSynchronize()); + HIC_CALL(hicFree(ptr)); } -void allocate_host(void** ptr, size_t size) { - *ptr = malloc(size); +void allocate_host(void** ptr, size_t bytes) { + *ptr = malloc(bytes); } -void deallocate_host(void* ptr) { +void deallocate_host(void* ptr, size_t /*bytes*/) { free(ptr); } @@ -91,8 +105,17 @@ void atlas__allocate_managedmem_int(int*& a, size_t N) { void atlas__allocate_managedmem_long(long*& a, size_t N) { allocate_managedmem(a, N); } -void atlas__deallocate_managedmem(void*& a) { - delete_managedmem(a); +void atlas__deallocate_managedmem_double(double*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_float(float*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_int(int*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_long(long*& a, size_t N) { + delete_managedmem(a, N); } } diff --git a/src/atlas/util/Allocate.h b/src/atlas/util/Allocate.h index cac37e40a..b8fac7179 100644 --- a/src/atlas/util/Allocate.h +++ b/src/atlas/util/Allocate.h @@ -18,28 +18,28 @@ namespace util { //------------------------------------------------------------------------------ namespace detail { -void allocate_cudamanaged(void** ptr, size_t size); -void deallocate_cudamanaged(void* ptr); +void allocate_managed(void** ptr, size_t bytes); +void deallocate_managed(void* ptr, size_t bytes); -void allocate_cuda(void** ptr, size_t size); -void deallocate_cuda(void* ptr); +void allocate_device(void** ptr, size_t bytes); +void deallocate_device(void* ptr, size_t bytes); -void allocate_host(void** ptr, size_t size); -void deallocate_host(void* ptr); +void allocate_host(void** ptr, size_t bytes); +void deallocate_host(void* ptr, size_t bytes); } // namespace detail template void allocate_managedmem(T*& data, size_t N) { if (N != 0) { - detail::allocate_cudamanaged(reinterpret_cast(&data), N * sizeof(T)); + detail::allocate_managed(reinterpret_cast(&data), N * sizeof(T)); } } 
template -void delete_managedmem(T*& data) { +void delete_managedmem(T*& data, size_t N) { if (data) { - detail::deallocate_cudamanaged(data); + detail::deallocate_managed(data, N * sizeof(T)); data = nullptr; } } @@ -47,14 +47,14 @@ void delete_managedmem(T*& data) { template void allocate_devicemem(T*& data, size_t N) { if (N != 0) { - detail::allocate_cuda(reinterpret_cast(&data), N * sizeof(T)); + detail::allocate_device(reinterpret_cast(&data), N * sizeof(T)); } } template -void delete_devicemem(T*& data) { +void delete_devicemem(T*& data, size_t N) { if (data) { - detail::deallocate_cuda(data); + detail::deallocate_device(data, N * sizeof(T)); data = nullptr; } } @@ -67,9 +67,9 @@ void allocate_hostmem(T*& data, size_t N) { } template -void delete_hostmem(T*& data) { +void delete_hostmem(T*& data, size_t N) { if (data) { - detail::deallocate_host(data); + detail::deallocate_host(data, N * sizeof(T)); data = nullptr; } } @@ -82,7 +82,10 @@ void atlas__allocate_managedmem_double(double*& a, size_t N); void atlas__allocate_managedmem_float(float*& a, size_t N); void atlas__allocate_managedmem_int(int*& a, size_t N); void atlas__allocate_managedmem_long(long*& a, size_t N); -void atlas__deallocate_managedmem(void*& a); +void atlas__deallocate_managedmem_double(double*& a, size_t N); +void atlas__deallocate_managedmem_float(float*& a, size_t N); +void atlas__deallocate_managedmem_int(int*& a, size_t N); +void atlas__deallocate_managedmem_long(long*& a, size_t N); } //------------------------------------------------------------------------------ diff --git a/src/atlas/util/GPUClonable.h b/src/atlas/util/GPUClonable.h index fcee74129..704a94429 100644 --- a/src/atlas/util/GPUClonable.h +++ b/src/atlas/util/GPUClonable.h @@ -12,9 +12,7 @@ #include "atlas/library/config.h" -#if ATLAS_HAVE_CUDA -#include -#endif +#include "hic/hic.h" namespace atlas { namespace util { @@ -22,30 +20,28 @@ namespace util { template struct GPUClonable { GPUClonable(Base* base_ptr): base_ptr_(base_ptr), gpu_object_ptr_(nullptr) { -#if ATLAS_HAVE_CUDA - cudaMalloc(&gpu_object_ptr_, sizeof(Base)); -#endif + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMalloc(&gpu_object_ptr_, sizeof(Base))); + } } ~GPUClonable() { - if (gpu_object_ptr_) { -#if ATLAS_HAVE_CUDA - cudaFree(gpu_object_ptr_); -#endif + if (gpu_object_ptr_ != nullptr) { + HIC_CALL(hicFree(gpu_object_ptr_)); } } Base* gpu_object_ptr() { return static_cast(gpu_object_ptr_); } void updateDevice() { -#if ATLAS_HAVE_CUDA - cudaMemcpy(gpu_object_ptr_, base_ptr_, sizeof(Base), cudaMemcpyHostToDevice); -#endif + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMemcpy(gpu_object_ptr_, base_ptr_, sizeof(Base), hicMemcpyHostToDevice)); + } } void updateHost() { -#if ATLAS_HAVE_CUDA - cudaMemcpy(base_ptr_, gpu_object_ptr_, sizeof(Base), cudaMemcpyDeviceToHost); -#endif + if constexpr (ATLAS_HAVE_GPU) { + HIC_CALL(hicMemcpy(base_ptr_, gpu_object_ptr_, sizeof(Base), hicMemcpyDeviceToHost)); + } } Base* base_ptr_; diff --git a/src/atlas/util/PackVectorFields.cc b/src/atlas/util/PackVectorFields.cc new file mode 100644 index 000000000..640a1d46b --- /dev/null +++ b/src/atlas/util/PackVectorFields.cc @@ -0,0 +1,229 @@ +/* + * (C) Crown Copyright 2024 Met Office + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
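// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The PackVectorFields utility added below packs component fields that carry a
// "vector_field_name" metadata entry into a single higher-rank vector field,
// and records "component_field_metadata" so the operation can be reversed with
// unpack_vector_fields. The helper below is hypothetical and calls
// pack_vector_fields with the two-argument signature defined in this file.
#include "atlas/field.h"
#include "atlas/util/PackVectorFields.h"

atlas::FieldSet pack_wind(atlas::Field u, atlas::Field v) {
    // Tag both components as belonging to the same packed field "wind".
    u.metadata().set("vector_field_name", "wind");
    v.metadata().set("vector_field_name", "wind");
    atlas::FieldSet components;
    components.add(u);
    components.add(v);
    return atlas::util::pack_vector_fields(components, atlas::FieldSet{});
}
// ---------------------------------------------------------------------------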
+ */ + +#include "atlas/util/PackVectorFields.h" + +#include +#include +#include +#include + +#include "atlas/array.h" +#include "atlas/array/helpers/ArrayForEach.h" +#include "atlas/functionspace.h" +#include "atlas/option.h" +#include "atlas/runtime/Exception.h" +#include "atlas/util/Config.h" +#include "eckit/config/LocalConfiguration.h" + +namespace atlas { +namespace util { + +namespace { + +using eckit::LocalConfiguration; + +using array::DataType; +using array::helpers::arrayForEachDim; + +void addOrReplaceField(FieldSet& fieldSet, const Field& field) { + const auto fieldName = field.name(); + if (fieldSet.has(fieldName)) { + fieldSet[fieldName] = field; + } else { + fieldSet.add(field); + } +} + +Field& getOrCreateField(FieldSet& fieldSet, const FunctionSpace& functionSpace, + const Config& config) { + const auto fieldName = config.getString("name"); + if (!fieldSet.has(fieldName)) { + fieldSet.add(functionSpace.createField(config)); + } + return fieldSet[fieldName]; +} + +void checkFieldCompatibility(const Field& componentField, + const Field& vectorField) { + ATLAS_ASSERT(componentField.functionspace().size() == + vectorField.functionspace().size()); + ATLAS_ASSERT(componentField.levels() == vectorField.levels()); + ATLAS_ASSERT(componentField.variables() == 0); + ATLAS_ASSERT(vectorField.variables() > 0); + + const auto checkStandardShape = [](const Field& field) { + // Check for "standard" Atlas field shape. + auto dim = 0; + const auto rank = field.rank(); + const auto shape = field.shape(); + if (field.functionspace().size() != shape[dim++]) { + return false; + } + if (const auto levels = field.levels(); + levels && (dim >= rank || levels != shape[dim++])) { + return false; + } + if (const auto variables = field.variables(); + variables && (dim >= rank || variables != shape[dim++])) { + return false; + } + if (dim != rank) { + return false; + } + return true; + }; + + ATLAS_ASSERT(checkStandardShape(componentField)); + ATLAS_ASSERT(checkStandardShape(vectorField)); +} + +template +void copyFieldData(ComponentField& componentField, VectorField& vectorField, + const Functor& copier) { + checkFieldCompatibility(componentField, vectorField); + + const auto copyArrayData = [&](auto value, auto rank) { + // Resolve value-type and rank from arguments. + using Value = decltype(value); + constexpr auto Rank = decltype(rank)::value; + + // Iterate over fields. 
+ auto vectorView = array::make_view(vectorField); + auto componentView = array::make_view(componentField); + constexpr auto Dims = std::make_integer_sequence{}; + arrayForEachDim(Dims, execution::par, std::tie(componentView, vectorView), + copier); + }; + + const auto selectRank = [&](auto value) { + switch (vectorField.rank()) { + case 2: + return copyArrayData(value, std::integral_constant{}); + case 3: + return copyArrayData(value, std::integral_constant{}); + default: + ATLAS_THROW_EXCEPTION("Unsupported vector field rank: " + + std::to_string(vectorField.rank())); + } + }; + + const auto selectType = [&]() { + switch (vectorField.datatype().kind()) { + case DataType::kind(): + return selectRank(double{}); + case DataType::kind(): + return selectRank(float{}); + case DataType::kind(): + return selectRank(long{}); + case DataType::kind(): + return selectRank(int{}); + default: + ATLAS_THROW_EXCEPTION("Unknown datatype: " + + std::to_string(vectorField.datatype().kind())); + } + }; + + selectType(); +} + +} // namespace + +FieldSet pack_vector_fields(const FieldSet& fields, FieldSet packedFields) { + // Get the number of variables for each vector field. + auto vectorSizeMap = std::map{}; + for (const auto& field : fields) { + auto vectorFieldName = std::string{}; + if (field.metadata().get("vector_field_name", vectorFieldName)) { + ++vectorSizeMap[vectorFieldName]; + } + } + auto vectorIndexMap = std::map{}; + + // Pack vector fields. + for (const auto& field : fields) { + auto vectorFieldName = std::string{}; + if (!field.metadata().get("vector_field_name", vectorFieldName)) { + // Not a vector component field. + addOrReplaceField(packedFields, field); + continue; + } + + // Field is vector field component. + const auto& componentField = field; + + // Get or create vector field. + const auto vectorFieldConfig = + option::name(vectorFieldName) | + option::levels(componentField.levels()) | + option::vector(vectorSizeMap[vectorFieldName]) | + option::datatype(componentField.datatype()); + auto& vectorField = getOrCreateField( + packedFields, componentField.functionspace(), vectorFieldConfig); + + // Copy field data. + const auto vectorIndex = vectorIndexMap[vectorFieldName]++; + const auto copier = [&](auto&& componentElem, auto&& vectorElem) { + vectorElem(vectorIndex) = componentElem; + }; + copyFieldData(componentField, vectorField, copier); + + // Copy metadata. + const auto componentFieldMetadata = componentField.metadata(); + auto componentFieldMetadataVector = std::vector{}; + vectorField.metadata().get("component_field_metadata", + componentFieldMetadataVector); + componentFieldMetadataVector.push_back(componentFieldMetadata); + vectorField.metadata().set("component_field_metadata", + componentFieldMetadataVector); + } + return packedFields; +} + +FieldSet unpack_vector_fields(const FieldSet& fields, FieldSet unpackedFields) { + for (const auto& field : fields) { + auto componentFieldMetadataVector = std::vector{}; + if (!field.metadata().get("component_field_metadata", + componentFieldMetadataVector)) { + // Not a vector field. + addOrReplaceField(unpackedFields, field); + continue; + } + + // Field is vector. + const auto& vectorField = field; + + auto vectorIndex = 0; + for (const auto& componentFieldMetadata : componentFieldMetadataVector) { + + // Get or create field. 
+ auto componentFieldName = std::string{}; + componentFieldMetadata.get("name", componentFieldName); + const auto componentFieldConfig = + option::name(componentFieldName) | + option::levels(vectorField.levels()) | + option::datatype(vectorField.datatype()); + auto& componentField = getOrCreateField( + unpackedFields, vectorField.functionspace(), componentFieldConfig); + + // Copy field data. + const auto copier = [&](auto&& componentElem, auto&& vectorElem) { + componentElem = vectorElem(vectorIndex); + }; + copyFieldData(componentField, vectorField, copier); + + // Copy metadata. + componentField.metadata() = componentFieldMetadata; + + ++vectorIndex; + } + } + return unpackedFields; +} + +} // namespace util +} // namespace atlas diff --git a/src/atlas/util/PackVectorFields.h b/src/atlas/util/PackVectorFields.h new file mode 100644 index 000000000..fc77da6c5 --- /dev/null +++ b/src/atlas/util/PackVectorFields.h @@ -0,0 +1,40 @@ +/* + * (C) Crown Copyright 2024 Met Office + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + */ + +#pragma once + +#include "atlas/field.h" + +namespace eckit { +class LocalConfiguration; +} + +namespace atlas { +namespace util { + +/// @brief Packs vector field components into vector fields +/// +/// @details Iterates through @param fields and creates vector fields from any +/// component field with the "vector name" string metadata. These, as +/// well as any present scalar fields, are added to the return-value +/// field set. +/// Note, a mutable @param packedFields field set can be supplied if +/// one needs to guarantee the order of the packed fields +FieldSet pack_vector_fields(const FieldSet& fields, + FieldSet packedFields = FieldSet{}); + +/// @brief Unpacks vector field into vector field components. +/// +/// @details Undoes "pack" operation when a set of packed fields are supplied +/// as @param fields. A mutable @param unpackedFields field set can be +/// supplied if one needs to guarantee the order of the unpacked +/// fields. 
+FieldSet unpack_vector_fields(const FieldSet& fields, + FieldSet unpackedFields = FieldSet{}); + +} // namespace util +} // namespace atlas diff --git a/src/atlas_acc_support/CMakeLists.txt b/src/atlas_acc_support/CMakeLists.txt index ffe144a37..7bcf8ac06 100644 --- a/src/atlas_acc_support/CMakeLists.txt +++ b/src/atlas_acc_support/CMakeLists.txt @@ -32,7 +32,6 @@ if( atlas_HAVE_ACC ) ecbuild_add_library( TARGET atlas_acc_support SOURCES atlas_acc_map_data.c ) target_compile_options( atlas_acc_support PRIVATE ${ACC_C_FLAGS} ) target_link_libraries( atlas_acc_support PRIVATE ${ACC_C_FLAGS} ) - endif() endif() diff --git a/src/atlas_acc_support/atlas_acc_map_data.c b/src/atlas_acc_support/atlas_acc_map_data.c index 6b8a148e7..826579686 100644 --- a/src/atlas_acc_support/atlas_acc_map_data.c +++ b/src/atlas_acc_support/atlas_acc_map_data.c @@ -17,12 +17,111 @@ #endif #include +#include -void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long size) { - acc_map_data(cpu_ptr, gpu_ptr, size); +#include "atlas_acc_map_data.h" + +void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long bytes) { + acc_map_data(cpu_ptr, gpu_ptr, bytes); } void atlas_acc_unmap_data(void* cpu_ptr) { acc_unmap_data(cpu_ptr); } + +int atlas_acc_is_present(void* cpu_ptr, unsigned long bytes) { + return acc_is_present(cpu_ptr, bytes); +} + +void* atlas_acc_deviceptr(void* cpu_ptr) { + return acc_deviceptr(cpu_ptr); +} + +atlas_acc_device_t atlas_acc_get_device_type() { + acc_device_t device_type = acc_get_device_type(); + if (device_type == acc_device_host || device_type == acc_device_none) { + return atlas_acc_device_host; + } + return atlas_acc_device_not_host; +} + +#define BUFFER_PRINTF(buffer, ...) snprintf(buffer + strlen(buffer), sizeof(buffer) - strlen(buffer), __VA_ARGS__); + +const char* atlas_acc_version_str() { + static char buffer[16]; + if( strlen(buffer) != 0 ) { + return buffer; + } + + BUFFER_PRINTF(buffer, "%i", _OPENACC); + switch( _OPENACC ) { + case 201111: BUFFER_PRINTF(buffer, " (1.0)"); break; + case 201306: BUFFER_PRINTF(buffer, " (2.0)"); break; + case 201308: BUFFER_PRINTF(buffer, " (2.0)"); break; + case 201510: BUFFER_PRINTF(buffer, " (2.5)"); break; + case 201711: BUFFER_PRINTF(buffer, " (2.6)"); break; + default: break; + } + return buffer; +} +const char* atlas_acc_info_str() { + static char buffer[1024]; + if( strlen(buffer) != 0 ) { + return buffer; + } + // Platform information + acc_device_t devicetype = acc_get_device_type(); + int num_devices = acc_get_num_devices(devicetype); + BUFFER_PRINTF(buffer, " OpenACC version: %s\n", atlas_acc_version_str()); + if (devicetype == acc_device_host ) { + BUFFER_PRINTF(buffer, " No OpenACC GPU devices available\n"); + return buffer; + } + BUFFER_PRINTF(buffer, " Number of OpenACC devices: %i\n", num_devices); + int device_num = acc_get_device_num(devicetype); + BUFFER_PRINTF(buffer, " OpenACC Device number: %i\n", device_num); + BUFFER_PRINTF(buffer, " OpenACC acc_device_t (enum value): %d", devicetype); + switch( devicetype ) { + case acc_device_host: BUFFER_PRINTF(buffer, " (acc_device_host)"); break; + case acc_device_none: BUFFER_PRINTF(buffer, " (acc_device_none)"); break; + case acc_device_not_host: BUFFER_PRINTF(buffer, " (acc_device_not_host)"); break; + case acc_device_nvidia: BUFFER_PRINTF(buffer, " (acc_device_nvidia)"); break; + default: break; + } + BUFFER_PRINTF(buffer, "\n"); + + // acc_get_property, acc_get_property_string introduced with OpenACC 2.6 +#if _OPENACC >= 201711 + long int mem = acc_get_property( 
device_num, devicetype, acc_property_memory); + long int free_mem = acc_get_property( device_num, devicetype, acc_property_free_memory); + const char *property_name = acc_get_property_string(device_num, devicetype, acc_property_name); + const char *property_vendor = acc_get_property_string(device_num, devicetype, acc_property_vendor ); + const char *property_driver = acc_get_property_string(device_num, devicetype, acc_property_driver ); + // if (mem > 0) + { + BUFFER_PRINTF(buffer, " Memory on OpenACC device: %li\n", mem); + } + // if (free_mem > 0) + { + BUFFER_PRINTF(buffer, " Free Memory on OpenACC device: %li\n", free_mem); + } + // if (property_name != NULL) + { + BUFFER_PRINTF(buffer, " OpenACC device name: %s\n", property_name); + } + // if (property_vendor != NULL) + { + BUFFER_PRINTF(buffer, " OpenACC device vendor: %s\n", property_vendor); + } + // if (property_driver != NULL) + { + BUFFER_PRINTF(buffer, " OpenACC device driver: %s\n", property_driver); + } +#endif + return buffer; +} + +int atlas_acc_get_num_devices() { + return acc_get_num_devices(acc_get_device_type()); +} diff --git a/src/atlas_acc_support/atlas_acc_map_data.h b/src/atlas_acc_support/atlas_acc_map_data.h index 4fa1208c2..ba5990968 100644 --- a/src/atlas_acc_support/atlas_acc_map_data.h +++ b/src/atlas_acc_support/atlas_acc_map_data.h @@ -14,8 +14,24 @@ extern "C" { #endif -void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long size); +typedef enum { + atlas_acc_device_host = 0, + atlas_acc_device_not_host = 1 +} atlas_acc_device_t; + +void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long bytes); void atlas_acc_unmap_data(void* cpu_ptr); +int atlas_acc_is_present(void* cpu_ptr, unsigned long bytes); +void* atlas_acc_deviceptr(void* cpu_ptr); +atlas_acc_device_t atlas_acc_get_device_type(); + +int atlas_acc_devices(); + +const char* atlas_acc_version_str(); + +const char* atlas_acc_info_str(); + +int atlas_acc_get_num_devices(); #ifdef __cplusplus } diff --git a/src/atlas_f/CMakeLists.txt b/src/atlas_f/CMakeLists.txt index 737a5b590..2e9a97e31 100644 --- a/src/atlas_f/CMakeLists.txt +++ b/src/atlas_f/CMakeLists.txt @@ -277,6 +277,14 @@ ecbuild_add_library( TARGET atlas_f $ ) +if( HAVE_ACC ) + target_link_options( atlas_f INTERFACE + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> + $<$:SHELL:-acc=gpu> ) +endif() + fckit_target_preprocess_fypp( atlas_f DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/atlas_f.h.in diff --git a/src/atlas_f/atlas_f.h.in b/src/atlas_f/atlas_f.h.in index cea48ee0e..3511bca12 100644 --- a/src/atlas_f/atlas_f.h.in +++ b/src/atlas_f/atlas_f.h.in @@ -18,6 +18,7 @@ #define ATLAS_HAVE_OMP @atlas_HAVE_OMP_Fortran@ #define ATLAS_HAVE_ACC @atlas_HAVE_ACC@ +#define ATLAS_HAVE_GPU @atlas_HAVE_GPU@ #define ATLAS_BITS_GLOBAL @ATLAS_BITS_GLOBAL@ #define ATLAS_BITS_LOCAL @ATLAS_BITS_LOCAL@ diff --git a/src/atlas_f/util/atlas_allocate_module.F90 b/src/atlas_f/util/atlas_allocate_module.F90 index 3cc1d3a7a..1bda17e81 100644 --- a/src/atlas_f/util/atlas_allocate_module.F90 +++ b/src/atlas_f/util/atlas_allocate_module.F90 @@ -231,7 +231,7 @@ subroutine atlas_deallocate_managedmem_real64_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_double), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_real64(A(1)) ) + call atlas__deallocate_managedmem_double( c_loc_real64(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -239,7 +239,7 @@ subroutine atlas_deallocate_managedmem_real32_r1( A ) use, intrinsic :: iso_c_binding use 
atlas_allocate_c_binding real(c_float), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_real32(A(1)) ) + call atlas__deallocate_managedmem_float( c_loc_real32(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -247,7 +247,7 @@ subroutine atlas_deallocate_managedmem_int32_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_int), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_int32(A(1)) ) + call atlas__deallocate_managedmem_int( c_loc_int32(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -255,7 +255,7 @@ subroutine atlas_deallocate_managedmem_int64_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_long), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_int64(A(1)) ) + call atlas__deallocate_managedmem_long( c_loc_int64(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -263,7 +263,7 @@ subroutine atlas_deallocate_managedmem_real64_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_double), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_real64(A(1,1)) ) + call atlas__deallocate_managedmem_double( c_loc_real64(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -271,7 +271,7 @@ subroutine atlas_deallocate_managedmem_real32_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_float), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_real32(A(1,1)) ) + call atlas__deallocate_managedmem_float( c_loc_real32(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -279,7 +279,7 @@ subroutine atlas_deallocate_managedmem_int32_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_int), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_int32(A(1,1)) ) + call atlas__deallocate_managedmem_int( c_loc_int32(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -287,7 +287,7 @@ subroutine atlas_deallocate_managedmem_int64_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_long), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_int64(A(1,1)) ) + call atlas__deallocate_managedmem_long( c_loc_int64(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 33da2af95..cbbe7536a 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -38,29 +38,33 @@ endif() ###################################################### -# Macro atlas_add_cuda_test -# Envisioned to become part of ecbuild_add_test with a CUDA option -# in a future ecbuild release -macro( atlas_add_cuda_test ) +# Macro atlas_add_hic_test +macro( atlas_add_hic_test ) set( options "" ) set( single_value_args TARGET ) set( multi_value_args SOURCES LIBS ENVIRONMENT ) cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${_FIRST_ARG} ${ARGN} ) if(_PAR_UNPARSED_ARGUMENTS) - ecbuild_critical("Unknown keywords given to atlas_add_cuda_test(): \"${_PAR_UNPARSED_ARGUMENTS}\"") + ecbuild_critical("Unknown keywords given to atlas_add_hic_test(): \"${_PAR_UNPARSED_ARGUMENTS}\"") endif() if( HAVE_CUDA AND HAVE_TESTS ) - ecbuild_debug("atlas_add_cuda_test: Adding test ${_PAR_TARGET}") + ecbuild_debug("atlas_add_hic_test: Adding test ${_PAR_TARGET}") list( APPEND _PAR_ENVIRONMENT "ATLAS_RUN_NGPUS=1" ) list( REMOVE_DUPLICATES _PAR_ENVIRONMENT ) list( APPEND _libs ${_PAR_LIBS} ) if( _libs ) - ecbuild_debug("atlas_add_cuda_test: Test ${_PAR_TARGET} 
explicitly links with libraries ${_libs}") + ecbuild_debug("atlas_add_hic_test: Test ${_PAR_TARGET} explicitly links with libraries ${_libs}") + endif() + + if( HAVE_CUDA ) + set_source_files_properties(${_PAR_SOURCES} PROPERTIES LANGUAGE CUDA) + elseif( HAVE_HIP ) + set_source_files_properties(${_PAR_SOURCES} PROPERTIES LANGUAGE HIP) endif() ecbuild_add_test( TARGET ${_PAR_TARGET} @@ -72,10 +76,6 @@ macro( atlas_add_cuda_test ) endif() endmacro() -if( HAVE_CUDA ) - set( ATLAS_TEST_ENVIRONMENT "ATLAS_RUN_NGPUS=1" ) -endif() - add_subdirectory( util ) add_subdirectory( runtime ) diff --git a/src/tests/acc/CMakeLists.txt b/src/tests/acc/CMakeLists.txt index 768805fff..0e323611f 100644 --- a/src/tests/acc/CMakeLists.txt +++ b/src/tests/acc/CMakeLists.txt @@ -20,7 +20,7 @@ if( HAVE_CUDA AND HAVE_TESTS AND HAVE_FCTEST AND HAVE_ACC ) LINKER_LANGUAGE Fortran ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ATLAS_RUN_NGPUS=1 ) - target_link_libraries( atlas_test_unified_memory_with_openacc ${ACC_Fortran_FLAGS} CUDA::cudart ) + target_link_libraries( atlas_test_unified_memory_with_openacc ${ACC_Fortran_FLAGS} hic ) set_tests_properties( atlas_test_unified_memory_with_openacc PROPERTIES LABELS "gpu;acc") diff --git a/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc b/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc index 47c0e7dc5..eb251572b 100644 --- a/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc +++ b/src/tests/acc/fctest_unified_memory_with_openacc_cxx.cc @@ -8,10 +8,10 @@ * nor does it submit to any jurisdiction. */ -#include +#include "hic/hic.h" extern "C" { void allocate_unified_impl(double** a, int N) { - cudaMallocManaged(a, N * sizeof(double)); + hicMallocManaged(a, N * sizeof(double)); } } diff --git a/src/tests/array/CMakeLists.txt b/src/tests/array/CMakeLists.txt index 3a5583223..3915caca7 100644 --- a/src/tests/array/CMakeLists.txt +++ b/src/tests/array/CMakeLists.txt @@ -61,22 +61,22 @@ if( CMAKE_BUILD_TYPE MATCHES "DEBUG" ) set ( CMAKE_NVCC_FLAGS "-G" ) endif() -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_array_kernel - SOURCES test_array_kernel.cu + SOURCES test_array_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_vector_kernel - SOURCES test_vector_kernel.cu + SOURCES test_vector_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_svector_kernel - SOURCES test_svector_kernel.cu + SOURCES test_svector_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) diff --git a/src/tests/array/test_array.cc b/src/tests/array/test_array.cc index 9ce8cbec9..290e47e0e 100644 --- a/src/tests/array/test_array.cc +++ b/src/tests/array/test_array.cc @@ -19,6 +19,8 @@ #include "atlas/array/gridtools/GridToolsMakeView.h" #endif +#include "hic/hic.h" + using namespace atlas::array; namespace atlas { @@ -553,10 +555,29 @@ CASE("test_wrap") { EXPECT(view(2) == 19); } +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + CASE("test_acc_map") { Array* ds = Array::create(2, 3, 4); - EXPECT_NO_THROW(ds->accMap()); - EXPECT(ds->accMapped() == ATLAS_HAVE_ACC); + EXPECT_NO_THROW(ds->allocateDevice()); + if( ds->deviceAllocated() ) { + EXPECT_NO_THROW(ds->accMap()); + EXPECT_EQ(ds->accMapped(), std::min(devices(),1)); + } + else { + Log::warning() << "WARNING: Array 
could not be allocated on device, so acc_map could not be tested" << std::endl; + } delete ds; } diff --git a/src/tests/array/test_array_kernel.cu b/src/tests/array/test_array_kernel.hic similarity index 92% rename from src/tests/array/test_array_kernel.cu rename to src/tests/array/test_array_kernel.hic index 8f3ba678a..14935f46c 100644 --- a/src/tests/array/test_array_kernel.cu +++ b/src/tests/array/test_array_kernel.hic @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. */ -#include +#include "hic/hic.h" #include "tests/AtlasTestEnvironment.h" #include "atlas/array.h" #include "atlas/array/MakeView.h" @@ -21,10 +21,10 @@ namespace test { #define REQUIRE_CUDA_SUCCESS(msg) \ do { \ - cudaError_t err = cudaPeekAtLastError(); \ - if (err != cudaSuccess ) { \ + hicError_t err = hicPeekAtLastError(); \ + if (err != hicSuccess ) { \ throw eckit::testing::TestException("REQUIRE_CUDA_SUCCESS ["+std::string(msg)+"] failed:\n"\ - + std::string(cudaGetErrorString(err)) , Here()); \ + + std::string(hicGetErrorString(err)) , Here()); \ } \ } while(false) @@ -70,7 +70,7 @@ CASE( "test_array" ) REQUIRE_CUDA_SUCCESS("kernel_ex"); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); ds->updateHost(); @@ -108,7 +108,7 @@ CASE( "test_array_loop" ) REQUIRE_CUDA_SUCCESS("loop_kernel_ex"); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); ds->updateHost(); diff --git a/src/tests/array/test_svector_kernel.cu b/src/tests/array/test_svector_kernel.hic similarity index 84% rename from src/tests/array/test_svector_kernel.cu rename to src/tests/array/test_svector_kernel.hic index 79631133b..0865e34ff 100644 --- a/src/tests/array/test_svector_kernel.cu +++ b/src/tests/array/test_svector_kernel.hic @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. */ -#include +#include "hic/hic.h" #include "atlas/library/config.h" #include "tests/AtlasTestEnvironment.h" @@ -45,17 +45,17 @@ CASE( "test_svector" ) EXPECT( list_ints.size() == 2); bool *result; - cudaError_t err = cudaMallocManaged(&result, sizeof(bool)); + hicError_t err = hicMallocManaged(&result, sizeof(bool)); - if(err != cudaSuccess) + if(err != hicSuccess) throw_AssertionFailed("failed to allocate GPU memory"); *result=true; kernel_exe<<<1,1>>>(list_ints.data(), list_ints.size(), 0, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); - err = cudaGetLastError(); - if(err != cudaSuccess) + err = hicGetLastError(); + if(err != hicSuccess) throw_AssertionFailed("failed to execute kernel"); EXPECT( *result ); @@ -81,9 +81,9 @@ CASE( "test_svector_resize" ) EXPECT( list_ints.size() == 5); bool *result; - cudaError_t err = cudaMallocManaged(&result, sizeof(bool)); + hicError_t err = hicMallocManaged(&result, sizeof(bool)); - if(err != cudaSuccess) + if(err != hicSuccess) throw_AssertionFailed("failed to allocate GPU memory"); *result=true; @@ -92,10 +92,10 @@ CASE( "test_svector_resize" ) list_ints[4] = 4; kernel_exe<<<1,1>>>(list_ints.data(), list_ints.size(), 3, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); - err = cudaGetLastError(); - if(err != cudaSuccess) + err = hicGetLastError(); + if(err != hicSuccess) throw_AssertionFailed("failed to execute kernel"); EXPECT( *result ); diff --git a/src/tests/array/test_vector_kernel.cu b/src/tests/array/test_vector_kernel.hic similarity index 89% rename from src/tests/array/test_vector_kernel.cu rename to src/tests/array/test_vector_kernel.hic index 18b0eb2b4..dc8442fbf 100644 --- a/src/tests/array/test_vector_kernel.cu +++ b/src/tests/array/test_vector_kernel.hic @@ -8,7 +8,7 @@ * does it 
submit to any jurisdiction. */ -#include +#include "hic/hic.h" #include "tests/AtlasTestEnvironment.h" @@ -85,13 +85,13 @@ CASE( "test_vector_kernel" ) VectorView list_ints_d = make_device_vector_view(list_ints); VectorView* list_ints_dp; - cudaMalloc((void**)(&list_ints_dp), sizeof(VectorView)); + hicMalloc((void**)(&list_ints_dp), sizeof(VectorView)); - cudaMemcpy(list_ints_dp, &list_ints_d, sizeof(VectorView), cudaMemcpyHostToDevice); + hicMemcpy(list_ints_dp, &list_ints_d, sizeof(VectorView), hicMemcpyHostToDevice); kernel_ex<<<1,1>>>(list_ints_dp); - if( cudaPeekAtLastError() != cudaSuccess) std::cout << "ERROR " << std::endl; + if( hicPeekAtLastError() != hicSuccess) std::cout << "ERROR " << std::endl; list_ints.updateHost(); EXPECT( list_ints_h[0]->val_ == 8 ); diff --git a/src/tests/field/fctest_field_host.F90 b/src/tests/field/fctest_field_host.F90 index 955dd159c..fcc2ed690 100644 --- a/src/tests/field/fctest_field_host.F90 +++ b/src/tests/field/fctest_field_host.F90 @@ -10,6 +10,7 @@ ! @author Willem Deconinck #include "fckit/fctest.h" +#include "atlas/atlas_f.h" ! ----------------------------------------------------------------------------- @@ -49,7 +50,9 @@ module fcta_Field_fxt call field%data(view) FCTEST_CHECK( .not. field%host_needs_update() ) +#if ! ATLAS_HAVE_GPU FCTEST_CHECK( .not. field%device_needs_update() ) +#endif call field%update_device() FCTEST_CHECK( .not. field%device_needs_update() ) diff --git a/src/tests/field/test_field_acc.cc b/src/tests/field/test_field_acc.cc index 99e2343b3..e998f5dda 100644 --- a/src/tests/field/test_field_acc.cc +++ b/src/tests/field/test_field_acc.cc @@ -12,7 +12,7 @@ #error This file needs to be compiled with OpenACC support #endif -#include +#include "hic/hic.h" #include #include "atlas/field/Field.h" @@ -35,10 +35,10 @@ CASE("test_acc") { *c_ptr = 5; int* d_ptr; - cudaMalloc(&d_ptr, sizeof(int)); + hicMalloc(&d_ptr, sizeof(int)); acc_map_data(c_ptr, d_ptr, sizeof(int)); - cudaMemcpy(d_ptr, c_ptr, sizeof(int), cudaMemcpyHostToDevice); + hicMemcpy(d_ptr, c_ptr, sizeof(int), hicMemcpyHostToDevice); #pragma acc kernels present(c_ptr) { @@ -47,7 +47,7 @@ CASE("test_acc") { EXPECT_EQ( *c_ptr, 5. ); - cudaMemcpy(c_ptr, d_ptr, sizeof(int), cudaMemcpyDeviceToHost); + hicMemcpy(c_ptr, d_ptr, sizeof(int), hicMemcpyDeviceToHost); EXPECT_EQ( *c_ptr, 2. ); } diff --git a/src/tests/interpolation/CMakeLists.txt b/src/tests/interpolation/CMakeLists.txt index 82005b229..a5ac0e86a 100644 --- a/src/tests/interpolation/CMakeLists.txt +++ b/src/tests/interpolation/CMakeLists.txt @@ -75,6 +75,14 @@ ecbuild_add_executable( TARGET atlas_test_interpolation_structured2D NOINSTALL ) +ecbuild_add_test( TARGET atlas_test_interpolation_structured2D_regional + SOURCES test_interpolation_structured2D_regional.cc + LIBS atlas + MPI 2 + CONDITION eckit_HAVE_MPI + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} +) + ecbuild_add_test( TARGET atlas_test_interpolation_non_linear SOURCES test_interpolation_non_linear.cc LIBS atlas diff --git a/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc b/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc index 0ce3cd190..641005fb6 100644 --- a/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc +++ b/src/tests/interpolation/test_interpolation_k_nearest_neighbours.cc @@ -1,9 +1,5 @@ /* -<<<<<<< HEAD - * (C) Copyright 1996- ECMWF. -======= * (C) Copyright 2013 ECMWF. 
->>>>>>> develop * * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. diff --git a/src/tests/interpolation/test_interpolation_spherical_vector.cc b/src/tests/interpolation/test_interpolation_spherical_vector.cc index 95281a880..16dbdabf9 100644 --- a/src/tests/interpolation/test_interpolation_spherical_vector.cc +++ b/src/tests/interpolation/test_interpolation_spherical_vector.cc @@ -95,6 +95,14 @@ struct FunctionSpaceFixtures { {"gaussian_mesh", generateNodeColums("O48", "structured")}, {"structured_columns", functionspace::StructuredColumns(Grid("O48"), option::halo(1))}, + {"structured_columns_classic", + functionspace::StructuredColumns(Grid("F48"), option::halo(1))}, + {"structured_columns_classic_halo2", + functionspace::StructuredColumns(Grid("F48"), option::halo(2))}, + {"structured_columns_classic_highres_halo2", + functionspace::StructuredColumns(Grid("F96"), option::halo(2))}, + {"structured_columns_halo2", + functionspace::StructuredColumns(Grid("O48"), option::halo(2))}, {"structured_columns_lowres", functionspace::StructuredColumns(Grid("O24"), option::halo(1))}, {"structured_columns_hires", @@ -126,7 +134,9 @@ struct InterpSchemeFixtures { static const auto structuredLinear = option::type("structured-linear2D") | option::halo(1) | Config("adjoint", true); - + static const auto structuredCubic = option::type("structured-bicubic") | + option::halo(2) | + Config("adjoint", true); static const auto sphericalVector = option::type("spherical-vector") | Config("adjoint", true); @@ -134,12 +144,15 @@ struct InterpSchemeFixtures { {"cubedsphere_bilinear", cubedsphereBilinear}, {"finite_element", finiteElement}, {"structured_linear", structuredLinear}, + {"structured_cubic", structuredCubic}, {"cubedsphere_bilinear_spherical", sphericalVector | Config("scheme", cubedsphereBilinear)}, {"finite_element_spherical", sphericalVector | Config("scheme", finiteElement)}, {"structured_linear_spherical", - sphericalVector | Config("scheme", structuredLinear)}}; + sphericalVector | Config("scheme", structuredLinear)}, + {"structured_cubic_spherical", + sphericalVector | Config("scheme", structuredCubic)}}; return interpSchemes.at(fixture); } }; @@ -254,9 +267,9 @@ void testInterpolation(const Config& config) { calcError(targetColumn, errorColumn); } else if constexpr (Rank == 3) { - ArrayForEach<0>::apply(std::tie(targetColumn, errorColumn), - calcError); - } + ArrayForEach<0>::apply(std::tie(targetColumn, errorColumn), + calcError); + } }); EXPECT_APPROX_EQ(maxError, 0., config.getDouble("tol")); @@ -296,7 +309,8 @@ void testInterpolation(const Config& config) { } } -CASE("cubed sphere vector interpolation (3d-field, 2-vector)") { + +CASE("cubed sphere CS-LFR-48 vector interpolation (3d-field, 2-vector)") { const auto config = Config("source_fixture", "cubedsphere_mesh") .set("target_fixture", "gaussian_mesh") @@ -308,7 +322,7 @@ CASE("cubed sphere vector interpolation (3d-field, 2-vector)") { testInterpolation((config)); } -CASE("cubed sphere vector interpolation (3d-field, 3-vector)") { +CASE("cubed sphere CS-LFR-48 vector interpolation (3d-field, 3-vector)") { const auto config = Config("source_fixture", "cubedsphere_mesh") .set("target_fixture", "gaussian_mesh") @@ -320,6 +334,36 @@ CASE("cubed sphere vector interpolation (3d-field, 3-vector)") { testInterpolation((config)); } +CASE("cubed sphere CS-LFR-48 (spherical vector) to empty point cloud") { + const auto config = + 
Config("source_fixture", "cubedsphere_mesh") + .set("target_fixture", "empty_point_cloud") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "cubedsphere_bilinear_spherical"); + + testInterpolation((config)); +} + +CASE("cubed sphere CS-LFR-48 to empty point cloud") { + const auto config = + Config("source_fixture", "cubedsphere_mesh") + .set("target_fixture", "empty_point_cloud") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "cubedsphere_bilinear"); + + testInterpolation((config)); +} + + +CASE("finite element to empty point cloud") { + const auto config = Config("source_fixture", "gaussian_mesh") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "finite_element"); + + testInterpolation((config)); +} + CASE("finite element vector interpolation (2d-field, 2-vector)") { const auto config = Config("source_fixture", "gaussian_mesh") .set("target_fixture", "cubedsphere_mesh") @@ -331,60 +375,77 @@ CASE("finite element vector interpolation (2d-field, 2-vector)") { testInterpolation((config)); } -CASE("structured columns vector interpolation (2d-field, 2-vector)") { - const auto config = Config("source_fixture", "structured_columns") - .set("target_fixture", "cubedsphere_mesh") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "structured_linear_spherical") - .set("file_id", "spherical_vector_sc") - .set("tol", 0.00017); +CASE("structured columns F48 cubic vector spherical interpolation (3d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_classic_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_cubic_vector_classic_sc") + .set("tol", 0.0000082); - testInterpolation((config)); + testInterpolation((config)); } -CASE("structured columns vector interpolation (2d-field, 2-vector, low-res)") { - const auto config = Config("source_fixture", "structured_columns_lowres") - .set("target_fixture", "gaussian_mesh") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "structured_linear_spherical") - .set("file_id", "spherical_vector_sc_lr") - .set("tol", 0.00056); +CASE("structured columns F96 cubic vector spherical interpolation (2d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_classic_highres_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_2D_cubic_vector_highres_classic_sc") + .set("tol", 0.000000425); testInterpolation((config)); } -CASE("structured columns vector interpolation (2d-field, 2-vector, hi-res)") { - const auto config = Config("source_fixture", "structured_columns_hires") +CASE("structured columns F96 cubic vector spherical interpolation (3d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_classic_highres_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_3D_cubic_vector_highres_classic_sc") + .set("tol", 0.00000085); + + testInterpolation((config)); +} + +CASE("structured columns O24 linear vector interpolation (2d-field, 2-vector)") { + const auto config = Config("source_fixture", "structured_columns_lowres") .set("target_fixture", "gaussian_mesh") 
.set("field_spec_fixture", "2vector") .set("interp_fixture", "structured_linear_spherical") - .set("file_id", "spherical_vector_sc_hr") - .set("tol", 0.000044); + .set("file_id", "spherical_vector_sc_lr") + .set("tol", 0.00056); testInterpolation((config)); } -CASE("cubed sphere (spherical vector) to empty point cloud") { - const auto config = - Config("source_fixture", "cubedsphere_mesh") - .set("target_fixture", "empty_point_cloud") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "cubedsphere_bilinear_spherical"); +CASE("structured columns O48 cubic vector spherical interpolation (3d-field, 2-vector)") { + const auto config = + Config("source_fixture", "structured_columns_halo2") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_cubic_spherical") + .set("file_id", "spherical_cubic_vector_sc3") + .set("tol", 0.000007); - testInterpolation((config)); + testInterpolation((config)); } -CASE("cubed sphere to empty point cloud") { - const auto config = - Config("source_fixture", "cubedsphere_mesh") - .set("target_fixture", "empty_point_cloud") - .set("field_spec_fixture", "2vector") - .set("interp_fixture", "cubedsphere_bilinear"); +CASE("structured columns O48 linear vector interpolation (2d-field, 2-vector)") { + const auto config = Config("source_fixture", "structured_columns") + .set("target_fixture", "cubedsphere_mesh") + .set("field_spec_fixture", "2vector") + .set("interp_fixture", "structured_linear_spherical") + .set("file_id", "spherical_vector_sc") + .set("tol", 0.00017); testInterpolation((config)); } -CASE("structured columns to empty point cloud") { +CASE("structured columns O48 to empty point cloud") { const auto config = Config("source_fixture", "structured_columns") .set("target_fixture", "empty_point_cloud") .set("field_spec_fixture", "2vector") @@ -393,11 +454,13 @@ CASE("structured columns to empty point cloud") { testInterpolation((config)); } -CASE("finite element to empty point cloud") { - const auto config = Config("source_fixture", "gaussian_mesh") - .set("target_fixture", "cubedsphere_mesh") +CASE("structured columns O96 vector interpolation (2d-field, 2-vector, hi-res)") { + const auto config = Config("source_fixture", "structured_columns_hires") + .set("target_fixture", "gaussian_mesh") .set("field_spec_fixture", "2vector") - .set("interp_fixture", "finite_element"); + .set("interp_fixture", "structured_linear_spherical") + .set("file_id", "spherical_vector_sc_hr") + .set("tol", 0.000044); testInterpolation((config)); } diff --git a/src/tests/interpolation/test_interpolation_structured2D_regional.cc b/src/tests/interpolation/test_interpolation_structured2D_regional.cc new file mode 100644 index 000000000..8bba8692e --- /dev/null +++ b/src/tests/interpolation/test_interpolation_structured2D_regional.cc @@ -0,0 +1,217 @@ +/* + * (C) Copyright 2024 Meteorologisk Institutt + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ */ + +#include "atlas/array.h" +#include "atlas/field.h" +#include "atlas/functionspace/StructuredColumns.h" +#include "atlas/grid.h" +#include "atlas/interpolation.h" +#include "atlas/option.h" +#include "atlas/util/Config.h" + +#include "tests/AtlasTestEnvironment.h" + +using atlas::functionspace::StructuredColumns; +using atlas::util::Config; + +namespace atlas { +namespace test { + +//----------------------------------------------------------------------------- + +static Config gridConfigs() { + Config gridConfigs; + + Config projectionConfig; + projectionConfig.set("type", "lambert_conformal_conic"); + projectionConfig.set("latitude0", 56.3); + projectionConfig.set("longitude0", 0.0); + + Config gridConfig; + gridConfig.set("type", "regional"); + std::vector lonlat = {9.9, 56.3}; + gridConfig.set("lonlat(centre)", lonlat); + gridConfig.set("projection", projectionConfig); + + const size_t sourceNx = 21; + const size_t sourceNy = 31; + const double sourceDx = 8.0e3; + const double sourceDy = 9.0e3; + + const size_t xFactor = 4; + const size_t yFactor = 3; + + const size_t targetNx = (sourceNx-1)*xFactor+1; + const size_t targetNy = (sourceNy-1)*yFactor+1; + const double targetDx = static_cast(sourceNx-1)/static_cast(targetNx-1)*sourceDx; + const double targetDy = static_cast(sourceNy-1)/static_cast(targetNy-1)*sourceDy; + + gridConfig.set("nx", sourceNx); + gridConfig.set("ny", sourceNy); + gridConfig.set("dx", sourceDx); + gridConfig.set("dy", sourceDy); + gridConfigs.set("source", gridConfig); + gridConfigs.set("source normalization", (sourceNx-1)*(sourceNy-1)); + + gridConfig.set("nx", targetNx); + gridConfig.set("ny", targetNy); + gridConfig.set("dx", targetDx); + gridConfig.set("dy", targetDy); + gridConfigs.set("target", gridConfig); + gridConfigs.set("target normalization", (targetNx-1)*(targetNy-1)); + + return gridConfigs; +} + +// Dot product +double dotProd(const Field& field01, const Field& field02) { + double dprod{}; + + const size_t ndim = field01.levels() > 0 ? 
2 : 1; + if (ndim == 1) { + const auto field01_view = array::make_view(field01); + const auto field02_view = array::make_view(field02); + for (idx_t i=0; i < field01_view.shape(0); ++i) { + dprod += field01_view(i) * field02_view(i); + } + } else if (ndim == 2) { + const auto field01_view = array::make_view(field01); + const auto field02_view = array::make_view(field02); + for (idx_t l=0; l < field01_view.shape(1); ++l) { + for (idx_t i=0; i < field01_view.shape(0); ++i) { + dprod += field01_view(i, l) * field02_view(i, l); + } + } + } + eckit::mpi::comm().allReduceInPlace(dprod, eckit::mpi::Operation::SUM); + + return dprod; +} + + +CASE("test_interpolation_structured2D_regional_1d") { + Grid sourceGrid(gridConfigs().getSubConfiguration("source")); + Grid targetGrid(gridConfigs().getSubConfiguration("target")); + + StructuredColumns sourceFs(sourceGrid, option::halo(1)); + StructuredColumns targetFs(targetGrid, option::halo(1)); + + Interpolation interpolation(Config("type", "regional-linear-2d"), sourceFs, targetFs); + + auto sourceField = sourceFs.createField(Config("name", "source")); + auto targetField = targetFs.createField(Config("name", "target")); + + // Accuracy test + const auto sourceIView = array::make_view(sourceFs.index_i()); + const auto sourceJView = array::make_view(sourceFs.index_j()); + auto sourceView = array::make_view(sourceField); + const auto sourceGhostView = atlas::array::make_view(sourceFs.ghost()); + sourceView.assign(0.0); + for (idx_t i = 0; i < sourceFs.size(); ++i) { + if (sourceGhostView(i) == 0) { + sourceView(i) = static_cast((sourceIView(i)-1)*(sourceJView(i)-1)) + /static_cast(gridConfigs().getInt("source normalization")); + } + } + + interpolation.execute(sourceField, targetField); + + const auto targetIView = array::make_view(targetFs.index_i()); + const auto targetJView = array::make_view(targetFs.index_j()); + const auto targetView = array::make_view(targetField); + const auto targetGhostView = atlas::array::make_view(targetFs.ghost()); + const double tolerance = 1.e-12; + for (idx_t i = 0; i < targetFs.size(); ++i) { + if (targetGhostView(i) == 0) { + const double targetTest = static_cast((targetIView(i)-1)*(targetJView(i)-1)) + /static_cast(gridConfigs().getInt("target normalization")); + EXPECT_APPROX_EQ(targetView(i), targetTest, tolerance); + } + } + + // Adjoint test + auto targetAdjoint = targetFs.createField(); + array::make_view(targetAdjoint).assign(array::make_view(targetField)); + targetAdjoint.adjointHaloExchange(); + + auto sourceAdjoint = sourceFs.createField(); + array::make_view(sourceAdjoint).assign(0.); + interpolation.execute_adjoint(sourceAdjoint, targetAdjoint); + + const auto yDotY = dotProd(targetField, targetField); + const auto xDotXAdj = dotProd(sourceField, sourceAdjoint); + + EXPECT_APPROX_EQ(yDotY / xDotXAdj, 1., 1e-14); +} + + +CASE("test_interpolation_structured2D_regional_2d") { + Grid sourceGrid(gridConfigs().getSubConfiguration("source")); + Grid targetGrid(gridConfigs().getSubConfiguration("target")); + + const idx_t nlevs = 2; + StructuredColumns sourceFs(sourceGrid, option::halo(1) | option::levels(nlevs)); + StructuredColumns targetFs(targetGrid, option::halo(1) | option::levels(nlevs)); + + Interpolation interpolation(Config("type", "regional-linear-2d"), sourceFs, targetFs); + + auto sourceField = sourceFs.createField(Config("name", "source")); + auto targetField = targetFs.createField(Config("name", "target")); + + // Accuracy test + const auto sourceIView = array::make_view(sourceFs.index_i()); + const 
auto sourceJView = array::make_view(sourceFs.index_j()); + auto sourceView = array::make_view(sourceField); + const auto sourceGhostView = atlas::array::make_view(sourceFs.ghost()); + sourceView.assign(0.0); + for (idx_t i = 0; i < sourceFs.size(); ++i) { + if (sourceGhostView(i) == 0) { + for (idx_t k = 0; k < nlevs; ++k) { + sourceView(i, k) = static_cast((sourceIView(i)-1)*(sourceJView(i)-1)) + /static_cast(gridConfigs().getInt("source normalization")); + } + } + } + + interpolation.execute(sourceField, targetField); + + const auto targetIView = array::make_view(targetFs.index_i()); + const auto targetJView = array::make_view(targetFs.index_j()); + const auto targetView = array::make_view(targetField); + const auto targetGhostView = atlas::array::make_view(targetFs.ghost()); + const double tolerance = 1.e-12; + for (idx_t i = 0; i < targetFs.size(); ++i) { + if (targetGhostView(i) == 0) { + const double targetTest = static_cast((targetIView(i)-1)*(targetJView(i)-1)) + /static_cast(gridConfigs().getInt("target normalization")); + for (idx_t k = 0; k < nlevs; ++k) { + EXPECT_APPROX_EQ(targetView(i, k), targetTest, tolerance); + } + } + } + + // Adjoint test + auto targetAdjoint = targetFs.createField(); + array::make_view(targetAdjoint).assign(array::make_view(targetField)); + targetAdjoint.adjointHaloExchange(); + + auto sourceAdjoint = sourceFs.createField(); + array::make_view(sourceAdjoint).assign(0.); + interpolation.execute_adjoint(sourceAdjoint, targetAdjoint); + + const auto yDotY = dotProd(targetField, targetField); + const auto xDotXAdj = dotProd(sourceField, sourceAdjoint); + + EXPECT_APPROX_EQ(yDotY / xDotXAdj, 1., 1e-14); +} + +} // namespace test +} // namespace atlas + +int main(int argc, char** argv) { + return atlas::test::run(argc, argv); +} diff --git a/src/tests/mesh/CMakeLists.txt b/src/tests/mesh/CMakeLists.txt index f9d7825e3..95aa8916b 100644 --- a/src/tests/mesh/CMakeLists.txt +++ b/src/tests/mesh/CMakeLists.txt @@ -175,9 +175,9 @@ endforeach() -atlas_add_cuda_test( +atlas_add_hic_test( TARGET atlas_test_connectivity_kernel - SOURCES test_connectivity_kernel.cu + SOURCES test_connectivity_kernel.hic LIBS atlas ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) diff --git a/src/tests/mesh/test_connectivity_kernel.cu b/src/tests/mesh/test_connectivity_kernel.hic similarity index 94% rename from src/tests/mesh/test_connectivity_kernel.cu rename to src/tests/mesh/test_connectivity_kernel.hic index f6e08e301..d897da28b 100644 --- a/src/tests/mesh/test_connectivity_kernel.cu +++ b/src/tests/mesh/test_connectivity_kernel.hic @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. 
*/ -#include +#include "hic/hic.h" #include "atlas/mesh/Connectivity.h" #include "tests/AtlasTestEnvironment.h" @@ -83,7 +83,7 @@ CASE( "test_block_connectivity" ) BlockConnectivity conn; bool* result; - cudaMallocManaged(&result, sizeof(bool)); + hicMallocManaged(&result, sizeof(bool)); *result = true; @@ -104,7 +104,7 @@ CASE( "test_block_connectivity" ) kernel_block<<<1,1>>>(conn, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); EXPECT( *result == true ); @@ -127,12 +127,12 @@ CASE( "test_irregular_connectivity" ) EXPECT(conn(0,0) == 1 IN_FORTRAN); bool* result; - cudaMallocManaged(&result, sizeof(bool)); + hicMallocManaged(&result, sizeof(bool)); *result = true; kernel_irr<<<1,1>>>(conn, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); EXPECT( *result == true ); @@ -155,12 +155,12 @@ CASE( "test_multiblock_connectivity" ) EXPECT(conn.block(0)(0,0) == 1 IN_FORTRAN); bool* result; - cudaMallocManaged(&result, sizeof(bool)); + hicMallocManaged(&result, sizeof(bool)); *result = true; kernel_multiblock<<<1,1>>>(conn, result); - cudaDeviceSynchronize(); + hicDeviceSynchronize(); EXPECT( *result == true ); diff --git a/src/tests/parallel/CMakeLists.txt b/src/tests/parallel/CMakeLists.txt index 7922b7af5..4824b9ce1 100644 --- a/src/tests/parallel/CMakeLists.txt +++ b/src/tests/parallel/CMakeLists.txt @@ -30,6 +30,17 @@ ecbuild_add_test( TARGET atlas_test_haloexchange ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) +ecbuild_add_test( TARGET atlas_test_haloexchange_on_device + MPI 3 + CONDITION eckit_HAVE_MPI AND atlas_HAVE_GPU + SOURCES test_haloexchange.cc + LIBS atlas + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ATLAS_RUN_NGPUS=1 +) +if( TEST atlas_test_haloexchange_on_device ) + set_tests_properties ( atlas_test_haloexchange_on_device PROPERTIES LABELS "gpu") +endif() + ecbuild_add_test( TARGET atlas_test_gather MPI 3 CONDITION eckit_HAVE_MPI diff --git a/src/tests/parallel/test_haloexchange.cc b/src/tests/parallel/test_haloexchange.cc index 3bf2ccdd2..5009c4054 100644 --- a/src/tests/parallel/test_haloexchange.cc +++ b/src/tests/parallel/test_haloexchange.cc @@ -13,6 +13,8 @@ #include #include +#include "hic/hic.h" + #include "atlas/array.h" #include "atlas/array/ArrayView.h" #include "atlas/array/MakeView.h" @@ -105,7 +107,7 @@ struct validate { }; struct Fixture { - Fixture(bool on_device): on_device_(on_device) { + Fixture(bool _on_device = false): on_device(_on_device) { int nnodes_c[] = {5, 6, 7}; nb_nodes = vec(nnodes_c); N = nb_nodes[mpi::comm().rank()]; @@ -147,7 +149,7 @@ struct Fixture { std::vector gidx; int N; - bool on_device_; + bool on_device; }; //----------------------------------------------------------------------------- @@ -159,11 +161,15 @@ void test_rank0_arrview(Fixture& f) { arrv(j) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j]); } - arr.syncHostDevice(); + if (f.on_device) { + arr.updateDevice(); + } - f.halo_exchange.execute(arr, f.on_device_); + f.halo_exchange.execute(arr, f.on_device); - arr.syncHostDevice(); + if( f.on_device ) { + arr.updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -192,11 +198,15 @@ void test_rank1(Fixture& f) { arrv(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 
0 : f.gidx[j] * 100); } - arr.syncHostDevice(); + if (f.on_device) { + arr.updateDevice(); + } - f.halo_exchange.execute(arr, f.on_device_); + f.halo_exchange.execute(arr, f.on_device); - arr.syncHostDevice(); + if (f.on_device) { + arr.updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -226,8 +236,6 @@ void test_rank1_strided_v1(Fixture& f) { arrv_t(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j] * 100); } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -240,19 +248,21 @@ void test_rank1_strided_v1(Fixture& f) { array::ArraySpec { array::make_shape(f.N, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(32, 1) - } + array::make_strides(32, 1) #else - array::make_strides(2, 1) - } + array::make_strides(2, 1) #endif - )); + })); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateDevice(); + } - f.halo_exchange.execute(*arr, f.on_device_); + f.halo_exchange.execute(*arr, f.on_device); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -282,8 +292,6 @@ void test_rank1_strided_v2(Fixture& f) { arrv_t(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 0 : f.gidx[j] * 100); } - arr_t.syncHostDevice(); - // create a wrap array where we fake the strides in a way that the second // dimension // (number of components) contains only one component but the associated @@ -295,9 +303,9 @@ void test_rank1_strided_v2(Fixture& f) { &(arrv_t(0, 1)), array::ArraySpec { array::make_shape(f.N, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(32, 1) + array::make_strides(32, 1) #else - array::make_strides(2, 1) + array::make_strides(2, 1) #endif })); @@ -333,11 +341,15 @@ void test_rank2(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device) { + arr.updateDevice(); + } - f.halo_exchange.execute(arr, f.on_device_); + f.halo_exchange.execute(arr, f.on_device); - arr.syncHostDevice(); + if (f.on_device) { + arr.updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -373,23 +385,26 @@ void test_rank2_l1(Fixture& f) { (size_t(f.part[p]) != mpi::comm().rank() ? 
0 : f.gidx[p] * std::pow(10, i)); } } - arr_t.syncHostDevice(); std::unique_ptr arr(array::Array::wrap( arrv_t.data(), array::ArraySpec { array::make_shape(f.N, 1, 2), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(96, 32, 1) + array::make_strides(96, 32, 1) #else - array::make_strides(6, 2, 1) + array::make_strides(6, 2, 1) #endif })); - arr_t.syncHostDevice(); + if (f.on_device) { + arr->updateDevice(); + } - f.halo_exchange.execute(*arr, false); + f.halo_exchange.execute(*arr, f.on_device); - arr_t.syncHostDevice(); + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -426,7 +441,6 @@ void test_rank2_l1(Fixture& f) { } void test_rank2_l2_v2(Fixture& f) { -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST // Test rank 2 halo-exchange array::ArrayT arr_t(f.N, 3, 2); array::ArrayView arrv_t = array::make_host_view(arr_t); @@ -443,13 +457,22 @@ void test_rank2_l2_v2(Fixture& f) { &arrv_t(0, 1, 1), array::ArraySpec { array::make_shape(f.N, 1, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(192, 32, 1) + array::make_strides(192, 32, 1) #else - array::make_strides(6, 2, 1) + array::make_strides(6, 2, 1) #endif })); - f.halo_exchange.execute(*arr, f.on_device_); + + if (f.on_device) { + arr->updateDevice(); + } + + f.halo_exchange.execute(*arr, f.on_device); + + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -483,11 +506,9 @@ void test_rank2_l2_v2(Fixture& f) { break; } } -#endif } void test_rank2_v2(Fixture& f) { -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST array::ArrayT arr_t(f.N, 3, 2); array::ArrayView arrv_t = array::make_view(arr_t); for (int p = 0; p < f.N; ++p) { @@ -503,13 +524,21 @@ void test_rank2_v2(Fixture& f) { &arrv_t(0, 0, 1), array::ArraySpec { array::make_shape(f.N, 3, 1), #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - array::make_strides(192, 32, 2) + array::make_strides(192, 32, 2) #else - array::make_strides(6, 2, 2) + array::make_strides(6, 2, 2) #endif })); - f.halo_exchange.execute(*arr, f.on_device_); + if (f.on_device) { + arr->updateDevice(); + } + + f.halo_exchange.execute(*arr, f.on_device); + + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -543,18 +572,21 @@ void test_rank2_v2(Fixture& f) { break; } } -#endif } void test_rank0_wrap(Fixture& f) { std::unique_ptr arr(array::Array::wrap(f.gidx.data(), array::make_shape(f.N))); array::ArrayView arrv = array::make_view(*arr); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateDevice(); + } - f.halo_exchange.execute(*arr, f.on_device_); + f.halo_exchange.execute(*arr, f.on_device); - arr->syncHostDevice(); + if (f.on_device) { + arr->updateHost(); + } switch (mpi::comm().rank()) { case 0: { @@ -642,7 +674,6 @@ void test_rank2_paralleldim2(Fixture& f) { } void test_rank1_cinterface(Fixture& f) { -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST array::ArrayT arr(f.N, 2); array::ArrayView arrv = array::make_host_view(arr); for (int j = 0; j < f.N; ++j) { @@ -650,8 +681,6 @@ void test_rank1_cinterface(Fixture& f) { arrv(j, 1) = (size_t(f.part[j]) != mpi::comm().rank() ? 
0 : f.gidx[j] * 100); } - arr.syncHostDevice(); - int shapes[2] = {(int)arrv.shape(0), (int)arrv.shape(1)}; int strides[2] = {(int)arrv.stride(0), (int)arrv.stride(1)}; @@ -674,11 +703,10 @@ void test_rank1_cinterface(Fixture& f) { break; } } -#endif } CASE("test_haloexchange") { - Fixture f(false); + Fixture f; SECTION("test_rank0_arrview") { test_rank0_arrview(f); } @@ -701,20 +729,46 @@ CASE("test_haloexchange") { SECTION("test_rank1_paralleldim_1") { test_rank1_paralleldim1(f); } SECTION("test_rank2_paralleldim_2") { test_rank2_paralleldim2(f); } + SECTION("test_rank1_cinterface") { test_rank1_cinterface(f); } +} -#if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - f.on_device_ = true; +#if ATLAS_HAVE_GPU + +//----------------------------------------------------------------------------- + +static int devices() { + static int devices_ = [](){ + int n = 0; + auto err = hicGetDeviceCount(&n); + if (err != hicSuccess) { + n = 0; + static_cast(hicGetLastError()); + } + return n; + }(); + return devices_; +} + +CASE("test_haloexchange on device") { + if (devices() == 0) { + Log::warning() << "\"test_haloexchange on device skipped\": No devices available" << std::endl; + return; + } + + bool on_device = true; + Fixture f(on_device); SECTION("test_rank0_arrview") { test_rank0_arrview(f); } SECTION("test_rank1") { test_rank1(f); } SECTION("test_rank2") { test_rank2(f); } - SECTION("test_rank0_wrap") { test_rank0_wrap(f); } -#endif + SECTION("test_rank0_wrap") { test_rank0_wrap(f); } } +#endif + //----------------------------------------------------------------------------- diff --git a/src/tests/parallel/test_haloexchange_adjoint.cc b/src/tests/parallel/test_haloexchange_adjoint.cc index 816b68441..392bc3575 100644 --- a/src/tests/parallel/test_haloexchange_adjoint.cc +++ b/src/tests/parallel/test_haloexchange_adjoint.cc @@ -183,11 +183,15 @@ void test_rank0_arrview(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -240,11 +244,15 @@ void test_rank0_arrview_adj_test(Fixture& f) { arrv(j) = arrv_init(j); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum1 POD sum1(0); @@ -252,11 +260,15 @@ void test_rank0_arrview_adj_test(Fixture& f) { sum1 += arrv(j) * arrv(j); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum2 POD sum2(0); @@ -299,11 +311,15 @@ void test_rank1(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute_adjoint(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } switch (mpi::comm().rank()) { case 0: { @@ -336,11 +352,15 @@ void test_rank1_adj_test(Fixture& f) { arrv(j, 1ul) = arrv_init(j, 1ul); } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } f.halo_exchange_std->execute(arr, f.on_device_); - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } // sum1 POD sum1(0); @@ -350,11 +370,15 @@ void test_rank1_adj_test(Fixture& f) { } } - arr.syncHostDevice(); + if (f.on_device_) { + arr.syncHostDevice(); + } 
 
     f.halo_exchange_std->execute_adjoint(arr, f.on_device_);
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -399,8 +423,6 @@ void test_rank1_strided_v1(Fixture& f) {
         }
     }
 
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -460,9 +482,6 @@ void test_rank1_strided_v1_adj_test(Fixture& f) {
         arrv_t(j, 1ul) = arrv_init_t(j, 1ul);
     }
 
-    arr_init_t.syncHostDevice();
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -483,11 +502,15 @@ void test_rank1_strided_v1_adj_test(Fixture& f) {
 #endif
         ));
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum1
     POD sum1(0);
@@ -497,11 +520,15 @@
         }
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -546,8 +573,6 @@ void test_rank1_strided_v2(Fixture& f) {
         }
     }
 
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -565,6 +590,10 @@ void test_rank1_strided_v2(Fixture& f) {
 #endif
         }));
 
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
+
     f.halo_exchange_std->execute_adjoint(*arr, false);
 
     switch (mpi::comm().rank()) {
@@ -599,9 +628,6 @@ void test_rank1_strided_v2_adj_test(Fixture& f) {
         arrv_t(j, 1ul) = arrv_init_t(j, 1ul);
     }
 
-    arr_init_t.syncHostDevice();
-    arr_t.syncHostDevice();
-
     // create a wrap array where we fake the strides in a way that the second
     // dimension
     // (number of components) contains only one component but the associated
@@ -619,11 +645,15 @@ void test_rank1_strided_v2_adj_test(Fixture& f) {
 #endif
         }));
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum1
     POD sum1(0);
@@ -633,11 +663,15 @@
         }
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -692,11 +726,15 @@ void test_rank2(Fixture& f) {
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(arr, f.on_device_);
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     switch (mpi::comm().rank()) {
         case 0: {
@@ -738,7 +776,9 @@ void test_rank2_adj_test(Fixture& f) {
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(arr, f.on_device_);
 
@@ -752,11 +792,15 @@
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(arr, f.on_device_);
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -810,8 +854,6 @@ void test_rank2_l1(Fixture& f) {
         }
     }
 
-    arr_t.syncHostDevice();
-
     std::unique_ptr arr(array::Array::wrap(
         arrv_t.data(), array::ArraySpec {
             array::make_shape(f.N, 1, 2),
@@ -822,11 +864,8 @@ void test_rank2_l1(Fixture& f) {
 #endif
         }));
 
-    arr_t.syncHostDevice();
-
     f.halo_exchange_std->execute_adjoint(*arr, false);
 
-    arr_t.syncHostDevice();
 
     switch (mpi::comm().rank()) {
         case 0: {
@@ -877,7 +916,6 @@ void test_rank2_l1_adj_test(Fixture& f) {
             arrv_t(p, i, static_cast(1)) = arrv_init_t(p, i, static_cast(1));
         }
     }
-    arr_t.syncHostDevice();
 
     std::unique_ptr arr(array::Array::wrap(
         arrv_t.data(), array::ArraySpec {
@@ -889,12 +927,8 @@ void test_rank2_l1_adj_test(Fixture& f) {
 #endif
         }));
 
-    arr_t.syncHostDevice();
-
     f.halo_exchange_std->execute(*arr, false);
 
-    arr_t.syncHostDevice();
-
     // sum1
     POD sum1(0);
     for (std::size_t p = 0; p < static_cast(f.N); ++p) {
@@ -905,12 +939,8 @@
         }
     }
 
-    arr->syncHostDevice();
-
     f.halo_exchange_std->execute_adjoint(*arr, false);
 
-    arr->syncHostDevice();
-
     // sum2
     POD sum2(0);
     for (std::size_t p = 0; p < static_cast(f.N); ++p) {
@@ -1153,7 +1183,8 @@ void test_rank2_v2(Fixture& f) {
 }
 
 void test_rank0_wrap(Fixture& f) {
-    std::unique_ptr arr(array::Array::wrap(f.gidx.data(), array::make_shape(f.N)));
+    std::vector existing = f.gidx;
+    std::unique_ptr arr(array::Array::wrap(existing.data(), array::make_shape(f.N)));
     array::ArrayView arrv = array::make_view(*arr);
     switch (mpi::comm().rank()) {
         case 0: {
@@ -1179,11 +1210,15 @@
         }
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     switch (mpi::comm().rank()) {
         case 0: {
@@ -1202,6 +1237,7 @@
             break;
         }
     }
+    arr->deallocateDevice();
 }
 
 void test_rank0_wrap_adj_test(Fixture& f) {
@@ -1226,7 +1262,9 @@
         arrv_init(j) = arrv(j);
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute(*arr, f.on_device_);
 
@@ -1236,11 +1274,15 @@
         sum1 += arrv(j) * arrv(j);
     }
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     f.halo_exchange_std->execute_adjoint(*arr, f.on_device_);
 
-    arr->syncHostDevice();
+    if (f.on_device_) {
+        arr->syncHostDevice();
+    }
 
     // sum2
     POD sum2(0);
@@ -1485,7 +1527,9 @@ void test_rank1_cinterface(Fixture& f) {
         }
     }
 
-    arr.syncHostDevice();
+    if (f.on_device_) {
+        arr.syncHostDevice();
+    }
 
     int shapes[2]  = {(int)arrv.shape(0), (int)arrv.shape(1)};
     int strides[2] = {(int)arrv.stride(0), (int)arrv.stride(1)};
diff --git a/src/tests/util/CMakeLists.txt b/src/tests/util/CMakeLists.txt
index 47d8b6def..7c7262ccf 100644
--- a/src/tests/util/CMakeLists.txt
+++ b/src/tests/util/CMakeLists.txt
@@ -94,3 +94,12 @@ ecbuild_add_test( TARGET atlas_test_unitsphere
   ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT}
 )
 
+ecbuild_add_test( TARGET atlas_test_pack_vector_fields
+  SOURCES test_pack_vector_fields.cc
+  LIBS atlas
+  ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT}
+)
+
+
+
+
diff --git a/src/tests/util/test_pack_vector_fields.cc b/src/tests/util/test_pack_vector_fields.cc
new file mode 100644
index 000000000..3c6ea89df
--- /dev/null
+++ b/src/tests/util/test_pack_vector_fields.cc
@@ -0,0 +1,235 @@
+/*
+ * (C) Crown Copyright 2024 Met Office
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ */
+
+#include
+#include
+
+#include "atlas/array.h"
+#include "atlas/field.h"
+#include "atlas/functionspace.h"
+#include "atlas/grid.h"
+#include "atlas/option.h"
+#include "atlas/util/Config.h"
+#include "atlas/util/PackVectorFields.h"
+#include "tests/AtlasTestEnvironment.h"
+
+namespace atlas {
+namespace test {
+
+FieldSet setFields(const FunctionSpace& functionSpace,
+                   const std::vector& fieldConfigs) {
+  auto fields = FieldSet{};
+
+  // Set unique values to all field elements.
+  auto value = 0;
+  for (const auto& fieldConfig : fieldConfigs) {
+    auto field = fields.add(functionSpace.createField(fieldConfig));
+    for (auto arrayIdx = size_t{0}; arrayIdx < field.size(); arrayIdx++) {
+      field->data()[arrayIdx] = value++;
+    }
+    field.metadata().set("comment", "This field is made with love.");
+    auto vectorFieldName = std::string{};
+    if (fieldConfig.get("vector_field_name", vectorFieldName)) {
+      field.metadata().set("vector_field_name", vectorFieldName);
+    }
+  }
+  return fields;
+}
+
+FieldSet createOrderedTestFields() {
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 are contiguous in field set.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("scalar") | option::levels(1) |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::levels(1) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::levels(1) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createUnorderedTestFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 are not contiguous in field set.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentRankFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different ranks.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::levels(10) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentDatatypeFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different datatypes.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentLevelsFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different number of levels.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::levels(10) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::levels(20) |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+FieldSet createInconsistentVariablesFields() {
+  auto fields = FieldSet{};
+
+  const auto grid = Grid("O16");
+  const auto functionSpace = functionspace::StructuredColumns(grid);
+
+  // Note: vector components 0 and 1 have different number of variables.
+  auto fieldConfigs = std::vector{};
+  fieldConfigs.push_back(option::name("vector_component_0") |
+                         option::datatype(DataType::kind()) |
+                         option::variables(2) |
+                         util::Config{"vector_field_name", "vector"});
+  fieldConfigs.push_back(option::name("scalar") |
+                         option::datatype(DataType::kind()));
+  fieldConfigs.push_back(option::name("vector_component_1") |
+                         option::datatype(DataType::kind()) |
+                         util::Config{"vector_field_name", "vector"});
+
+  return setFields(functionSpace, fieldConfigs);
+}
+
+void checkTestFields(const FieldSet& fields) {
+  auto value = 0;
+  for (const auto& field : fields) {
+    for (auto arrayIdx = size_t{0}; arrayIdx < field.size(); arrayIdx++) {
+      EXPECT(field->data()[arrayIdx] == value++);
+    }
+    EXPECT(field.metadata().get("comment") ==
+           "This field is made with love.");
+  }
+}
+
+CASE("Basic pack and unpack") {
+  const auto fields = createOrderedTestFields();
+
+  const auto packedFields = util::pack_vector_fields(fields);
+
+  EXPECT(!packedFields.has("vector_component_0"));
+  EXPECT(!packedFields.has("vector_component_1"));
+  EXPECT(packedFields.has("vector"));
+  EXPECT(packedFields.has("scalar"));
+
+  const auto unpackedFields = util::unpack_vector_fields(packedFields);
+
+  EXPECT(unpackedFields.has("vector_component_0"));
+  EXPECT(unpackedFields.has("vector_component_1"));
+  EXPECT(!unpackedFields.has("vector"));
+  EXPECT(unpackedFields.has("scalar"));
+
+  checkTestFields(unpackedFields);
+}
+
+CASE("unpack into existing field set") {
+  auto fields = createUnorderedTestFields();
+
+  const auto packedFields = util::pack_vector_fields(fields);
+
+  EXPECT(!packedFields.has("vector_component_0"));
+  EXPECT(!packedFields.has("vector_component_1"));
+  EXPECT(packedFields.has("vector"));
+  EXPECT(packedFields.has("scalar"));
+
+  // Need to unpack into existing field to guarantee field order is preserved.
+  array::make_view(fields["vector_component_0"]).assign(0.);
+  array::make_view(fields["vector_component_1"]).assign(0.);
+  util::unpack_vector_fields(packedFields, fields);
+
+  EXPECT(fields.has("vector_component_0"));
+  EXPECT(fields.has("vector_component_1"));
+  EXPECT(!fields.has("vector"));
+  EXPECT(fields.has("scalar"));
+
+  checkTestFields(fields);
+}
+
+CASE("check that bad inputs throw") {
+  // Try to apply pack to inconsistent field sets.
+  EXPECT_THROWS(util::pack_vector_fields(createInconsistentRankFields()));
+  EXPECT_THROWS(
+      util::pack_vector_fields(createInconsistentDatatypeFields()));
+  EXPECT_THROWS(
+      util::pack_vector_fields(createInconsistentLevelsFields()));
+  EXPECT_THROWS(
+      util::pack_vector_fields(createInconsistentVariablesFields()));
+}
+
+}  // namespace test
+}  // namespace atlas
+
+int main(int argc, char** argv) { return atlas::test::run(argc, argv); }
diff --git a/tools/atlas-run b/tools/atlas-run
index cd5d91252..39a062bd5 100755
--- a/tools/atlas-run
+++ b/tools/atlas-run
@@ -55,10 +55,13 @@ then
 elif command_exists srun ; then
   LAUNCH="srun ${ATLAS_RUN_MPI_ARGS}"
   if [ -n "${ATLAS_RUN_NGPUS}" ]; then
-    LAUNCH="${LAUNCH} --gres=gpu:${ATLAS_RUN_NGPUS}"
+    LAUNCH="${LAUNCH} --gpus-per-task=${ATLAS_RUN_NGPUS}"
   fi
   if [ -z "${ATLAS_RUN_NPROCS}" ]; then
     LAUNCH="${LAUNCH} -n 1"
+    if [ -n "${SLURM_GPUS}" ]; then
+      LAUNCH="${LAUNCH} --gpus-per-task=1"
+    fi
   else
     LAUNCH="${LAUNCH} -n ${ATLAS_RUN_NPROCS}"
   fi
diff --git a/tools/c2f.py b/tools/c2f.py
index cb30c421c..652b65935 100755
--- a/tools/c2f.py
+++ b/tools/c2f.py
@@ -7,9 +7,8 @@
 # granted to it by virtue of its status as an intergovernmental organisation nor
 # does it submit to any jurisdiction.
 
-#! /usr/bin/env python
+#! /usr/bin/env python3
 
-from __future__ import print_function
 from argparse import ArgumentParser
 import re
 import time
@@ -50,7 +49,7 @@
     'logical(c_bool)':'c_bool'
 }
 
-function_signature = re.compile('''^
+function_signature = re.compile(r'''^
 ( #1 Type
 (const\s+)? #2 Leading const
 ([^\*\&\s\[]+) #3
@@ -72,7 +71,7 @@ def __init__(self,msg):
 
 
 class Argument:
-    arg_signature = re.compile('''^
+    arg_signature = re.compile(r'''^
 ( #1 Type
 (const\s+)? #2 Leading const
 ([^\*\&\s\[]+) #3
@@ -180,9 +179,6 @@ def __init__(self,line):
         except KeyError:
             raise ParsingFailed("Could not parse return type for statement "+line)
 
-    def __nonzero__(self): # python 2
-        return self.type != "void"
-
     def __bool__(self): # python 3
         return self.type != "void"
@@ -304,9 +300,9 @@ def statements(self):
 with open(input,'r') as file:
     content = file.read()
 
-externC = re.compile('^extern\s+"C"(\s*{)?')
+externC = re.compile(r'^extern\s+"C"(\s*{)?')
 
-regex_externC = [re.compile('^extern\s+"C"(\s*{)?'),re.compile('^{')]
+regex_externC = [re.compile(r'^extern\s+"C"(\s*{)?'),re.compile('^{')]
 in_externC = [False,False]
 
 for line in content.splitlines():
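Note on the halo-exchange test changes above: every on-device section follows the same probe-then-round-trip pattern (check that a GPU is visible, copy host data to the device, exchange on the device, copy halos back for the host-side checks). The sketch below restates that pattern outside the test fixture, assuming the HIC runtime functions shown in the diff are declared in a header such as "hic/hic.h"; the helper names `visible_devices` and `exchange_with_device_roundtrip` are illustrative only, and the templated `HaloExchange::execute` call is abstracted behind a generic callable rather than reproduced exactly.

```cpp
#include <utility>

#include "atlas/array.h"  // atlas::array::Array with updateDevice()/updateHost()
#include "hic/hic.h"      // assumed header exposing hicGetDeviceCount, hicSuccess, hicGetLastError

// Illustrative helper mirroring the devices() guard in test_haloexchange.cc:
// report how many GPUs are visible, treating any hic error as "no devices".
inline int visible_devices() {
    int n = 0;
    if (hicGetDeviceCount(&n) != hicSuccess) {
        (void)hicGetLastError();  // clear the error state before falling back to host
        n = 0;
    }
    return n;
}

// Illustrative wrapper for the updateDevice / execute / updateHost round trip
// used by the on-device test sections; `exchange` stands in for the call to
// HaloExchange::execute, whose template arguments are omitted here.
template <typename Exchange>
void exchange_with_device_roundtrip(atlas::array::Array& arr, bool on_device, Exchange&& exchange) {
    if (on_device) {
        arr.updateDevice();  // stage the host data on the GPU
    }
    std::forward<Exchange>(exchange)(arr, on_device);
    if (on_device) {
        arr.updateHost();    // copy the exchanged halos back for host-side checks
    }
}
```

Running with `on_device = true` only makes sense when `visible_devices() > 0`, which is exactly the condition the new `CASE("test_haloexchange on device")` checks before constructing its fixture.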
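Note on test_pack_vector_fields.cc: the test exercises the new PackVectorFields utility, which combines component fields tagged with the same "vector_field_name" metadata key into one higher-rank field and can restore them afterwards. The sketch below shows the intended usage under the same assumptions the test makes; the field names, the "O16" grid and the wrapper function are illustrative choices and not part of the utility's API.

```cpp
#include "atlas/field.h"
#include "atlas/functionspace.h"
#include "atlas/grid.h"
#include "atlas/option.h"
#include "atlas/util/PackVectorFields.h"

// Illustrative usage of util::pack_vector_fields / util::unpack_vector_fields.
void pack_unpack_example() {
    using namespace atlas;

    const auto functionSpace = functionspace::StructuredColumns(Grid("O16"));

    auto fields = FieldSet{};
    for (const auto* name : {"eastward_wind", "northward_wind"}) {
        auto field = fields.add(functionSpace.createField<double>(option::name(name)));
        // Components sharing the same "vector_field_name" are packed into one field.
        field.metadata().set("vector_field_name", "wind");
    }

    const auto packed = util::pack_vector_fields(fields);  // holds "wind" instead of the two components
    util::unpack_vector_fields(packed, fields);            // writes the components back into `fields`
}
```

Unpacking into the existing `fields` set (the two-argument overload) preserves the original field order, which is why the "unpack into existing field set" test case uses it.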