diff --git a/CMakeLists.txt b/CMakeLists.txt index c03f5c78..2830e644 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,15 +154,12 @@ include(cmake/register_models.cmake) register_model(serial SERIAL SerialStream.cpp) register_model(omp OMP OMPStream.cpp) register_model(ocl OCL OCLStream.cpp) -register_model(std-data STD_DATA STDDataStream.cpp) -register_model(std-indices STD_INDICES STDIndicesStream.cpp) -register_model(std-ranges STD_RANGES STDRangesStream.cpp) +register_model(std STD STDStream.cpp) register_model(hip HIP HIPStream.cpp) register_model(cuda CUDA CUDAStream.cu) register_model(kokkos KOKKOS KokkosStream.cpp) register_model(sycl SYCL SYCLStream.cpp) -register_model(sycl2020-acc SYCL2020 SYCLStream2020.cpp) -register_model(sycl2020-usm SYCL2020 SYCLStream2020.cpp) +register_model(sycl2020 SYCL2020 SYCLStream2020.cpp) register_model(acc ACC ACCStream.cpp) # defining RAJA collides with the RAJA namespace so USE_RAJA register_model(raja USE_RAJA RAJAStream.cpp) diff --git a/src/Stream.h b/src/Stream.h index c8c6af1c..f233f54a 100644 --- a/src/Stream.h +++ b/src/Stream.h @@ -7,11 +7,14 @@ #pragma once +#include #include #include #include #include "benchmark.h" +using std::intptr_t; + template class Stream { diff --git a/src/StreamModels.h b/src/StreamModels.h index b13d5b34..820c08a4 100644 --- a/src/StreamModels.h +++ b/src/StreamModels.h @@ -3,12 +3,8 @@ #if defined(CUDA) #include "CUDAStream.h" -#elif defined(STD_DATA) -#include "STDDataStream.h" -#elif defined(STD_INDICES) -#include "STDIndicesStream.h" -#elif defined(STD_RANGES) -#include "STDRangesStream.hpp" +#elif defined(STD) +#include "STDStream.h" #elif defined(TBB) #include "TBBStream.hpp" #elif defined(THRUST) @@ -63,17 +59,9 @@ std::unique_ptr> make_stream(Args... args) { // Use the Kokkos implementation return std::make_unique>(args...); -#elif defined(STD_DATA) +#elif defined(STD) // Use the C++ STD data-oriented implementation - return std::make_unique>(args...); - -#elif defined(STD_INDICES) - // Use the C++ STD index-oriented implementation - return std::make_unique>(args...); - -#elif defined(STD_RANGES) - // Use the C++ STD ranges implementation - return std::make_unique>(args...); + return std::make_unique>(args...); #elif defined(TBB) // Use the C++20 implementation diff --git a/src/ci-test-compile.sh b/src/ci-test-compile.sh index 57b89afb..d10e8eef 100755 --- a/src/ci-test-compile.sh +++ b/src/ci-test-compile.sh @@ -152,9 +152,10 @@ build_gcc() { *) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; esac # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here - run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" - run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" - run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" + run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA17" + # Requires GCC 14 and newer CMake for C++23 support + #run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA23" + run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=INDICES" done run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" @@ -251,9 +252,11 @@ build_clang() { OFF) 
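The StreamModels.h hunk above now routes everything through a single STD macro. Below is a minimal sketch of that dispatch pattern with the template arguments written out; `Stream` and `STDStream` are stand-in stubs here, and `STD` is assumed to arrive as a `-DSTD` definition from `register_model(std STD STDStream.cpp)`. This is not the project's file, just the shape of the dispatch.

```cpp
// Sketch of the unified model dispatch (compile with -DSTD); not the project's code.
#include <memory>

template <class T> struct Stream { virtual ~Stream() = default; };
template <class T> struct STDStream final : Stream<T> {
  template <class... Args> explicit STDStream(Args...) {}
};

template <class T, class... Args>
std::unique_ptr<Stream<T>> make_stream(Args... args) {
#if defined(STD)
  // One class covers DATA17/DATA23/INDICES; the strategy is resolved inside STDStream.cpp.
  return std::make_unique<STDStream<T>>(args...);
#else
  #error "No model selected"
#endif
}
```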
dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;; *) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; esac - run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" - run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" - # run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" # not yet supported + run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA17" + # Requires GCC 14 and newer CMake for C++23 support + # run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA23" + # TODO: clang is too old + #run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=INDICES" done run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" @@ -270,8 +273,10 @@ build_clang() { build_nvhpc() { local name="nvhpc_build" local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}" - run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" - run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=DATA17" + # Requires GCC 14 and newer CMake for C++23 support + # run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=DATA23" + run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=INDICES" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" diff --git a/src/dpl_shim.h b/src/dpl_shim.h index 226693bd..9b8a7acc 100644 --- a/src/dpl_shim.h +++ b/src/dpl_shim.h @@ -29,12 +29,15 @@ T *alloc_raw(size_t size) { return sycl::malloc_shared(size, exe_policy.queue template void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); } +#define WORKAROUND + #else // auto exe_policy = dpl::execution::seq; // auto exe_policy = dpl::execution::par; static constexpr auto exe_policy = dpl::execution::par_unseq; #define USE_STD_PTR_ALLOC_DEALLOC +#define WORKAROUND #endif diff --git a/src/main.cpp b/src/main.cpp index 604e14a2..af9fac6c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -332,13 +332,13 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c, size_t failed = 0; T max_rel = std::numeric_limits::epsilon() * T(100.0); T max_rel_dot = std::numeric_limits::epsilon() * T(10000000.0); - auto check = [&](const char* name, T is, T should, T max_rel, size_t i = size_t(-1)) { + auto check = [&](const char* name, T is, T should, T mrel, size_t i = size_t(-1)) { // Relative difference: T diff = std::abs(is - should); T abs_is = std::abs(is); T abs_sh = std::abs(should); T largest = std::max(abs_is, abs_sh); - T same = diff <= largest * max_rel; + T same = diff <= largest * mrel; if (!same || std::isnan(is)) { ++failed; if (failed > 10) return; @@ -346,7 +346,8 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c, if (i != size_t(-1)) std::cerr << "[" << i << "]"; std::cerr << ": " << is << " (is) != " << should << " (should)" << ", diff=" << diff << " > " - << largest * max_rel << std::endl; + << largest * mrel << " (largest=" << largest + << ", max_rel=" 
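The dpl_shim.h hunk above only adds a `WORKAROUND` marker macro, which the new STDStream constructor later uses to print a non-conformance notice. For orientation, here is a stripped-down sketch of the plain-STL side of such a shim, reusing the names from the diff (`exe_policy`, `alloc_raw`, `dealloc_raw`); the oneDPL/SYCL branches are left out and the allocation strategy is simplified.

```cpp
// Plain C++17 fallback of a dpl_shim-style header (sketch, not the real file).
#include <cstdlib>
#include <execution>

static constexpr auto exe_policy = std::execution::par_unseq;

template <typename T>
T* alloc_raw(std::size_t size) {
  // The real shim may use aligned or USM allocation; plain malloc is enough for the sketch.
  return static_cast<T*>(std::malloc(sizeof(T) * size));
}

template <typename T>
void dealloc_raw(T* ptr) { std::free(ptr); }
```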
<< mrel << ")" << std::endl; } }; @@ -360,9 +361,9 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c, // Calculate the L^infty-norm relative error for (size_t i = 0; i < array_size; ++i) { - check("a", a[i], goldA, i, max_rel); - check("b", b[i], goldB, i, max_rel); - check("c", c[i], goldC, i, max_rel); + check("a", a[i], goldA, max_rel, i); + check("b", b[i], goldB, max_rel, i); + check("c", c[i], goldC, max_rel, i); } if (failed > 0 && !silence_errors) diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp deleted file mode 100644 index 8c280f8a..00000000 --- a/src/std-data/STDDataStream.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// Updated 2021 by University of Bristol -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#include "STDDataStream.h" - -template -STDDataStream::STDDataStream(BenchId bs, const intptr_t array_size, const int device_id, - T initA, T initB, T initC) - noexcept : array_size{array_size}, - a(alloc_raw(array_size)), b(alloc_raw(array_size)), c(alloc_raw(array_size)) -{ - std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; -#ifdef USE_ONEDPL - std::cout << "Using oneDPL backend: "; -#if ONEDPL_USE_DPCPP_BACKEND - std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; -#elif ONEDPL_USE_TBB_BACKEND - std::cout << "TBB " TBB_VERSION_STRING; -#elif ONEDPL_USE_OPENMP_BACKEND - std::cout << "OpenMP"; -#else - std::cout << "Default"; -#endif - std::cout << std::endl; -#endif - init_arrays(initA, initB, initC); -} - -template -STDDataStream::~STDDataStream() { - dealloc_raw(a); - dealloc_raw(b); - dealloc_raw(c); -} - -template -void STDDataStream::init_arrays(T initA, T initB, T initC) -{ - std::fill(exe_policy, a, a + array_size, initA); - std::fill(exe_policy, b, b + array_size, initB); - std::fill(exe_policy, c, c + array_size, initC); -} - -template -void STDDataStream::get_arrays(T const*& h_a, T const*& h_b, T const*& h_c) -{ - h_a = a; - h_b = b; - h_c = c; -} - -template -void STDDataStream::copy() -{ - // c[i] = a[i] - std::copy(exe_policy, a, a + array_size, c); -} - -template -void STDDataStream::mul() -{ - // b[i] = scalar * c[i]; - std::transform(exe_policy, c, c + array_size, b, [scalar = startScalar](T ci){ return scalar*ci; }); -} - -template -void STDDataStream::add() -{ - // c[i] = a[i] + b[i]; - std::transform(exe_policy, a, a + array_size, b, c, std::plus()); -} - -template -void STDDataStream::triad() -{ - // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, b, b + array_size, c, a, [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); -} - -template -void STDDataStream::nstream() -{ - // a[i] += b[i] + scalar * c[i]; - // Need to do in two stages with C++11 STL. 
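The main.cpp hunks above fix two related problems in `check_solution`: the lambda's tolerance parameter shadowed the outer `max_rel`, and the call sites passed the element index where the tolerance was expected. A self-contained sketch of the corrected check follows; `within_tol` is a hypothetical helper, but the formula is the one from the diff.

```cpp
// Relative-difference check with the tolerance as an explicit, unshadowed parameter.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>

template <class T>
bool within_tol(T is, T should, T mrel) {
  T diff = std::abs(is - should);
  T largest = std::max(std::abs(is), std::abs(should));
  return !std::isnan(is) && diff <= largest * mrel;
}

int main() {
  double max_rel = std::numeric_limits<double>::epsilon() * 100.0;
  // Tolerance is the third argument; the element index stays separate.
  std::cout << within_tol(0.1 + 0.2, 0.3, max_rel) << '\n'; // prints 1
}
```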
- // 1: a[i] += b[i] - // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, a, a + array_size, b, a, [](T ai, T bi){ return ai + bi; }); - std::transform(exe_policy, a, a + array_size, c, a, [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; }); -} - - -template -T STDDataStream::dot() -{ - // sum = 0; sum += a[i]*b[i]; return sum; - return std::transform_reduce(exe_policy, a, a + array_size, b, T{}); -} - -void listDevices(void) -{ - std::cout << "Listing devices is not supported by the Parallel STL" << std::endl; -} - -std::string getDeviceName(const int) -{ - return std::string("Device name unavailable"); -} - -std::string getDeviceDriver(const int) -{ - return std::string("Device driver unavailable"); -} -template class STDDataStream; -template class STDDataStream; diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp deleted file mode 100644 index 4f8efe20..00000000 --- a/src/std-indices/STDIndicesStream.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2021 Tom Deakin and Tom Lin -// University of Bristol HPC -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#include "STDIndicesStream.h" - -#ifndef ALIGNMENT -#define ALIGNMENT (2*1024*1024) // 2MB -#endif - -template -STDIndicesStream::STDIndicesStream(BenchId bs, const intptr_t array_size, const int device_id, - T initA, T initB, T initC) -noexcept : array_size{array_size}, range(0, array_size), - a(alloc_raw(array_size)), b(alloc_raw(array_size)), c(alloc_raw(array_size)) -{ - std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; -#ifdef USE_ONEDPL - std::cout << "Using oneDPL backend: "; -#if ONEDPL_USE_DPCPP_BACKEND - std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; -#elif ONEDPL_USE_TBB_BACKEND - std::cout << "TBB " TBB_VERSION_STRING; -#elif ONEDPL_USE_OPENMP_BACKEND - std::cout << "OpenMP"; -#else - std::cout << "Default"; -#endif - std::cout << std::endl; -#endif - init_arrays(initA, initB, initC); -} - -template -STDIndicesStream::~STDIndicesStream() { - dealloc_raw(a); - dealloc_raw(b); - dealloc_raw(c); -} - -template -void STDIndicesStream::init_arrays(T initA, T initB, T initC) -{ - std::fill(exe_policy, a, a + array_size, initA); - std::fill(exe_policy, b, b + array_size, initB); - std::fill(exe_policy, c, c + array_size, initC); -} - -template -void STDIndicesStream::get_arrays(T const*& h_a, T const*& h_b, T const*& h_c) -{ - h_a = a; - h_b = b; - h_c = c; -} - -template -void STDIndicesStream::copy() -{ - // c[i] = a[i] - std::copy(exe_policy, a, a + array_size, c); -} - -template -void STDIndicesStream::mul() -{ - // b[i] = scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), b, [c = this->c, scalar = startScalar](intptr_t i) { - return scalar * c[i]; - }); -} - -template -void STDIndicesStream::add() -{ - // c[i] = a[i] + b[i]; - std::transform(exe_policy, range.begin(), range.end(), c, [a = this->a, b = this->b](intptr_t i) { - return a[i] + b[i]; - }); -} - -template -void STDIndicesStream::triad() -{ - // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), a, [b = this->b, c = this->c, scalar = startScalar](intptr_t i) { - return b[i] + scalar * c[i]; - }); -} - -template -void STDIndicesStream::nstream() -{ - // a[i] += b[i] + scalar * c[i]; - // Need to do in two stages with C++11 STL. 
- // 1: a[i] += b[i] - // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), a, [a = this->a, b = this->b, c = this->c, scalar = startScalar](intptr_t i) { - return a[i] + b[i] + scalar * c[i]; - }); -} - - -template -T STDIndicesStream::dot() -{ - // sum = 0; sum += a[i]*b[i]; return sum; - return std::transform_reduce(exe_policy, a, a + array_size, b, T{}); -} - -void listDevices(void) -{ - std::cout << "Listing devices is not supported by the Parallel STL" << std::endl; -} - -std::string getDeviceName(const int) -{ - return std::string("Device name unavailable"); -} - -std::string getDeviceDriver(const int) -{ - return std::string("Device driver unavailable"); -} -template class STDIndicesStream; -template class STDIndicesStream; diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h deleted file mode 100644 index 7a43b1ec..00000000 --- a/src/std-indices/STDIndicesStream.h +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2021 Tom Deakin and Tom Lin -// University of Bristol HPC -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#pragma once -#include "dpl_shim.h" - -#include -#include -#include "Stream.h" - -#define IMPLEMENTATION_STRING "STD (index-oriented)" - -// A lightweight counting iterator which will be used by the STL algorithms -// NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this -// implementation doesn't target -template -class ranged { -public: - class iterator { - friend class ranged; - public: - using difference_type = N; - using value_type = N; - using pointer = const N*; - using reference = N; - using iterator_category = std::random_access_iterator_tag; - - // XXX This is not part of the iterator spec, it gets picked up by oneDPL if enabled. - // Without this, the DPL SYCL backend collects the iterator data on the host and copies to the device. - // This type is unused for any nother STL impl. 
- using is_passed_directly = std::true_type; - - reference operator *() const { return i_; } - iterator &operator ++() { ++i_; return *this; } - iterator operator ++(int) { iterator copy(*this); ++i_; return copy; } - - iterator &operator --() { --i_; return *this; } - iterator operator --(int) { iterator copy(*this); --i_; return copy; } - - iterator &operator +=(N by) { i_+=by; return *this; } - - value_type operator[](const difference_type &i) const { return i_ + i; } - - difference_type operator-(const iterator &it) const { return i_ - it.i_; } - iterator operator+(const value_type v) const { return iterator(i_ + v); } - - bool operator ==(const iterator &other) const { return i_ == other.i_; } - bool operator !=(const iterator &other) const { return i_ != other.i_; } - bool operator < (const iterator &other) const { return i_ < other.i_; } - - protected: - explicit iterator(N start) : i_ (start) {} - - private: - N i_; - }; - - [[nodiscard]] iterator begin() const { return begin_; } - [[nodiscard]] iterator end() const { return end_; } - ranged(N begin, N end) : begin_(begin), end_(end) {} -private: - iterator begin_; - iterator end_; -}; - -template -class STDIndicesStream : public Stream -{ - protected: - // Size of arrays - intptr_t array_size; - - // induction range - ranged range; - - // Device side pointers - T *a, *b, *c; - - public: - STDIndicesStream(BenchId bs, const intptr_t array_size, const int device_id, - T initA, T initB, T initC) noexcept; - ~STDIndicesStream(); - - void copy() override; - void add() override; - void mul() override; - void triad() override; - void nstream() override; - T dot() override; - - void get_arrays(T const*& a, T const*& b, T const*& c) override; - void init_arrays(T initA, T initB, T initC); -}; - diff --git a/src/std-indices/model.cmake b/src/std-indices/model.cmake deleted file mode 100644 index 60ef575f..00000000 --- a/src/std-indices/model.cmake +++ /dev/null @@ -1,53 +0,0 @@ - -register_flag_optional(CMAKE_CXX_COMPILER - "Any CXX compiler that is supported by CMake detection" - "c++") - -register_flag_optional(NVHPC_OFFLOAD - "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. - The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) - - Possible values are: - cc35 - Compile for compute capability 3.5 - cc50 - Compile for compute capability 5.0 - cc60 - Compile for compute capability 6.0 - cc62 - Compile for compute capability 6.2 - cc70 - Compile for compute capability 7.0 - cc72 - Compile for compute capability 7.2 - cc75 - Compile for compute capability 7.5 - cc80 - Compile for compute capability 8.0 - ccall - Compile for all supported compute capabilities" - "") - -register_flag_optional(USE_TBB - "Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." - "OFF") - -register_flag_optional(USE_ONEDPL - "Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends. - - Possible values are: - OPENMP - Implements policies using OpenMP. - CMake will handle any flags needed to enable OpenMP if the compiler supports it. - TBB - Implements policies using TBB. - TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH. - DPCPP - Implements policies through SYCL2020. - This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically." 
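The hand-written `ranged` counting iterator above exists only because C++17 has no standard counting view; the unified model drops it in favour of `std::views::iota` (or a small `thrust::counting_iterator` wrapper under NVHPC `-stdpar=gpu`, as seen later in the new STDStream.cpp). A short sketch of the replacement driving an index-oriented triad, with made-up sizes and values:

```cpp
// Counting range via std::views::iota instead of a custom iterator (sketch).
#include <algorithm>
#include <cstddef>
#include <execution>
#include <ranges>
#include <vector>

int main() {
  std::vector<double> a(16), b(16, 1.0), c(16, 2.0);
  const double scalar = 0.5;
  auto r = std::views::iota(std::ptrdiff_t{0}, std::ptrdiff_t{16});
  // triad, index-oriented: a[i] = b[i] + scalar * c[i]
  std::for_each(std::execution::par_unseq, r.begin(), r.end(),
                [a = a.data(), b = b.data(), c = c.data(), scalar](std::ptrdiff_t i) {
                  a[i] = b[i] + scalar * c[i];
                });
  return a[0] == 2.0 ? 0 : 1;
}
```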
- "OFF") - -macro(setup) - set(CMAKE_CXX_STANDARD 17) - if (NVHPC_OFFLOAD) - set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) - # propagate flags to linker so that it links with the gpu stuff as well - register_append_cxx_flags(ANY ${NVHPC_FLAGS}) - register_append_link_flags(${NVHPC_FLAGS}) - endif () - if (USE_TBB) - register_link_library(TBB::tbb) - endif () - if (USE_ONEDPL) - register_definitions(USE_ONEDPL) - register_link_library(oneDPL) - endif () -endmacro() diff --git a/src/std-ranges/STDRangesStream.cpp b/src/std-ranges/STDRangesStream.cpp deleted file mode 100644 index 02bd56b2..00000000 --- a/src/std-ranges/STDRangesStream.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) 2020 Tom Deakin -// University of Bristol HPC -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#include "STDRangesStream.hpp" -#include - -#ifndef ALIGNMENT -#define ALIGNMENT (2*1024*1024) // 2MB -#endif - -template -STDRangesStream::STDRangesStream(BenchId bs, const intptr_t array_size, const int device_id, - T initA, T initB, T initC) - noexcept : array_size{array_size}, - a(alloc_raw(array_size)), b(alloc_raw(array_size)), c(alloc_raw(array_size)) -{ - std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; -#ifdef USE_ONEDPL - std::cout << "Using oneDPL backend: "; -#if ONEDPL_USE_DPCPP_BACKEND - std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; -#elif ONEDPL_USE_TBB_BACKEND - std::cout << "TBB " TBB_VERSION_STRING; -#elif ONEDPL_USE_OPENMP_BACKEND - std::cout << "OpenMP"; -#else - std::cout << "Default"; -#endif - std::cout << std::endl; -#endif - init_arrays(initA, initB, initC); -} - -template -STDRangesStream::~STDRangesStream() { - dealloc_raw(a); - dealloc_raw(b); - dealloc_raw(c); -} - -template -void STDRangesStream::init_arrays(T initA, T initB, T initC) -{ - std::for_each_n( - exe_policy, - std::views::iota((intptr_t)0).begin(), array_size, // loop range - [=, this] (intptr_t i) { - a[i] = initA; - b[i] = initB; - c[i] = initC; - } - ); -} - -template -void STDRangesStream::get_arrays(T const*& h_a, T const*& h_b, T const*& h_c) -{ - h_a = a; - h_b = b; - h_c = c; -} - -template -void STDRangesStream::copy() -{ - std::for_each_n( - exe_policy, - std::views::iota((intptr_t)0).begin(), array_size, - [=, this] (intptr_t i) { - c[i] = a[i]; - } - ); -} - -template -void STDRangesStream::mul() -{ - const T scalar = startScalar; - - std::for_each_n( - exe_policy, - std::views::iota((intptr_t)0).begin(), array_size, - [=, this] (intptr_t i) { - b[i] = scalar * c[i]; - } - ); -} - -template -void STDRangesStream::add() -{ - std::for_each_n( - exe_policy, - std::views::iota((intptr_t)0).begin(), array_size, - [=, this] (intptr_t i) { - c[i] = a[i] + b[i]; - } - ); -} - -template -void STDRangesStream::triad() -{ - const T scalar = startScalar; - - std::for_each_n( - exe_policy, - std::views::iota((intptr_t)0).begin(), array_size, - [=, this] (intptr_t i) { - a[i] = b[i] + scalar * c[i]; - } - ); -} - -template -void STDRangesStream::nstream() -{ - const T scalar = startScalar; - - std::for_each_n( - exe_policy, - std::views::iota((intptr_t)0).begin(), array_size, - [=, this] (intptr_t i) { - a[i] += b[i] + scalar * c[i]; - } - ); -} - -template -T STDRangesStream::dot() -{ - // sum += a[i] * b[i]; - return - std::transform_reduce( - exe_policy, - a, a + array_size, b, T{}); -} - -void listDevices(void) -{ - std::cout << "C++20 does not expose devices" << std::endl; -} - -std::string 
getDeviceName(const int) -{ - return std::string("Device name unavailable"); -} - -std::string getDeviceDriver(const int) -{ - return std::string("Device driver unavailable"); -} - -template class STDRangesStream; -template class STDRangesStream; diff --git a/src/std-ranges/STDRangesStream.hpp b/src/std-ranges/STDRangesStream.hpp deleted file mode 100644 index da04f1f4..00000000 --- a/src/std-ranges/STDRangesStream.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2020 Tom Deakin -// University of Bristol HPC -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#pragma once -#include "dpl_shim.h" - -#include -#include -#include "Stream.h" - -#define IMPLEMENTATION_STRING "STD C++ ranges" - -template -class STDRangesStream : public Stream -{ - protected: - // Size of arrays - intptr_t array_size; - - // Device side pointers - T *a, *b, *c; - - public: - STDRangesStream(BenchId bs, const intptr_t array_size, const int device_id, - T initA, T initB, T initC) noexcept; - ~STDRangesStream(); - - void copy() override; - void add() override; - void mul() override; - void triad() override; - void nstream() override; - T dot() override; - - void get_arrays(T const*& a, T const*& b, T const*& c) override; - void init_arrays(T initA, T initB, T initC); -}; - diff --git a/src/std-ranges/model.cmake b/src/std-ranges/model.cmake deleted file mode 100644 index d7fd6a8b..00000000 --- a/src/std-ranges/model.cmake +++ /dev/null @@ -1,68 +0,0 @@ - -register_flag_optional(CMAKE_CXX_COMPILER - "Any CXX compiler that is supported by CMake detection and supports C++20 Ranges" - "c++") - -register_flag_optional(NVHPC_OFFLOAD - "Enable offloading support (via the non-standard `-stdpar=gpu`) for the new NVHPC SDK. - The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) - - Possible values are: - cc35 - Compile for compute capability 3.5 - cc50 - Compile for compute capability 5.0 - cc60 - Compile for compute capability 6.0 - cc62 - Compile for compute capability 6.2 - cc70 - Compile for compute capability 7.0 - cc72 - Compile for compute capability 7.2 - cc75 - Compile for compute capability 7.5 - cc80 - Compile for compute capability 8.0 - ccall - Compile for all supported compute capabilities" - "") - -register_flag_optional(USE_TBB - "No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." - "OFF") - -register_flag_optional(USE_ONEDPL - "Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends. - - Possible values are: - OPENMP - Implements policies using OpenMP. - CMake will handle any flags needed to enable OpenMP if the compiler supports it. - TBB - Implements policies using TBB. - TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH. - DPCPP - Implements policies through SYCL2020. - This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically." 
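Both the ranges variant above and the new unified code keep `dot` as a `std::transform_reduce`; the index-oriented flavour only swaps the second input range for a projection over indices. A sketch of the two equivalent forms (hypothetical free functions, same reduction as in the diff):

```cpp
// dot() in data-oriented and index-oriented form (sketch).
#include <cstddef>
#include <execution>
#include <functional>
#include <numeric>
#include <ranges>
#include <vector>

template <class T>
T dot_data(const T* a, const T* b, std::ptrdiff_t n) {
  return std::transform_reduce(std::execution::par_unseq, a, a + n, b, T{});
}

template <class T>
T dot_indices(const T* a, const T* b, std::ptrdiff_t n) {
  auto r = std::views::iota(std::ptrdiff_t{0}, n);
  return std::transform_reduce(std::execution::par_unseq, r.begin(), r.end(), T{},
                               std::plus<T>{},
                               [a, b](std::ptrdiff_t i) { return a[i] * b[i]; });
}

int main() {
  std::vector<double> a(8, 2.0), b(8, 3.0);
  return dot_data(a.data(), b.data(), 8) == dot_indices(a.data(), b.data(), 8) ? 0 : 1;
}
```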
- "OFF") - -macro(setup) - - # TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here - - # C++ 2a is too new, disable CMake's std flags completely: - set(CMAKE_CXX_EXTENSIONS OFF) - set(CMAKE_CXX_STANDARD_REQUIRED OFF) - unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default - # and append our own: - register_append_cxx_flags(ANY -std=c++20) - if (NVHPC_OFFLOAD) - set(NVHPC_FLAGS -stdpar=gpu -gpu=${NVHPC_OFFLOAD}) - # propagate flags to linker so that it links with the gpu stuff as well - register_append_cxx_flags(ANY ${NVHPC_FLAGS}) - register_append_link_flags(${NVHPC_FLAGS}) - endif () - if (USE_TBB) - register_link_library(TBB::tbb) - endif () - if (USE_ONEDPL) - register_definitions(USE_ONEDPL) - register_link_library(oneDPL) - endif () -endmacro() - -macro(setup_target NAME) - if (USE_ONEDPL) - target_compile_features(${NAME} INTERFACE cxx_std_20) - target_compile_features(oneDPL INTERFACE cxx_std_20) - endif () -endmacro() diff --git a/src/std/STDStream.cpp b/src/std/STDStream.cpp new file mode 100644 index 00000000..484ecda2 --- /dev/null +++ b/src/std/STDStream.cpp @@ -0,0 +1,208 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// Updated 2021 by University of Bristol +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#include "STDStream.h" +#include +#include + +#if defined(DATA23) || defined(INDICES) +#include +#endif + + // OneDPL workaround; TODO: remove this eventually +#include "dpl_shim.h" + +#ifdef INDICES +// NVHPC workaround: TODO: remove this eventually +#if defined(__NVCOMPILER) && defined(_NVHPC_STDPAR_GPU) +#define WORKAROUND +#include +auto counting_iter(intptr_t i) { return thrust::counting_iterator(i); } +auto counting_range(intptr_t b, intptr_t e) { + struct R { + thrust::counting_iterator b, e; + thrust::counting_iterator begin() { return b; } + thrust::counting_iterator end() { return e; } + }; + return R { .b = counting_iter(b), .e = counting_iter(e) }; +} +#else // NVHPC Workaround +auto counting_iter(intptr_t i) { return std::views::iota(i).begin(); } +auto counting_range(intptr_t b, intptr_t e) { return std::views::iota(b, e); } +#endif // NVHPC Workaround +#endif // INDICES + +template +STDStream::STDStream(BenchId bs, const intptr_t array_size, const int device_id, + T initA, T initB, T initC) + noexcept : array_size{array_size}, + a(alloc_raw(array_size)), b(alloc_raw(array_size)), c(alloc_raw(array_size)) +{ + std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; +#ifdef USE_ONEDPL + std::cout << "Using oneDPL backend: "; +#if ONEDPL_USE_DPCPP_BACKEND + std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; +#elif ONEDPL_USE_TBB_BACKEND + std::cout << "TBB " TBB_VERSION_STRING; +#elif ONEDPL_USE_OPENMP_BACKEND + std::cout << "OpenMP"; +#else + std::cout << "Default"; +#endif + std::cout << std::endl; +#endif + +#ifdef WORKAROUND + std::cout << "Non-conforming implementation: requires non-portable workarounds to run STREAM" << std::endl; +#endif + init_arrays(initA, initB, initC); +} + +template +STDStream::~STDStream() { + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); +} + +template +void STDStream::init_arrays(T initA, T initB, T initC) +{ + std::fill_n(exe_policy, a, array_size, initA); + std::fill_n(exe_policy, b, array_size, initB); + std::fill_n(exe_policy, c, array_size, initC); +} + +template +void STDStream::get_arrays(T const*& h_a, T 
const*& h_b, T const*& h_c) +{ + h_a = a; + h_b = b; + h_c = c; +} + +template +void STDStream::copy() +{ + // c[i] = a[i] +#if defined(DATA17) || defined(DATA23) + std::copy(exe_policy, a, a + array_size, c); +#elif INDICES + std::for_each_n(exe_policy, counting_iter(0), array_size, [a=a,c=c](intptr_t i) { + c[i] = a[i]; + }); +#else + #error unimplemented +#endif +} + +template +void STDStream::mul() +{ + // b[i] = scalar * c[i]; +#if defined(DATA17) || defined(DATA23) + std::transform(exe_policy, c, c + array_size, b, [](T ci){ return startScalar*ci; }); +#elif INDICES + std::for_each_n(exe_policy, counting_iter(0), array_size, [b=b, c=c](intptr_t i) { + b[i] = startScalar * c[i]; + }); +#else + #error unimplemented +#endif +} + +template +void STDStream::add() +{ + // c[i] = a[i] + b[i]; +#if defined(DATA17) || defined(DATA23) + std::transform(exe_policy, a, a + array_size, b, c, std::plus()); +#elif INDICES + std::for_each_n(exe_policy, counting_iter(0), array_size, [a=a, b=b, c=c](intptr_t i) { + c[i] = a[i] + b[i]; + }); +#else + #error unimplemented +#endif +} + +template +void STDStream::triad() +{ + // a[i] = b[i] + scalar * c[i]; +#if defined(DATA17) || defined(DATA23) + std::transform(exe_policy, b, b + array_size, c, a, [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); +#elif INDICES + std::for_each_n(exe_policy, counting_iter(0), array_size, [a=a, b=b, c=c](intptr_t i) { + a[i] = b[i] + startScalar * c[i]; + }); +#else + #error unimplemented +#endif +} + +template +void STDStream::nstream() +{ + // a[i] += b[i] + scalar * c[i]; +#if defined(DATA17) || defined(DATA23) // Until we can require GCC 14.1 + // Need to do in two round-trips with C++17 STL. + // 1: a[i] += b[i] + // 2: a[i] += scalar * c[i]; + std::transform(exe_policy, a, a + array_size, b, a, [](T ai, T bi){ return ai + bi; }); + std::transform(exe_policy, a, a + array_size, c, a, [](T ai, T ci){ return ai + startScalar*ci; }); +#elif DATA23 + // Requires GCC 14.1 (Ubuntu 24.04): + auto as = std::ranges::subrange(a, a + array_size); + auto bs = std::ranges::subrange(b, b + array_size); + auto cs = std::ranges::subrange(c, c + array_size); + std::transform(exe_policy, r.begin(), r.end(), a, [](auto vs) { + auto [a, b, c] = vs; + return a + b + startScalar * c; + }); +#elif INDICES + std::for_each_n(exe_policy, counting_iter(0), array_size, [a=a,b=b,c=c](intptr_t i) { + a[i] += b[i] + startScalar * c[i]; + }); +#else + #error unimplemented +#endif +} + + +template +T STDStream::dot() +{ +#if defined(DATA17) || defined(DATA23) + // sum = 0; sum += a[i] * b[i]; return sum; + return std::transform_reduce(exe_policy, a, a + array_size, b, T{0}); +#elif INDICES + auto r = counting_range(intptr_t(0), array_size); + return std::transform_reduce(exe_policy, r.begin(), r.end(), T{0}, std::plus{}, [a=a, b=b](intptr_t i) { + return a[i] * b[i]; + }); +#else + #error unimplemented +#endif +} + +void listDevices(void) +{ + std::cout << "Listing devices is not supported by the Parallel STL" << std::endl; +} + +std::string getDeviceName(const int) +{ + return std::string("Device name unavailable"); +} + +std::string getDeviceDriver(const int) +{ + return std::string("Device driver unavailable"); +} +template class STDStream; +template class STDStream; diff --git a/src/std-data/STDDataStream.h b/src/std/STDStream.h similarity index 69% rename from src/std-data/STDDataStream.h rename to src/std/STDStream.h index 6db998b2..254d68d7 100644 --- a/src/std-data/STDDataStream.h +++ b/src/std/STDStream.h @@ -5,17 +5,26 @@ // 
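In the new STDStream.cpp above, the `DATA23` branch of `nstream` builds the three subranges but iterates an `r` that is never defined, and it is currently shadowed by the `defined(DATA17) || defined(DATA23)` guard on the branch before it. The sketch below shows the single-pass form that branch is aiming for, assuming C++23 `std::views::zip` (GCC 14.1 or newer); it is not the repository's code.

```cpp
// Single-pass nstream over zipped subranges (needs C++23 std::views::zip); sketch only.
#include <algorithm>
#include <cstddef>
#include <execution>
#include <ranges>
#include <vector>

template <class T>
void nstream(T* a, const T* b, const T* c, std::ptrdiff_t n, T scalar) {
  auto as = std::ranges::subrange(a, a + n);
  auto bs = std::ranges::subrange(b, b + n);
  auto cs = std::ranges::subrange(c, c + n);
  auto r  = std::views::zip(as, bs, cs); // the range the DATA23 branch iterates
  std::transform(std::execution::par_unseq, r.begin(), r.end(), a, [scalar](auto vs) {
    auto [ai, bi, ci] = vs;
    return ai + bi + scalar * ci;
  });
}

int main() {
  std::vector<double> a(4, 1.0), b(4, 2.0), c(4, 4.0);
  nstream(a.data(), b.data(), c.data(), 4, 0.5); // a[i] += b[i] + 0.5 * c[i]
  return a[0] == 5.0 ? 0 : 1;
}
```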
source code #pragma once -#include "dpl_shim.h" #include #include #include "Stream.h" -#define IMPLEMENTATION_STRING "STD (data-oriented)" +#ifdef DATA17 +#define STDIMPL "DATA17" +#elif DATA23 +#define STDIMPL "DATA23" +#elif INDICES +#define STDIMPL "INDICES" +#else +#error unimplemented +#endif + +#define IMPLEMENTATION_STRING "STD (" STDIMPL ")" template <class T> -class STDDataStream : public Stream<T> +class STDStream : public Stream<T> { protected: // Size of arrays @@ -25,9 +34,9 @@ class STDDataStream : public Stream<T> T *a, *b, *c; public: - STDDataStream(BenchId bs, const intptr_t array_size, const int device_id, + STDStream(BenchId bs, const intptr_t array_size, const int device_id, T initA, T initB, T initC) noexcept; - ~STDDataStream(); + ~STDStream(); void copy() override; void add() override; diff --git a/src/std-data/model.cmake b/src/std/model.cmake similarity index 74% rename from src/std-data/model.cmake rename to src/std/model.cmake index 837d26bf..2c24015b 100644 --- a/src/std-data/model.cmake +++ b/src/std/model.cmake @@ -1,4 +1,3 @@ - register_flag_optional(CMAKE_CXX_COMPILER "Any CXX compiler that is supported by CMake detection" "c++") @@ -16,8 +15,10 @@ register_flag_optional(NVHPC_OFFLOAD cc72 - Compile for compute capability 7.2 cc75 - Compile for compute capability 7.5 cc80 - Compile for compute capability 8.0 - ccall - Compile for all supported compute capabilities" - "") + cc90 - Compile for compute capability 9.0 + ccall - Compile for all supported compute capabilities + ccnative - Compiles for compute capability of current device" + "") register_flag_optional(USE_TBB "No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." "OFF") register_flag_optional(USE_ONEDPL "Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends. Possible values are: OPENMP - Implements policies using OpenMP. CMake will handle any flags needed to enable OpenMP if the compiler supports it. TBB - Implements policies using TBB. TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH. DPCPP - Implements policies through SYCL2020. This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically." "OFF") +register_flag_optional(STDIMPL + "Implementation strategy: + DATA17 - Parallel algorithms over data (requires C++17). + DATA23 - (default) Parallel algorithms over data (requires C++23). + INDICES - Parallel algorithms over indices (requires C++20)."
+ "DATA20" +) + macro(setup) - set(CMAKE_CXX_STANDARD 17) + register_definitions(${STDIMPL}) + if (${STDIMPL} STREQUAL "DATA17") + set(CMAKE_CXX_STANDARD 17) + elseif (${STDIMPL} STREQUAL "INDICES") + set(CMAKE_CXX_STANDARD 20) + elseif (${STDIMPL} STREQUAL "DATA23") + set(CMAKE_CXX_STANDARD 23) + endif () if (NVHPC_OFFLOAD) set(NVHPC_FLAGS -stdpar=gpu -gpu=${NVHPC_OFFLOAD}) # propagate flags to linker so that it links with the gpu stuff as well @@ -44,7 +60,7 @@ macro(setup) register_append_link_flags(${NVHPC_FLAGS}) endif () if (USE_TBB) - register_link_library(TBB::tbb) + register_link_library(TBB::tbb) endif () if (USE_ONEDPL) register_definitions(USE_ONEDPL) diff --git a/src/sycl2020-acc/SYCLStream2020.cpp b/src/sycl2020-acc/SYCLStream2020.cpp deleted file mode 100644 index d0f97e68..00000000 --- a/src/sycl2020-acc/SYCLStream2020.cpp +++ /dev/null @@ -1,286 +0,0 @@ - -// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, -// University of Bristol HPC -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#include "SYCLStream2020.h" - -#include - -// Cache list of devices -bool cached = false; -std::vector devices; -void getDeviceList(void); - -template -SYCLStream::SYCLStream(BenchId bs, const intptr_t array_size, const int device_index, - T initA, T initB, T initC) - : array_size(array_size), - d_a {array_size}, - d_b {array_size}, - d_c {array_size}, - d_sum {1} -{ - if (!cached) - getDeviceList(); - - if (device_index >= devices.size()) - throw std::runtime_error("Invalid device index"); - - sycl::device dev = devices[device_index]; - - // Print out device information - std::cout << "Using SYCL device " << getDeviceName(device_index) << std::endl; - std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; - - // Check device can support FP64 if needed - if (sizeof(T) == sizeof(double)) - { - if (!dev.has(sycl::aspect::fp64)) - { - throw std::runtime_error("Device does not support double precision, please use --float"); - } - } - - queue = std::make_unique(dev, sycl::async_handler{[&](sycl::exception_list l) - { - bool error = false; - for(auto e: l) - { - try - { - std::rethrow_exception(e); - } - catch (sycl::exception e) - { - std::cout << e.what(); - error = true; - } - } - if(error) - { - throw std::runtime_error("SYCL errors detected"); - } - }}); - - // No longer need list of devices - devices.clear(); - cached = true; - - init_arrays(initA, initB, initC); -} - - -template -void SYCLStream::copy() -{ - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor ka {d_a, cgh, sycl::read_only}; - sycl::accessor kc {d_c, cgh, sycl::write_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) - { - kc[idx] = ka[idx]; - }); - }); - queue->wait(); -} - -template -void SYCLStream::mul() -{ - const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor kb {d_b, cgh, sycl::write_only}; - sycl::accessor kc {d_c, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) - { - kb[idx] = scalar * kc[idx]; - }); - }); - queue->wait(); -} - -template -void SYCLStream::add() -{ - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor ka {d_a, cgh, sycl::read_only}; - sycl::accessor kb {d_b, cgh, sycl::read_only}; - sycl::accessor kc {d_c, cgh, sycl::write_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) - { - kc[idx] = ka[idx] + kb[idx]; - }); - }); - queue->wait(); -} - -template -void SYCLStream::triad() -{ - const T 
scalar = startScalar; - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor ka {d_a, cgh, sycl::write_only}; - sycl::accessor kb {d_b, cgh, sycl::read_only}; - sycl::accessor kc {d_c, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) - { - ka[idx] = kb[idx] + scalar * kc[idx]; - }); - }); - queue->wait(); -} - -template -void SYCLStream::nstream() -{ - const T scalar = startScalar; - - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor ka {d_a, cgh}; - sycl::accessor kb {d_b, cgh, sycl::read_only}; - sycl::accessor kc {d_c, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) - { - ka[idx] += kb[idx] + scalar * kc[idx]; - }); - }); - queue->wait(); -} - -template -T SYCLStream::dot() -{ - - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor ka {d_a, cgh, sycl::read_only}; - sycl::accessor kb {d_b, cgh, sycl::read_only}; - - cgh.parallel_for(sycl::range<1>{array_size}, - // Reduction object, to perform summation - initialises the result to zero - // AdaptiveCpp doesn't sypport the initialize_to_identity property yet -#if defined(__HIPSYCL__) || defined(__OPENSYCL__) || defined(__ADAPTIVECPP__) - sycl::reduction(d_sum. template get_access(cgh), sycl::plus()), -#else - sycl::reduction(sum, sycl::plus(), sycl::property::reduction::initialize_to_identity{}), -#endif - [=](sycl::id<1> idx, auto& sum) - { - sum += ka[idx] * kb[idx]; - }); - }); - - // Get access on the host, and return a copy of the data (single number) - // This will block until the result is available, so no need to wait on the queue. - sycl::host_accessor result {d_sum, sycl::read_only}; - return result[0]; - -} - -template -void SYCLStream::init_arrays(T initA, T initB, T initC) -{ - queue->submit([&](sycl::handler &cgh) - { - sycl::accessor ka {d_a, cgh, sycl::write_only, sycl::no_init}; - sycl::accessor kb {d_b, cgh, sycl::write_only, sycl::no_init}; - sycl::accessor kc {d_c, cgh, sycl::write_only, sycl::no_init}; - - cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) - { - ka[idx] = initA; - kb[idx] = initB; - kc[idx] = initC; - }); - }); - - queue->wait(); -} - -template -void SYCLStream::get_arrays(T const*& a, T const*& b, T const*& c) -{ - sycl::host_accessor _a {d_a, sycl::read_only}; - sycl::host_accessor _b {d_b, sycl::read_only}; - sycl::host_accessor _c {d_c, sycl::read_only}; - a = &_a[0]; - b = &_b[0]; - c = &_c[0]; -} - -void getDeviceList(void) -{ - // Ask SYCL runtime for all devices in system - devices = sycl::device::get_devices(); - cached = true; -} - -void listDevices(void) -{ - getDeviceList(); - - // Print device names - if (devices.size() == 0) - { - std::cerr << "No devices found." 
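The `dot` kernel above (and its USM counterpart kept in the renamed sycl2020 file) relies on a SYCL 2020 reduction rather than a hand-rolled tree reduction. Below is a compact sketch of the same pattern over USM pointers; `initialize_to_identity` zeroes the result so no manual reset is needed. The helper name and signature are illustrative, not the benchmark's.

```cpp
// SYCL 2020 reduction-based dot product over USM pointers (sketch).
#include <sycl/sycl.hpp>

template <class T>
T dot(sycl::queue& q, const T* a, const T* b, T* sum, size_t n) {
  q.submit([&](sycl::handler& cgh) {
    auto red = sycl::reduction(sum, sycl::plus<T>(),
                               sycl::property::reduction::initialize_to_identity{});
    cgh.parallel_for(sycl::range<1>{n}, red, [=](sycl::id<1> i, auto& acc) {
      acc += a[i] * b[i];
    });
  }).wait();
  return *sum; // sum must be host-visible, e.g. from sycl::malloc_shared
}
```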
<< std::endl; - } - else - { - std::cout << std::endl; - std::cout << "Devices:" << std::endl; - for (int i = 0; i < devices.size(); i++) - { - std::cout << i << ": " << getDeviceName(i) << std::endl; - } - std::cout << std::endl; - } -} - -std::string getDeviceName(const int device) -{ - if (!cached) - getDeviceList(); - - std::string name; - - if (device < devices.size()) - { - name = devices[device].get_info(); - } - else - { - throw std::runtime_error("Error asking for name for non-existant device"); - } - - return name; -} - -std::string getDeviceDriver(const int device) -{ - if (!cached) - getDeviceList(); - - std::string driver; - - if (device < devices.size()) - { - driver = devices[device].get_info(); - } - else - { - throw std::runtime_error("Error asking for driver for non-existant device"); - } - - return driver; -} - -template class SYCLStream; -template class SYCLStream; diff --git a/src/sycl2020-acc/model.cmake b/src/sycl2020-acc/model.cmake deleted file mode 100644 index 9847b348..00000000 --- a/src/sycl2020-acc/model.cmake +++ /dev/null @@ -1,92 +0,0 @@ - -register_flag_optional(CMAKE_CXX_COMPILER - "Any CXX compiler that is supported by CMake detection, this is used for host compilation when required by the SYCL compiler" - "c++") - -register_flag_required(SYCL_COMPILER - "Compile using the specified SYCL compiler implementation - Supported values are - ONEAPI-ICPX - icpx as a standalone compiler - ONEAPI-Clang - oneAPI's Clang driver (enabled via `source /opt/intel/oneapi/setvars.sh --include-intel-llvm`) - DPCPP - dpc++ as a standalone compiler (https://github.com/intel/llvm) - HIPSYCL - hipSYCL compiler (https://github.com/illuhad/hipSYCL) - AdaptiveCpp - AdaptiveCpp compiler (https://github.com/adaptivecpp/adaptivecpp)") - -register_flag_optional(SYCL_COMPILER_DIR - "Absolute path to the selected SYCL compiler directory, most are packaged differently so set the path according to `SYCL_COMPILER`: - ONEAPI-ICPX - `icpx` must be used for OneAPI 2023 and later on releases (i.e `source /opt/intel/oneapi/setvars.sh` first) - ONEAPI-Clang - set to the directory that contains the Intel clang++ binary. 
- AdaptiveCpp|HIPSYCL|DPCPP - set to the root of the binary distribution that contains at least `bin/`, `include/`, and `lib/`" - "") - -macro(setup) - set(CMAKE_CXX_STANDARD 17) - - - if (${SYCL_COMPILER} STREQUAL "AdaptiveCpp") - set(adaptivecpp_DIR ${SYCL_COMPILER_DIR}/lib/cmake/adaptivecpp) - - if (NOT EXISTS "${AdaptiveCpp_DIR}") - message(WARNING "Falling back to AdaptiveCpp < 0.9.0 CMake structure") - set(AdaptiveCpp_DIR ${SYCL_COMPILER_DIR}/lib/cmake) - endif () - if (NOT EXISTS "${AdaptiveCpp_DIR}") - message(FATAL_ERROR "Can't find the appropriate CMake definitions for AdaptiveCpp") - endif () - - # register_definitions(_GLIBCXX_USE_CXX11_ABI=0) - find_package(AdaptiveCpp CONFIG REQUIRED) - message(STATUS "ok") - elseif (${SYCL_COMPILER} STREQUAL "HIPSYCL") - set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake/hipSYCL) - - if (NOT EXISTS "${hipSYCL_DIR}") - message(WARNING "Falling back to hipSYCL < 0.9.0 CMake structure") - set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake) - endif () - if (NOT EXISTS "${hipSYCL_DIR}") - message(FATAL_ERROR "Can't find the appropriate CMake definitions for hipSYCL") - endif () - - # register_definitions(_GLIBCXX_USE_CXX11_ABI=0) - find_package(hipSYCL CONFIG REQUIRED) - message(STATUS "ok") - elseif (${SYCL_COMPILER} STREQUAL "DPCPP") - set(CMAKE_CXX_COMPILER ${SYCL_COMPILER_DIR}/bin/clang++) - include_directories(${SYCL_COMPILER_DIR}/include/sycl) - register_append_cxx_flags(ANY -fsycl) - register_append_link_flags(-fsycl) - elseif (${SYCL_COMPILER} STREQUAL "ONEAPI-ICPX") - set(CMAKE_CXX_COMPILER icpx) - set(CMAKE_C_COMPILER icx) - register_append_cxx_flags(ANY -fsycl) - register_append_link_flags(-fsycl) - elseif (${SYCL_COMPILER} STREQUAL "ONEAPI-Clang") - set(CMAKE_CXX_COMPILER clang++) - set(CMAKE_C_COMPILER clang) - register_append_cxx_flags(ANY -fsycl) - register_append_link_flags(-fsycl) - else () - message(FATAL_ERROR "SYCL_COMPILER=${SYCL_COMPILER} is unsupported") - endif () - -endmacro() - - -macro(setup_target NAME) - if (${SYCL_COMPILER} STREQUAL "AdaptiveCpp") - # so AdaptiveCpp has this weird (and bad) CMake usage where they append their - # own custom integration header flags AFTER the target has been specified - # hence this macro here - add_sycl_to_target( - TARGET ${NAME} - SOURCES ${IMPL_SOURCES}) - elseif (${SYCL_COMPILER} STREQUAL "HIPSYCL") - # so hipSYCL has this weird (and bad) CMake usage where they append their - # own custom integration header flags AFTER the target has been specified - # hence this macro here - add_sycl_to_target( - TARGET ${NAME} - SOURCES ${IMPL_SOURCES}) - endif () -endmacro() diff --git a/src/sycl2020-usm/SYCLStream2020.h b/src/sycl2020-usm/SYCLStream2020.h deleted file mode 100644 index c88c87a3..00000000 --- a/src/sycl2020-usm/SYCLStream2020.h +++ /dev/null @@ -1,54 +0,0 @@ - -// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, -// University of Bristol HPC -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#pragma once - -#include -#include - -#include "Stream.h" - -#include - -#define IMPLEMENTATION_STRING "SYCL2020 USM" - -template -class SYCLStream : public Stream -{ - protected: - // Size of arrays - size_t array_size; - - // SYCL objects - // Queue is a pointer because we allow device selection - std::unique_ptr queue; - - // Buffers - T *a{}; - T *b{}; - T *c{}; - T *sum{}; - - public: - - SYCLStream(BenchId bs, const intptr_t array_size, const int device_index, - T initA, T initB, T initC); - ~SYCLStream(); - - void copy() override; - 
void add() override; - void mul() override; - void triad() override; - void nstream() override; - T dot() override; - - void get_arrays(T const*& a, T const*& b, T const*& c) override; - void init_arrays(T initA, T initB, T initC); -}; - -// Populate the devices list -void getDeviceList(void); diff --git a/src/sycl2020-usm/SYCLStream2020.cpp b/src/sycl2020/SYCLStream2020.cpp similarity index 68% rename from src/sycl2020-usm/SYCLStream2020.cpp rename to src/sycl2020/SYCLStream2020.cpp index c8b863ad..56292046 100644 --- a/src/sycl2020-usm/SYCLStream2020.cpp +++ b/src/sycl2020/SYCLStream2020.cpp @@ -1,5 +1,5 @@ -// Copyright (c) 2015-23 Tom Deakin, Simon McIntosh-Smith, and Tom Lin +// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, // University of Bristol HPC // // For full license terms please see the LICENSE file distributed with this @@ -61,11 +61,21 @@ SYCLStream::SYCLStream(BenchId bs, const intptr_t array_size, const int devic } }}); + // Allocate memory + #ifdef SYCL2020ACC + d_a = sycl::buffer{array_size}; + d_b = sycl::buffer{array_size}; + d_c = sycl::buffer{array_size}; + d_sum = sycl::buffer{1}; + #elif SYCL2020USM a = sycl::malloc_shared(array_size, *queue); b = sycl::malloc_shared(array_size, *queue); c = sycl::malloc_shared(array_size, *queue); sum = sycl::malloc_shared(1, *queue); - + #else + #error unimplemented + #endif + // No longer need list of devices devices.clear(); cached = true; @@ -75,10 +85,12 @@ SYCLStream::SYCLStream(BenchId bs, const intptr_t array_size, const int devic template SYCLStream::~SYCLStream() { - sycl::free(a, *queue); - sycl::free(b, *queue); - sycl::free(c, *queue); - sycl::free(sum, *queue); +#ifdef SYCL2020USM + sycl::free(a, *queue); + sycl::free(b, *queue); + sycl::free(c, *queue); + sycl::free(sum, *queue); +#endif } template @@ -86,7 +98,11 @@ void SYCLStream::copy() { queue->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, c = this->c, a = this->a](sycl::id<1> idx) +#ifdef SYCL2020ACC + sycl::accessor a {d_a, cgh, sycl::read_only}; + sycl::accessor c {d_c, cgh, sycl::write_only}; +#endif + cgh.parallel_for(sycl::range<1>{array_size}, [c=c,a=a](sycl::id<1> idx) { c[idx] = a[idx]; }); @@ -100,7 +116,11 @@ void SYCLStream::mul() const T scalar = startScalar; queue->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, b = this->b, c = this->c](sycl::id<1> idx) +#ifdef SYCL2020ACC + sycl::accessor b {d_b, cgh, sycl::write_only}; + sycl::accessor c {d_c, cgh, sycl::read_only}; +#endif + cgh.parallel_for(sycl::range<1>{array_size}, [=,b=b,c=c](sycl::id<1> idx) { b[idx] = scalar * c[idx]; }); @@ -113,7 +133,12 @@ void SYCLStream::add() { queue->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, c = this->c, a = this->a, b = this->b](sycl::id<1> idx) +#ifdef SYCL2020ACC + sycl::accessor a {d_a, cgh, sycl::read_only}; + sycl::accessor b {d_b, cgh, sycl::read_only}; + sycl::accessor c {d_c, cgh, sycl::write_only}; +#endif + cgh.parallel_for(sycl::range<1>{array_size}, [c=c,a=a,b=b](sycl::id<1> idx) { c[idx] = a[idx] + b[idx]; }); @@ -127,7 +152,12 @@ void SYCLStream::triad() const T scalar = startScalar; queue->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) +#ifdef SYCL2020ACC + sycl::accessor a {d_a, cgh, sycl::write_only}; + sycl::accessor b {d_b, cgh, sycl::read_only}; + sycl::accessor c {d_c, cgh, sycl::read_only}; +#endif + 
cgh.parallel_for(sycl::range<1>{array_size}, [=,a=a,b=b,c=c](sycl::id<1> idx) { a[idx] = b[idx] + scalar * c[idx]; }); @@ -139,10 +169,14 @@ template void SYCLStream::nstream() { const T scalar = startScalar; - queue->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) +#if SYCL2020ACC + sycl::accessor a {d_a, cgh}; + sycl::accessor b {d_b, cgh, sycl::read_only}; + sycl::accessor c {d_c, cgh, sycl::read_only}; +#endif + cgh.parallel_for(sycl::range<1>{array_size}, [=,a=a,b=b,c=c](sycl::id<1> idx) { a[idx] += b[idx] + scalar * c[idx]; }); @@ -155,6 +189,10 @@ T SYCLStream::dot() { queue->submit([&](sycl::handler &cgh) { +#if SYCL2020ACC + sycl::accessor a {d_a, cgh, sycl::read_only}; + sycl::accessor b {d_b, cgh, sycl::read_only}; +#endif cgh.parallel_for(sycl::range<1>{array_size}, // Reduction object, to perform summation - initialises the result to zero // AdaptiveCpp doesn't sypport the initialize_to_identity property yet @@ -163,7 +201,7 @@ T SYCLStream::dot() #else sycl::reduction(sum, sycl::plus(), sycl::property::reduction::initialize_to_identity{}), #endif - [a = this->a, b = this->b](sycl::id<1> idx, auto& sum) + [a=a,b=b](sycl::id<1> idx, auto& sum) { sum += a[idx] * b[idx]; }); @@ -177,23 +215,32 @@ void SYCLStream::init_arrays(T initA, T initB, T initC) { queue->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<1>{array_size}, [=, a = this->a, b = this->b, c = this->c](sycl::id<1> idx) +#if SYCL2020ACC + sycl::accessor a {d_a, cgh, sycl::write_only, sycl::no_init}; + sycl::accessor b {d_b, cgh, sycl::write_only, sycl::no_init}; + sycl::accessor c {d_c, cgh, sycl::write_only, sycl::no_init}; +#endif + cgh.parallel_for(sycl::range<1>{array_size}, [=,a=a,b=b,c=c](sycl::id<1> idx) { a[idx] = initA; b[idx] = initB; c[idx] = initC; }); }); - queue->wait(); } template void SYCLStream::get_arrays(T const*& h_a, T const*& h_b, T const*& h_c) { - h_a = a; - h_b = b; - h_c = c; +#if SYCL2020ACC + sycl::host_accessor a {d_a, sycl::read_only}; + sycl::host_accessor b {d_b, sycl::read_only}; + sycl::host_accessor c {d_c, sycl::read_only}; +#endif + h_a = &a[0]; + h_b = &b[0]; + h_c = &c[0]; } void getDeviceList(void) diff --git a/src/sycl2020-acc/SYCLStream2020.h b/src/sycl2020/SYCLStream2020.h similarity index 79% rename from src/sycl2020-acc/SYCLStream2020.h rename to src/sycl2020/SYCLStream2020.h index c0caae2e..3b4e1ef0 100644 --- a/src/sycl2020-acc/SYCLStream2020.h +++ b/src/sycl2020/SYCLStream2020.h @@ -14,7 +14,15 @@ #include -#define IMPLEMENTATION_STRING "SYCL2020 accessors" +#ifdef SYCL2020ACC +#define SYCLIMPL "Accessors" +#elif SYCL2020USM +#define SYCLIMPL "USM" +#else +#error unimplemented +#endif + +#define IMPLEMENTATION_STRING "SYCL2020 " SYCLIMPL template class SYCLStream : public Stream @@ -28,16 +36,14 @@ class SYCLStream : public Stream std::unique_ptr queue; // Buffers - sycl::buffer d_a; - sycl::buffer d_b; - sycl::buffer d_c; - sycl::buffer d_sum; + T *a, *b, *c, *sum{}; + sycl::buffer d_a, d_b, d_c, d_sum; public: SYCLStream(BenchId bs, const intptr_t array_size, const int device_id, T initA, T initB, T initC); - ~SYCLStream() = default; + ~SYCLStream(); void copy() override; void add() override; diff --git a/src/sycl2020-usm/model.cmake b/src/sycl2020/model.cmake similarity index 96% rename from src/sycl2020-usm/model.cmake rename to src/sycl2020/model.cmake index 72aa7c40..d6452534 100644 --- a/src/sycl2020-usm/model.cmake +++ b/src/sycl2020/model.cmake @@ -19,9 
+19,15 @@ register_flag_optional(SYCL_COMPILER_DIR AdaptiveCpp|HIPSYCL|DPCPP - set to the root of the binary distribution that contains at least `bin/`, `include/`, and `lib/`" "") +register_flag_optional(SYCL_ACCESS + "Data access method, passed straight to register_definitions, so the value must match the macros checked in SYCLStream2020.h: + - SYCL2020ACC (accessors) + - SYCL2020USM (USM pointers)" + "SYCL2020ACC") + macro(setup) set(CMAKE_CXX_STANDARD 17) - + register_definitions(${SYCL_ACCESS}) if (${SYCL_COMPILER} STREQUAL "AdaptiveCpp") set(adaptivecpp_DIR ${SYCL_COMPILER_DIR}/lib/cmake/adaptivecpp)
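With the accessor and USM variants merged into one sycl2020 model, each kernel body stays identical and only the data-access plumbing changes. Here is a sketch of that split for the copy kernel, assuming the `SYCL2020ACC`/`SYCL2020USM` macros are supplied by the model's CMake as above; the function is illustrative rather than the class member from the diff.

```cpp
// Accessor vs USM data access around an otherwise identical kernel (sketch).
#include <sycl/sycl.hpp>

template <class T>
void copy_kernel(sycl::queue& q, size_t n,
#ifdef SYCL2020ACC
                 sycl::buffer<T>& d_a, sycl::buffer<T>& d_c
#else
                 const T* a, T* c
#endif
) {
  q.submit([&](sycl::handler& cgh) {
#ifdef SYCL2020ACC
    sycl::accessor a{d_a, cgh, sycl::read_only};
    sycl::accessor c{d_c, cgh, sycl::write_only};
#endif
    cgh.parallel_for(sycl::range<1>{n}, [a = a, c = c](sycl::id<1> i) { c[i] = a[i]; });
  }).wait();
}
```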