diff --git a/.gitignore b/.gitignore index b9c427e7..68cfecf8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ build external_dependencies/build external_dependencies/install +.spack-env diff --git a/CMakeLists.txt b/CMakeLists.txt index fd4bc358..d6d1971f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,3 +54,9 @@ add_executable( ${CMAKE_CURRENT_SOURCE_DIR}/src/allocator_executor_stream_test.cpp) target_link_libraries(allocator_executor_stream_test PRIVATE HPX::hpx Kokkos::kokkos HPXKokkos::hpx_kokkos buffer_manager) + +add_executable( + minimal_parallel_for + ${CMAKE_CURRENT_SOURCE_DIR}/src/minimal_parallel_for.cpp) +target_link_libraries(minimal_parallel_for + PRIVATE HPX::hpx Kokkos::kokkos HPXKokkos::hpx_kokkos buffer_manager) diff --git a/include/buffer_manager.hpp b/include/buffer_manager.hpp index 3782d151..924c0aaf 100644 --- a/include/buffer_manager.hpp +++ b/include/buffer_manager.hpp @@ -159,6 +159,7 @@ class buffer_recycler { assert(std::get<1>(tuple) == number_of_elements); assert(std::get<2>(tuple) >= 1); std::get<2>(tuple)--; // decrease usage counter + std::cout << "Usage counter is now " << std::get<2>(tuple) << std::endl; if (std::get<2>(tuple) == 0) { // not used anymore? // move to the unused_buffer list instance->unused_buffer_list.push_front(tuple); @@ -174,6 +175,7 @@ class buffer_recycler { assert(std::get<1>(tuple) == number_of_elements); assert(std::get<2>(tuple) >= 1); std::get<2>(tuple)++; // increase usage counter + std::cout << "Usage counter is now " << std::get<2>(tuple) << std::endl; } private: diff --git a/include/kokkos_buffer_util.hpp b/include/kokkos_buffer_util.hpp index e35dc819..7f64dd6a 100644 --- a/include/kokkos_buffer_util.hpp +++ b/include/kokkos_buffer_util.hpp @@ -16,6 +16,7 @@ class recycled_view : public kokkos_type total_elements(kokkos_type::required_allocation_size(args...) / sizeof(element_type)) { //std::cout << "Got buffer for " << total_elements << std::endl; + std::cout << "Creating view" << std::endl; } recycled_view(const recycled_view &other) : kokkos_type(other) @@ -23,6 +24,8 @@ class recycled_view : public kokkos_type total_elements = other.total_elements; // std::cerr << "copy" << std::endl; allocator.increase_usage_counter(other.data(), other.total_elements); + std::cout << "Copying view" << std::endl; + } recycled_view &operator=(const recycled_view &other) @@ -30,6 +33,7 @@ class recycled_view : public kokkos_type kokkos_type::operator=(other); total_elements = other.total_elements; allocator.increase_usage_counter(other.data(), other.total_elements); + std::cout << "Copying view" << std::endl; return *this; } @@ -38,6 +42,7 @@ class recycled_view : public kokkos_type total_elements = other.total_elements; // so that is doesn't matter if deallocate is called in the moved-from object allocator.increase_usage_counter(other.data(), other.total_elements); + std::cout << "Moving view" << std::endl; } recycled_view &operator=(recycled_view &&other) @@ -46,11 +51,13 @@ class recycled_view : public kokkos_type total_elements = other.total_elements; // so that is doesn't matter if deallocate is called in the moved-from object allocator.increase_usage_counter(other.data(), other.total_elements); + std::cout << "Moving view" << std::endl; return *this; } - ~recycled_view(void) + virtual ~recycled_view(void) { + std::cout << "Dellocating view" << std::endl; allocator.deallocate(this->data(), total_elements); } diff --git a/scripts/build_kokkos.sh b/scripts/build_kokkos.sh index 8a8e3f42..850c1888 100755 --- a/scripts/build_kokkos.sh +++ b/scripts/build_kokkos.sh @@ -20,6 +20,6 @@ INSTALL_DIR=${SCRIPTS_DIR}/../external_dependencies/install/kokkos-${CMAKE_BUILD mkdir -p ${BUILD_DIR} pushd ${BUILD_DIR} -cmake -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DHPX_DIR=${HPX_ROOT} -DKokkos_CXX_STANDARD=14 -DKokkos_ARCH_PASCAL61=ON ${CURRENT_CUDA_ARCH_FLAG} -DKokkos_ARCH_HSW=ON -DKokkos_ENABLE_HPX=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_SERIAL=OFF -DKokkos_ENABLE_TESTS=OFF -DKokkos_ENABLE_HPX_ASYNC_DISPATCH=ON -DKokkos_ENABLE_INTERNAL_FENCES=OFF -DKokkos_ENABLE_CUDA_LAMBDA=ON -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} ${SOURCE_DIR} +cmake -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DHPX_DIR=${HPX_ROOT} -DKokkos_CXX_STANDARD=14 -DKokkos_ARCH_PASCAL61=ON ${CURRENT_CUDA_ARCH_FLAG} -DKokkos_ARCH_HSW=ON -DKokkos_ENABLE_HPX=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_TESTS=OFF -DKokkos_ENABLE_HPX_ASYNC_DISPATCH=ON -DKokkos_ENABLE_INTERNAL_FENCES=OFF -DKokkos_ENABLE_CUDA_LAMBDA=ON -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} ${SOURCE_DIR} make -j$(nproc) install popd diff --git a/src/minimal_parallel_for.cpp b/src/minimal_parallel_for.cpp new file mode 100644 index 00000000..7d08eb1a --- /dev/null +++ b/src/minimal_parallel_for.cpp @@ -0,0 +1,51 @@ + +#include +#include +#include + +#include + +#include +#include +#include + +#include "../include/buffer_manager.hpp" +#include "../include/cuda_buffer_util.hpp" +#include "../include/kokkos_buffer_util.hpp" +#include +#include + +constexpr size_t view_size_0 = 10; +constexpr size_t view_size_1 = 50; +template +using kokkos_um_array = Kokkos::View; +template +using recycled_host_view = recycled_view, recycle_std, T>; + + +template +auto get_iteration_policy(const Executor&& executor, const ViewType& view_to_iterate){ + return get_iteration_policy(executor, view_to_iterate); +} + +int main(int argc, char *argv[]) +{ + constexpr size_t passes = 1; + for (size_t pass = 0; pass < passes; pass++) + { + recycled_host_view hostView(view_size_0,view_size_1); + + // works - usage counter goes up to 9 for the hostView + // auto host_space = hpx::kokkos::make_execution_space(); + + // broken - usage counter goes up to 13 for the hostView - goes only down to 2 + auto host_space = hpx::kokkos::make_execution_space(); + auto policy_host = get_iteration_policy(host_space, hostView); + Kokkos::parallel_for( + "host init", + policy_host, + KOKKOS_LAMBDA(int n, int o) { + hostView(n, o) = 1.0; + }); + } +}