Skip to content

Commit

Permalink
Backport test fixes [for 0.2.1]
Browse files Browse the repository at this point in the history
  • Loading branch information
G-071 committed Aug 23, 2023
1 parent fa15cbe commit 6ef2543
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 26 deletions.
57 changes: 34 additions & 23 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@ cmake_minimum_required(VERSION 3.11)
project(CPPuddle CXX C) # Some random project name
set(CMAKE_CXX_STANDARD 17)

#------------------------------------------------------------------------------------------------------------
# Version
#
# The individual components are kept in separate variables; CPPUDDLE_VERSION_STRING
# is composed from them in the form MAJOR.MINOR.PATCH (no trailing separator).

set(CPPUDDLE_VERSION_MAJOR 0)
set(CPPUDDLE_VERSION_MINOR 2)
set(CPPUDDLE_VERSION_PATCH 1)
set(CPPUDDLE_VERSION_STRING "${CPPUDDLE_VERSION_MAJOR}.${CPPUDDLE_VERSION_MINOR}.${CPPUDDLE_VERSION_PATCH}")

#------------------------------------------------------------------------------------------------------------
# Define Options

option(CPPUDDLE_WITH_TESTS "Build tests/examples" OFF)
set(CPPUDDLE_WITH_DEADLOCK_TEST_REPETITONS "100000" CACHE STRING "Number of repetitions for the aggregation executor deadlock tests")
option(CPPUDDLE_WITH_COUNTERS "Turns on allocations counters. Useful for extended testing" OFF)
Expand Down Expand Up @@ -335,7 +346,7 @@ if (CPPUDDLE_WITH_TESTS)

if (CPPUDDLE_WITH_HPX)
# Concurrency tests
add_test(allocator_concurrency_test.run allocator_hpx_test -t4 --passes 20 --outputfile allocator_concurrency_test.out)
add_test(allocator_concurrency_test.run allocator_hpx_test --hpx:threads=4 --passes 20 --outputfile allocator_concurrency_test.out)
set_tests_properties(allocator_concurrency_test.run PROPERTIES
FIXTURES_SETUP allocator_concurrency_output
PROCESSORS 4
Expand Down Expand Up @@ -386,11 +397,11 @@ if (CPPUDDLE_WITH_TESTS)

# GPU related tests
if (CPPUDDLE_WITH_CUDA)
add_test(allocator_cuda_test.run allocator_cuda_test -t 4)
add_test(allocator_cuda_test.run allocator_cuda_test --hpx:threads=4)
set_tests_properties(allocator_cuda_test.run PROPERTIES
PROCESSORS 4
)
add_test(stream_test.run stream_test -t 4)
add_test(stream_test.run stream_test --hpx:threads=4)
set_tests_properties(stream_test.run PROPERTIES
PROCESSORS 4
)
Expand Down Expand Up @@ -425,12 +436,12 @@ if (CPPUDDLE_WITH_TESTS)
FIXTURES_REQUIRED allocator_kokkos_output
PASS_REGULAR_EXPRESSION "--> Number of bad_allocs that triggered garbage collection: [ ]* 0"
)
add_test(allocator_kokkos_executor_for_loop_test.run allocator_kokkos_executor_for_loop_test -t 4)
add_test(allocator_kokkos_executor_for_loop_test.run allocator_kokkos_executor_for_loop_test --hpx:threads=4)
set_tests_properties(allocator_kokkos_executor_for_loop_test.run PROPERTIES
PROCESSORS 4
)

add_test(aggregation_basic_sequential_test.run work_aggregation_test -t1 --outputfile=aggregation_basic_sequential_test.out --scenario=sequential_test)
add_test(aggregation_basic_sequential_test.run work_aggregation_test --hpx:threads=1 --outputfile=aggregation_basic_sequential_test.out --scenario=sequential_test)
set_tests_properties(aggregation_basic_sequential_test.run PROPERTIES
FIXTURES_SETUP aggregation_basic_sequential_test_output
)
Expand All @@ -450,7 +461,7 @@ if (CPPUDDLE_WITH_TESTS)
PASS_REGULAR_EXPRESSION "--> Number of buffers that were marked as used upon cleanup: [ ]* 0"
)

add_test(aggregation_basic_parallel_test.run work_aggregation_test -t4 --outputfile=aggregation_basic_parallel_test.out --scenario=sequential_test)
add_test(aggregation_basic_parallel_test.run work_aggregation_test --hpx:threads=4 --outputfile=aggregation_basic_parallel_test.out --scenario=sequential_test)
set_tests_properties(aggregation_basic_parallel_test.run PROPERTIES
FIXTURES_SETUP aggregation_basic_parallel_test_output
PROCESSORS 4
Expand All @@ -473,7 +484,7 @@ if (CPPUDDLE_WITH_TESTS)



add_test(aggregation_interruption_test.run work_aggregation_test -t1 --outputfile=aggregation_interruption_test.out --scenario=interruption_test)
add_test(aggregation_interruption_test.run work_aggregation_test --hpx:threads=1 --outputfile=aggregation_interruption_test.out --scenario=interruption_test)
set_tests_properties(aggregation_interruption_test.run PROPERTIES
FIXTURES_SETUP aggregation_interruption_test_output
)
Expand All @@ -490,7 +501,7 @@ if (CPPUDDLE_WITH_TESTS)



add_test(aggregation_failure_test.run work_aggregation_test -t1 --outputfile=aggregation_failure_test.out --scenario=failure_test)
add_test(aggregation_failure_test.run work_aggregation_test --hpx:threads=1 --outputfile=aggregation_failure_test.out --scenario=failure_test)
set_tests_properties(aggregation_failure_test.run PROPERTIES
FIXTURES_SETUP aggregation_failure_test_output
)
Expand All @@ -506,7 +517,7 @@ if (CPPUDDLE_WITH_TESTS)
)


add_test(aggregation_add_pointer_test.run work_aggregation_test -t4 --outputfile=aggregation_add_pointer_test.out --scenario=pointer_add_test)
add_test(aggregation_add_pointer_test.run work_aggregation_test --hpx:threads=4 --outputfile=aggregation_add_pointer_test.out --scenario=pointer_add_test)
set_tests_properties(aggregation_add_pointer_test.run PROPERTIES
FIXTURES_SETUP aggregation_add_pointer_test_output
PROCESSORS 4
Expand All @@ -522,7 +533,7 @@ if (CPPUDDLE_WITH_TESTS)
PASS_REGULAR_EXPRESSION "--> Number of buffers that got requested from this manager: [ ]* 6"
)

add_test(aggregation_add_references_test.run work_aggregation_test -t4 --outputfile=aggregation_add_references_test.out --scenario=references_add_test)
add_test(aggregation_add_references_test.run work_aggregation_test --hpx:threads=4 --outputfile=aggregation_add_references_test.out --scenario=references_add_test)
set_tests_properties(aggregation_add_references_test.run PROPERTIES
FIXTURES_SETUP aggregation_add_references_test_output
PROCESSORS 4
Expand All @@ -547,7 +558,7 @@ if (CPPUDDLE_WITH_TESTS)
message(STATUS "Deadlock check repetitions set to ${deadlock_check_repetitions}")

# Try with few slices -- good to detect deadlocking on errors with the continuations
add_test(aggregation_stream_triad_cpu_eager_test1.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=2 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cpu_eager_test1.out)
add_test(aggregation_stream_triad_cpu_eager_test1.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=2 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cpu_eager_test1.out)
set_tests_properties(aggregation_stream_triad_cpu_eager_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_eager_test_output1
PROCESSORS 4
Expand All @@ -567,7 +578,7 @@ if (CPPUDDLE_WITH_TESTS)

# Try with odd number of slices
# This would deadlock given the STRICT executor, the EAGER one should have no problem
add_test(aggregation_stream_triad_cpu_eager_test2.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=17 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cpu_eager_test2.out)
add_test(aggregation_stream_triad_cpu_eager_test2.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=17 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cpu_eager_test2.out)
set_tests_properties(aggregation_stream_triad_cpu_eager_test2.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_eager_test_output2
PROCESSORS 4
Expand All @@ -586,7 +597,7 @@ if (CPPUDDLE_WITH_TESTS)


# Try with large number of slices -- this is basically what should be used in production, hence it should be tested
add_test(aggregation_stream_triad_cpu_eager_test3.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cpu_eager_test3.out)
add_test(aggregation_stream_triad_cpu_eager_test3.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cpu_eager_test3.out)
set_tests_properties(aggregation_stream_triad_cpu_eager_test3.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_eager_test_output3
PROCESSORS 4
Expand All @@ -605,7 +616,7 @@ if (CPPUDDLE_WITH_TESTS)


# Basic test for the ENDLESS executor -- the number of slices should not matter here, hence the large value for it
add_test(aggregation_stream_triad_cpu_endless_test1.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=99999999 --repetitions=${deadlock_check_repetitions} --executor_type=ENDLESS --outputfile=aggregation_stream_triad_cpu_endless_test1.out)
add_test(aggregation_stream_triad_cpu_endless_test1.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=99999999 --repetitions=${deadlock_check_repetitions} --executor_type=ENDLESS --outputfile=aggregation_stream_triad_cpu_endless_test1.out)
set_tests_properties(aggregation_stream_triad_cpu_endless_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_endless_test_output1
PROCESSORS 4
Expand All @@ -624,7 +635,7 @@ if (CPPUDDLE_WITH_TESTS)


# Basic test for the STRICT executor
add_test(aggregation_stream_triad_cpu_strict_test1.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_test1.out)
add_test(aggregation_stream_triad_cpu_strict_test1.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_test1.out)
set_tests_properties(aggregation_stream_triad_cpu_strict_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_strict_test_output1
PROCESSORS 4
Expand All @@ -643,7 +654,7 @@ if (CPPUDDLE_WITH_TESTS)


# With the STRICT executor the number of kernel launches should always be the same -- hence we can check that the aggregation works correctly here -- here it should be exactly 200 (no aggregation happening)
add_test(aggregation_stream_triad_cpu_strict_aggregation_test1.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=1 --repetitions=2 --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_aggregation_test1.out)
add_test(aggregation_stream_triad_cpu_strict_aggregation_test1.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=1 --repetitions=2 --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_aggregation_test1.out)
set_tests_properties(aggregation_stream_triad_cpu_strict_aggregation_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_strict_aggregation_test_output1
PROCESSORS 4
Expand All @@ -667,7 +678,7 @@ if (CPPUDDLE_WITH_TESTS)


# With the STRICT executor the number of kernel launches should always be the same -- hence we can check that the aggregation works correctly here -- here it should be exactly 30
add_test(aggregation_stream_triad_cpu_strict_aggregation_test2.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=10 --repetitions=3 --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_aggregation_test2.out)
add_test(aggregation_stream_triad_cpu_strict_aggregation_test2.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=10 --repetitions=3 --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_aggregation_test2.out)
set_tests_properties(aggregation_stream_triad_cpu_strict_aggregation_test2.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_strict_aggregation_test_output2
PROCESSORS 4
Expand All @@ -691,7 +702,7 @@ if (CPPUDDLE_WITH_TESTS)


# With the STRICT executor the number of kernel launches should always be the same -- hence we can check that the aggregation works correctly here -- here it should be exactly 1
add_test(aggregation_stream_triad_cpu_strict_aggregation_test3.run work_aggregation_cpu_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=100 --repetitions=1 --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_aggregation_test3.out)
add_test(aggregation_stream_triad_cpu_strict_aggregation_test3.run work_aggregation_cpu_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=2048 --problem_size=25600 --kernel_size=256 --max_slices=100 --repetitions=1 --executor_type=STRICT --outputfile=aggregation_stream_triad_cpu_strict_aggregation_test3.out)
set_tests_properties(aggregation_stream_triad_cpu_strict_aggregation_test3.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cpu_strict_aggregation_test_output3
PROCESSORS 4
Expand All @@ -716,7 +727,7 @@ if (CPPUDDLE_WITH_TESTS)
# STREAM TESTS CUDA

# Try with few slices -- good to detect deadlocking on errors with the continuations
add_test(aggregation_stream_triad_cuda_eager_test1.run work_aggregation_cuda_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=2 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cuda_eager_test1.out)
add_test(aggregation_stream_triad_cuda_eager_test1.run work_aggregation_cuda_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=2 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cuda_eager_test1.out)
set_tests_properties(aggregation_stream_triad_cuda_eager_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cuda_eager_test_output1
PROCESSORS 4
Expand All @@ -736,7 +747,7 @@ if (CPPUDDLE_WITH_TESTS)

# Try with odd number of slices
# This would deadlock given the STRICT executor, the EAGER one should have no problem
add_test(aggregation_stream_triad_cuda_eager_test2.run work_aggregation_cuda_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=17 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cuda_eager_test2.out)
add_test(aggregation_stream_triad_cuda_eager_test2.run work_aggregation_cuda_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=17 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cuda_eager_test2.out)
set_tests_properties(aggregation_stream_triad_cuda_eager_test2.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cuda_eager_test_output2
PROCESSORS 4
Expand All @@ -755,7 +766,7 @@ if (CPPUDDLE_WITH_TESTS)


# Try with large number of slices -- this is basically what should be used in production, hence it should be tested
add_test(aggregation_stream_triad_cuda_eager_test3.run work_aggregation_cuda_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cuda_eager_test3.out)
add_test(aggregation_stream_triad_cuda_eager_test3.run work_aggregation_cuda_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=EAGER --outputfile=aggregation_stream_triad_cuda_eager_test3.out)
set_tests_properties(aggregation_stream_triad_cuda_eager_test3.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cuda_eager_test_output3
PROCESSORS 4
Expand All @@ -774,7 +785,7 @@ if (CPPUDDLE_WITH_TESTS)


# Basic test for the ENDLESS executor -- the number of slices should not matter here, hence the large value for it
add_test(aggregation_stream_triad_cuda_endless_test1.run work_aggregation_cuda_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=99999999 --repetitions=${deadlock_check_repetitions} --executor_type=ENDLESS --outputfile=aggregation_stream_triad_cuda_endless_test1.out)
add_test(aggregation_stream_triad_cuda_endless_test1.run work_aggregation_cuda_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=99999999 --repetitions=${deadlock_check_repetitions} --executor_type=ENDLESS --outputfile=aggregation_stream_triad_cuda_endless_test1.out)
set_tests_properties(aggregation_stream_triad_cuda_endless_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cuda_endless_test_output1
PROCESSORS 4
Expand All @@ -793,7 +804,7 @@ if (CPPUDDLE_WITH_TESTS)


# Basic test for the STRICT executor
add_test(aggregation_stream_triad_cuda_strict_test1.run work_aggregation_cuda_triad -t 4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=STRICT --outputfile=aggregation_stream_triad_cuda_strict_test1.out)
add_test(aggregation_stream_triad_cuda_strict_test1.run work_aggregation_cuda_triad --hpx:threads=4 --number_aggregation_executors=1 --number_underlying_executors=4 --problem_size=102400 --kernel_size=1024 --max_slices=100 --repetitions=${deadlock_check_repetitions} --executor_type=STRICT --outputfile=aggregation_stream_triad_cuda_strict_test1.out)
set_tests_properties(aggregation_stream_triad_cuda_strict_test1.run PROPERTIES
FIXTURES_SETUP aggregation_stream_triad_cuda_strict_test_output1
PROCESSORS 4
Expand Down
6 changes: 3 additions & 3 deletions include/buffer_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <optional>

#ifdef CPPUDDLE_HAVE_COUNTERS
#include <boost/core/demangle.hpp>
//#include <boost/core/demangle.hpp>
#endif


Expand Down Expand Up @@ -323,8 +323,8 @@ class buffer_recycler {
// Print performance counters
size_t number_cleaned = unused_buffer_list.size() + buffer_map.size();
std::cout << "\nBuffer manager destructor for (Alloc: "
<< boost::core::demangle(typeid(Host_Allocator).name()) << ", Type: "
<< boost::core::demangle(typeid(T).name())
<< typeid(Host_Allocator).name() << ", Type: "
<< typeid(T).name()
<< "):" << std::endl
<< "--------------------------------------------------------------------"
<< std::endl
Expand Down
Loading

0 comments on commit 6ef2543

Please sign in to comment.