diff --git a/CMakeLists.txt b/CMakeLists.txt index b77ca5cf0..e2596a013 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,8 +102,8 @@ cmake_dependent_option( UMPIRE_ENABLE_BENCHMARKS "Build Umpire benchmarks" On cmake_dependent_option( UMPIRE_ENABLE_EXAMPLES "Build Umpire examples" On "ENABLE_EXAMPLES" Off ) -cmake_dependent_option( UMPIRE_ENABLE_DOCS "Build Umpire docs" Off - "ENABLE_DOCS" Off ) +cmake_dependent_option( UMPIRE_ENABLE_DOCS "Build Umpire docs" On + "ENABLE_DOCS" On ) cmake_dependent_option( UMPIRE_ENABLE_CLANGQUERY "Build Umpire with Clang query" On "ENABLE_CLANGQUERY" Off ) cmake_dependent_option( UMPIRE_ENABLE_COVERAGE "Build Umpire with Coverage support (with GCC)" On diff --git a/examples/sap_test.cpp b/examples/sap_test.cpp index e66efaeec..4bbe8fcdd 100644 --- a/examples/sap_test.cpp +++ b/examples/sap_test.cpp @@ -36,7 +36,7 @@ __global__ void touch_data(double* data, int len) __global__ void do_sleep() { //sleep - works still at 1000, so keeping it at 100k - sleep(1000); + sleep(1000000); } __global__ void check_data(double* data, int len) @@ -62,7 +62,8 @@ __global__ void touch_data_again(double* data, int len) int main(int, char**) { auto& rm = umpire::ResourceManager::getInstance(); - auto pool = rm.makeAllocator("sap-pool", rm.getAllocator("DEVICE")); + //auto pool = rm.makeAllocator("sap-pool", rm.getAllocator("DEVICE")); + auto pool = umpire::strategy::StreamAwareQuickPool("sap-pool", 150, rm.getAllocator("DEVICE"), 32, 32); int NUM_BLOCKS = NUM_THREADS / BLOCK_SIZE; cudaStream_t s1, s2; @@ -78,7 +79,7 @@ int main(int, char**) check_data<<>>(a, NUM_THREADS); //deallocate and reallocate a using different streams - pool.deallocate(s1, a); + pool.deallocate(s1, a, NUM_THREADS * sizeof(double)); a = static_cast(pool.allocate(s2, NUM_THREADS * sizeof(double))); //with stream s2, use memory in reallocated a in kernel @@ -101,7 +102,7 @@ int main(int, char**) std::cout << "Kernel succeeded! Expected result returned" << std::endl; //final deallocations - pool.deallocate(s2, a); + pool.deallocate(s2, a, NUM_THREADS * sizeof(double)); rm.deallocate(b); return 0; } diff --git a/src/umpire/strategy/StreamAwareQuickPool.cpp b/src/umpire/strategy/StreamAwareQuickPool.cpp index 176762d33..5d82246af 100644 --- a/src/umpire/strategy/StreamAwareQuickPool.cpp +++ b/src/umpire/strategy/StreamAwareQuickPool.cpp @@ -43,6 +43,7 @@ StreamAwareQuickPool::~StreamAwareQuickPool() void* StreamAwareQuickPool::allocate(void* stream, std::size_t bytes) { +/* unsigned int size = m_registered_streams.size(); UMPIRE_LOG(Debug, "Size of registered streams vector is: " << size); @@ -55,6 +56,7 @@ void* StreamAwareQuickPool::allocate(void* stream, std::size_t bytes) UMPIRE_LOG(Debug, "I did not find a registered stream so I am adding it to vector."); m_registered_streams.push_back(stream); +*/ return allocate(bytes); } @@ -165,11 +167,12 @@ void StreamAwareQuickPool::deallocate(void* stream, void* ptr, std::size_t size) m_registered_dealloc.at(i) = deallocate_has_occurred; } } - +/* UMPIRE_ERROR( runtime_error, umpire::fmt::format("Invalid deallocate: {} stream has not been allocated yet", stream)); deallocate(ptr, size); +*/ } void StreamAwareQuickPool::deallocate(void* ptr, std::size_t UMPIRE_UNUSED_ARG(size)) diff --git a/src/umpire/strategy/StreamAwareQuickPool.hpp b/src/umpire/strategy/StreamAwareQuickPool.hpp index fdd491ca6..b9d417a14 100644 --- a/src/umpire/strategy/StreamAwareQuickPool.hpp +++ b/src/umpire/strategy/StreamAwareQuickPool.hpp @@ -66,7 +66,7 @@ class StreamAwareQuickPool : public AllocationStrategy, private mixins::AlignedA ~StreamAwareQuickPool(); - StreamAwareQuickPool(const StreamAwareQuickPool&) = delete; + StreamAwareQuickPool(const StreamAwareQuickPool&); private: void* allocate(std::size_t bytes); @@ -168,7 +168,7 @@ class StreamAwareQuickPool : public AllocationStrategy, private mixins::AlignedA const std::size_t m_first_minimum_pool_allocation_size; const std::size_t m_next_minimum_pool_allocation_size; - std::vector m_registered_streams{0}; + std::vector m_registered_streams{}; std::vector m_registered_dealloc{}; std::size_t m_total_blocks{0};