From b1ac4459b30f0c2063ea85996b2f0375f1d9df33 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Mon, 2 Nov 2020 18:39:19 -0500 Subject: [PATCH] Tracking resource adaptor to catch memory leaks (#596) `tracking_resource_adaptor` tracks memory allocations and can return or log remaining allocations to track leaks. Fixes #467. Co-authored-by: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Co-authored-by: Mark Harris --- CHANGELOG.md | 3 +- CMakeLists.txt | 3 +- include/rmm/detail/stack_trace.hpp | 85 ++++++ .../mr/device/tracking_resource_adaptor.hpp | 261 ++++++++++++++++++ tests/CMakeLists.txt | 9 +- tests/mr/device/tracking_mr_tests.cpp | 93 +++++++ 6 files changed, 450 insertions(+), 4 deletions(-) create mode 100644 include/rmm/detail/stack_trace.hpp create mode 100644 include/rmm/mr/device/tracking_resource_adaptor.hpp create mode 100644 tests/mr/device/tracking_mr_tests.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 48bd99b09..4281e5148 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ ## New Features - - PR #608 Add stream wrapper type +- PR #596 Add `tracking_resource_adaptor` to help catch memory leaks +- PR #608 Add stream wrapper type ## Improvements diff --git a/CMakeLists.txt b/CMakeLists.txt index 16af5b494..97d03d803 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,8 +92,7 @@ else() target_link_libraries(rmm INTERFACE CUDA::cudart) endif(CUDA_STATIC_RUNTIME) -target_link_libraries(rmm INTERFACE rmm::Thrust) -target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only) +target_link_libraries(rmm INTERFACE rmm::Thrust spdlog::spdlog_header_only ${CMAKE_DL_LIBS}) ################################################################################################### # Set logging level. Must go before including gtests and benchmarks. 
diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp new file mode 100644 index 000000000..91a74401f --- /dev/null +++ b/include/rmm/detail/stack_trace.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// execinfo is a linux-only library, so stack traces will only be available on +// linux systems. +#if (defined(__GNUC__) && !defined(__MINGW32__) && !defined(__MINGW64__)) +#define RMM_ENABLE_STACK_TRACES +#endif + +#include + +#if defined(RMM_ENABLE_STACK_TRACES) +#include +#include +#include +#endif + +namespace rmm { + +namespace detail { + +/** + * @brief stack_trace is a class that will capture a stack on instantiation for output later. + * It can then be used in an output stream to display stack information. 
+ * + * rmm::detail::stack_trace saved_stack; + * + * std::cout << "callstack: " << saved_stack; + * + */ +class stack_trace { + public: + stack_trace() + { +#if defined(RMM_ENABLE_STACK_TRACES) + const int MaxStackDepth = 64; // frames deeper than this are not captured + void* stack[MaxStackDepth]; + auto const depth = backtrace(stack, MaxStackDepth); + stack_ptrs.insert(stack_ptrs.end(), stack, stack + depth); // avoids indexing one past the end +#endif // RMM_ENABLE_STACK_TRACES + } + + friend std::ostream& operator<<(std::ostream& os, const stack_trace& st) + { +#if defined(RMM_ENABLE_STACK_TRACES) + std::unique_ptr<char*, decltype(&::free)> strings( + backtrace_symbols(st.stack_ptrs.data(), static_cast<int>(st.stack_ptrs.size())), &::free); // frees the symbol array on scope exit + if (strings.get() == nullptr) { + os << "But no stack trace could be found!" << std::endl; + } else { + ///@todo: support for demangling of C++ symbol names + for (std::size_t i = 0; i < st.stack_ptrs.size(); ++i) { + os << "#" << i << " in " << strings.get()[i] << std::endl; + } + } +#else + os << "stack traces disabled" << std::endl; +#endif // RMM_ENABLE_STACK_TRACES + return os; + } + +#if defined(RMM_ENABLE_STACK_TRACES) + private: + std::vector<void*> stack_ptrs; // return addresses captured by the constructor +#endif // RMM_ENABLE_STACK_TRACES +}; + +} // namespace detail + +} // namespace rmm diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp new file mode 100644 index 000000000..aa6687fb7 --- /dev/null +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace rmm { +namespace mr { +/** + * @brief Resource that uses `Upstream` to allocate memory and tracks allocations. + * + * An instance of this resource can be constructed with an existing, upstream + * resource in order to satisfy allocation requests, but any existing allocations + * will be untracked. Tracking stores a size and pointer for every allocation, and a stack + * frame if `capture_stacks` is true, so it can add significant overhead. + * `tracking_resource_adaptor` is intended as a debug adaptor and shouldn't be used in + * performance-sensitive code. Note that callstacks may not contain all symbols unless + * the project is linked with `-rdynamic`. This can be accomplished with + * `add_link_options(-rdynamic)` in cmake. + * + * @tparam Upstream Type of the upstream resource used for + * allocation/deallocation. + */ +template +class tracking_resource_adaptor final : public device_memory_resource { + public: + // can be a std::shared_mutex once C++17 is adopted + using read_lock_t = std::shared_lock; + using write_lock_t = std::unique_lock; + + /** + * @brief Information stored about an allocation. Includes the size + * and a stack trace if the `tracking_resource_adaptor` was initialized + * to capture stacks. + * + */ + struct allocation_info { + std::unique_ptr strace; + std::size_t allocation_size; + + allocation_info() = delete; + allocation_info(std::size_t size, bool capture_stack) + : strace{[&]() { + return capture_stack ? std::make_unique() : nullptr; + }()}, + allocation_size{size} {}; + }; + + /** + * @brief Construct a new tracking resource adaptor using `upstream` to satisfy + * allocation requests. 
+ * + * @throws `rmm::logic_error` if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param capture_stacks If true, capture stacks for allocation calls + */ + tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) + : upstream_{upstream}, capture_stacks_{capture_stacks}, allocated_bytes_{0} + { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + tracking_resource_adaptor() = delete; + ~tracking_resource_adaptor() = default; + tracking_resource_adaptor(tracking_resource_adaptor const&) = delete; + tracking_resource_adaptor(tracking_resource_adaptor&&) = default; + tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete; + tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) = default; + + /** + * @brief Return pointer to the upstream resource. + * + * @return Upstream* Pointer to the upstream resource. + */ + Upstream* get_upstream() const noexcept { return upstream_; } + + /** + * @brief Checks whether the upstream resource supports streams. + * + * @return true The upstream resource supports streams + * @return false The upstream resource does not support streams. + */ + bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return bool true if the upstream resource supports get_mem_info, false otherwise. + */ + bool supports_get_mem_info() const noexcept override + { + return upstream_->supports_get_mem_info(); + } + + /** + * @brief Get the outstanding allocations map + * + * @return std::map const& of a map of allocations. The key + * is the allocated memory pointer and the data is the allocation_info structure, which + * contains size and, potentially, stack traces. 
+ */ + std::map<void*, allocation_info> const& get_outstanding_allocations() const noexcept + { + return allocations_; + } + + /** + * @brief Query the number of bytes that have been allocated. Note that + * this can not be used to know how large of an allocation is possible due + * to both possible fragmentation and also internal page sizes and alignment + * that is not tracked by this allocator. + * + * @return std::size_t number of bytes that have been allocated through this + * allocator. + */ + std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; } + + /** + * @brief Log any outstanding allocations via RMM_LOG_DEBUG + * Takes a shared lock on the allocation map; a no-op unless SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG. + */ + void log_outstanding_allocations() const + { +#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG + read_lock_t lock(mtx_); + if (not allocations_.empty()) { + std::ostringstream oss; + for (auto const& al : allocations_) { + oss << al.first << ": " << al.second.allocation_size << " B"; + if (al.second.strace != nullptr) { + oss << " : callstack:" << std::endl << *al.second.strace; + } + oss << std::endl; + } + RMM_LOG_DEBUG("Outstanding Allocations: {}", oss.str()); + } +#endif // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG + } + + private: + /** + * @brief Allocates memory of size at least `bytes` using the upstream + * resource and records the allocation so it can be reported later. + * + * The returned pointer has at least 256B alignment. + * + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled + * by the upstream resource. + * + * @param bytes The size, in bytes, of the allocation + * @param stream Stream on which to perform the allocation + * @return void* Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, cuda_stream_view stream) override + { + void* p = upstream_->allocate(bytes, stream); + + // track it. 
+ { + write_lock_t lock(mtx_); + allocations_.emplace(p, allocation_info{bytes, capture_stacks_}); + } + allocated_bytes_ += bytes; + + return p; + } + + /** + * @brief Untrack `p`, then free the allocation of size `bytes` via the upstream resource + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + * @param bytes Size of the allocation + * @param stream Stream on which to perform the deallocation + */ + void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + { + { + write_lock_t lock(mtx_); + allocations_.erase(p); // untrack first: once upstream frees `p` the address may be reused + } + allocated_bytes_ -= bytes; + upstream_->deallocate(p, bytes, stream); // release only after the tracking entry is gone + } + + /** + * @brief Compare the upstream resource to another. + * + * @throws Nothing. + * + * @param other The other resource to compare to + * @return true If the two resources are equivalent + * @return false If the two resources are not equal + */ + bool do_is_equal(device_memory_resource const& other) const noexcept override + { + if (this == &other) + return true; + else { + auto cast = dynamic_cast<tracking_resource_adaptor<Upstream> const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); + } + } + + /** + * @brief Get free and available memory from upstream resource. + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. + * + * @param stream Stream on which to get the mem info. 
+ * @return std::pair containing free_size and total_size of memory + */ + std::pair do_get_mem_info(cuda_stream_view stream) const override + { + return upstream_->get_mem_info(stream); + } + + bool capture_stacks_; // whether or not to capture call stacks + std::map allocations_; // map of active allocations + std::atomic allocated_bytes_; // number of bytes currently allocated + std::shared_timed_mutex mutable mtx_; // mutex for thread safe access to allocations_ + Upstream* upstream_; // the upstream resource used for satisfying allocation requests +}; + +/** + * @brief Convenience factory to return a `tracking_resource_adaptor` around the + * upstream resource `upstream`. + * + * @tparam Upstream Type of the upstream `device_memory_resource`. + * @param upstream Pointer to the upstream resource + */ +template +tracking_resource_adaptor make_tracking_adaptor(Upstream* upstream) +{ + return tracking_resource_adaptor{upstream}; +} + +} // namespace mr +} // namespace rmm diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e787aa017..1c62a130e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -82,6 +82,14 @@ set(THRUST_ALLOCATOR_TEST_SRC ConfigureTest(THRUST_ALLOCATOR_TEST "${THRUST_ALLOCATOR_TEST_SRC}") +################################################################################################### +# - tracking adaptor tests + +set(TRACKING_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/tracking_mr_tests.cpp") + +ConfigureTest(TRACKING_TEST "${TRACKING_TEST_SRC}") + ################################################################################################### # - limiting adaptor tests @@ -89,7 +97,6 @@ set(LIMITING_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/limiting_mr_tests.cpp") ConfigureTest(LIMITING_TEST "${LIMITING_TEST_SRC}") -target_compile_definitions(LIMITING_TEST PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) ################################################################################################### # - 
host mr tests diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp new file mode 100644 index 000000000..81e7e64f8 --- /dev/null +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "mr_test.hpp" + +#include + +namespace rmm { +namespace test { +namespace { + +using tracking_adaptor = rmm::mr::tracking_resource_adaptor; + +TEST(TrackingTest, ThrowOnNullUpstream) +{ + auto construct_nullptr = []() { tracking_adaptor mr{nullptr}; }; + EXPECT_THROW(construct_nullptr(), rmm::logic_error); +} + +TEST(TrackingTest, Empty) +{ + tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); + EXPECT_EQ(mr.get_allocated_bytes(), 0); +} + +TEST(TrackingTest, AllFreed) +{ + tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (int i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + for (auto p : allocations) { + mr.deallocate(p, 10_MiB); + } + EXPECT_EQ(mr.get_outstanding_allocations().size(), 0); + EXPECT_EQ(mr.get_allocated_bytes(), 0); +} + +TEST(TrackingTest, AllocationsLeftWithStacks) +{ + tracking_adaptor mr{rmm::mr::get_current_device_resource(), true}; + std::vector allocations; + for (int i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + 
} + for (int i = 0; i < 10; i += 2) { + mr.deallocate(allocations[i], 10_MiB); + } + EXPECT_EQ(mr.get_outstanding_allocations().size(), 5); + EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB); + auto const &outstanding_allocations = mr.get_outstanding_allocations(); + EXPECT_EQ(outstanding_allocations.size(), 5); + EXPECT_NE(outstanding_allocations.begin()->second.strace, nullptr); +} + +TEST(TrackingTest, AllocationsLeftWithoutStacks) +{ + tracking_adaptor mr{rmm::mr::get_current_device_resource()}; + std::vector allocations; + for (int i = 0; i < 10; ++i) { + allocations.push_back(mr.allocate(10_MiB)); + } + for (int i = 0; i < 10; i += 2) { + mr.deallocate(allocations[i], 10_MiB); + } + EXPECT_EQ(mr.get_outstanding_allocations().size(), 5); + EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB); + auto const &outstanding_allocations = mr.get_outstanding_allocations(); + EXPECT_EQ(outstanding_allocations.size(), 5); + EXPECT_EQ(outstanding_allocations.begin()->second.strace, nullptr); +} + +} // namespace +} // namespace test +} // namespace rmm