Skip to content

Commit

Permalink
Tracking resource adaptor to catch memory leaks (#596)
Browse files Browse the repository at this point in the history
`tracking_resource_adaptor` tracks memory allocations and can return or log remaining allocations to track leaks. Fixes #467.

Co-authored-by: Conor Hoekstra <[email protected]>
Co-authored-by: Mark Harris <[email protected]>
  • Loading branch information
3 people authored Nov 2, 2020
1 parent f70306c commit b1ac445
Show file tree
Hide file tree
Showing 6 changed files with 450 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## New Features

- PR #608 Add stream wrapper type
- PR #596 Add `tracking_resource_adaptor` to help catch memory leaks
- PR #608 Add stream wrapper type

## Improvements

Expand Down
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ else()
target_link_libraries(rmm INTERFACE CUDA::cudart)
endif(CUDA_STATIC_RUNTIME)

target_link_libraries(rmm INTERFACE rmm::Thrust)
target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE rmm::Thrust spdlog::spdlog_header_only ${CMAKE_DL_LIBS})

###################################################################################################
# Set logging level. Must go before including gtests and benchmarks.
Expand Down
85 changes: 85 additions & 0 deletions include/rmm/detail/stack_trace.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// execinfo is a linux-only library, so stack traces will only be available on
// linux systems.
#if (defined(__GNUC__) && !defined(__MINGW32__) && !defined(__MINGW64__))
#define RMM_ENABLE_STACK_TRACES
#endif

#include <sstream>

#if defined(RMM_ENABLE_STACK_TRACES)
#include <execinfo.h>
#include <memory>
#include <vector>
#endif

namespace rmm {

namespace detail {

/**
 * @brief Captures the current call stack on instantiation so it can be printed later.
 *
 * The captured stack can be streamed to any `std::ostream`:
 *
 *   rmm::detail::stack_trace saved_stack;
 *
 *   std::cout << "callstack: " << saved_stack;
 *
 * When `RMM_ENABLE_STACK_TRACES` is not defined nothing is captured and streaming the
 * object only emits a "stack traces disabled" line.
 */
class stack_trace {
 public:
  /**
   * @brief Record the return addresses of (at most 64 of) the currently active frames.
   */
  stack_trace()
  {
#if defined(RMM_ENABLE_STACK_TRACES)
    constexpr int max_stack_depth = 64;
    void* stack[max_stack_depth];
    auto const depth = backtrace(stack, max_stack_depth);
    // Plain pointer arithmetic avoids forming `&stack[depth]` on a one-past-the-end element.
    stack_ptrs.insert(stack_ptrs.end(), stack, stack + depth);
#endif  // RMM_ENABLE_STACK_TRACES
  }

  /**
   * @brief Write one line per captured frame to `os`.
   *
   * @param os Stream to write to
   * @param st The captured stack
   * @return std::ostream& `os`, to allow chaining
   */
  friend std::ostream& operator<<(std::ostream& os, const stack_trace& st)
  {
#if defined(RMM_ENABLE_STACK_TRACES)
    auto const num_frames = st.stack_ptrs.size();
    // backtrace_symbols() takes an `int` count and returns a single malloc'd array that the
    // caller must release with free(); the individual strings must not be freed.
    std::unique_ptr<char*, decltype(&::free)> strings(
      backtrace_symbols(st.stack_ptrs.data(), static_cast<int>(num_frames)), &::free);
    if (strings == nullptr) {
      os << "But no stack trace could be found!" << std::endl;
    } else {
      ///@todo: support for demangling of C++ symbol names
      for (std::vector<void*>::size_type i = 0; i < num_frames; ++i) {
        os << "#" << i << " in " << strings.get()[i] << std::endl;
      }
    }
#else
    os << "stack traces disabled" << std::endl;
#endif  // RMM_ENABLE_STACK_TRACES
    return os;
  }

#if defined(RMM_ENABLE_STACK_TRACES)
 private:
  std::vector<void*> stack_ptrs;  ///< return addresses captured by the constructor
#endif  // RMM_ENABLE_STACK_TRACES
};

} // namespace detail

} // namespace rmm
261 changes: 261 additions & 0 deletions include/rmm/mr/device/tracking_resource_adaptor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/detail/error.hpp>
#include <rmm/detail/stack_trace.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <atomic>
#include <cstddef>
#include <map>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <sstream>
#include <utility>

namespace rmm {
namespace mr {
/**
 * @brief Resource that uses `Upstream` to allocate memory and tracks allocations.
 *
 * An instance of this resource can be constructed with an existing, upstream
 * resource in order to satisfy allocation requests, but any existing allocations
 * will be untracked. Tracking stores a size and pointer for every allocation, and a stack
 * trace if `capture_stacks` is true, so it can add significant overhead.
 * `tracking_resource_adaptor` is intended as a debug adaptor and shouldn't be used in
 * performance-sensitive code. Note that callstacks may not contain all symbols unless
 * the project is linked with `-rdynamic`. This can be accomplished with
 * `add_link_options(-rdynamic)` in cmake.
 *
 * @tparam Upstream Type of the upstream resource used for
 * allocation/deallocation.
 */
template <typename Upstream>
class tracking_resource_adaptor final : public device_memory_resource {
 public:
  // can be a std::shared_mutex once C++17 is adopted
  using read_lock_t  = std::shared_lock<std::shared_timed_mutex>;
  using write_lock_t = std::unique_lock<std::shared_timed_mutex>;

  /**
   * @brief Information stored about an allocation. Includes the size
   * and a stack trace if the `tracking_resource_adaptor` was initialized
   * to capture stacks.
   */
  struct allocation_info {
    std::unique_ptr<rmm::detail::stack_trace> strace;  ///< null unless stacks are captured
    std::size_t allocation_size;                       ///< requested size in bytes

    allocation_info() = delete;

    /**
     * @brief Record an allocation of `size` bytes.
     *
     * @param size Size of the allocation in bytes
     * @param capture_stack If true, capture the current call stack
     */
    allocation_info(std::size_t size, bool capture_stack)
      : strace{capture_stack ? std::make_unique<rmm::detail::stack_trace>() : nullptr},
        allocation_size{size}
    {
    }
  };

  /**
   * @brief Construct a new tracking resource adaptor using `upstream` to satisfy
   * allocation requests.
   *
   * @throws `rmm::logic_error` if `upstream == nullptr`
   *
   * @param upstream The resource used for allocating/deallocating device memory
   * @param capture_stacks If true, capture stacks for allocation calls
   */
  tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false)
    // Initializers are listed in member declaration order, which is the order in which they run.
    : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream}
  {
    RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
  }

  tracking_resource_adaptor()                                 = delete;
  ~tracking_resource_adaptor()                                = default;
  tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
  // NOTE: the move operations are declared defaulted, but the `std::atomic` and
  // `std::shared_timed_mutex` members are not movable, so they are implicitly defined as deleted.
  tracking_resource_adaptor(tracking_resource_adaptor&&) = default;
  tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
  tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) = default;

  /**
   * @brief Return pointer to the upstream resource.
   *
   * @return Upstream* Pointer to the upstream resource.
   */
  Upstream* get_upstream() const noexcept { return upstream_; }

  /**
   * @brief Checks whether the upstream resource supports streams.
   *
   * @return true The upstream resource supports streams
   * @return false The upstream resource does not support streams.
   */
  bool supports_streams() const noexcept override { return upstream_->supports_streams(); }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
   */
  bool supports_get_mem_info() const noexcept override
  {
    return upstream_->supports_get_mem_info();
  }

  /**
   * @brief Get the outstanding allocations map
   *
   * The reference is returned without holding the internal mutex, so the caller must make
   * sure no allocation or deallocation runs concurrently while the map is being read.
   *
   * @return std::map<void*, allocation_info> const& of a map of allocations. The key
   * is the allocated memory pointer and the data is the allocation_info structure, which
   * contains size and, potentially, stack traces.
   */
  std::map<void*, allocation_info> const& get_outstanding_allocations() const noexcept
  {
    return allocations_;
  }

  /**
   * @brief Query the number of bytes that have been allocated. Note that
   * this can not be used to know how large of an allocation is possible due
   * to both possible fragmentation and also internal page sizes and alignment
   * that is not tracked by this allocator.
   *
   * @return std::size_t number of bytes that have been allocated through this
   * allocator.
   */
  std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; }

  /**
   * @brief Log any outstanding allocations via RMM_LOG_DEBUG
   *
   * Emits one line per tracked allocation (pointer and size) followed by the captured
   * call stack when one is available. Does nothing when there are no outstanding
   * allocations, or when `SPDLOG_ACTIVE_LEVEL` is above `SPDLOG_LEVEL_DEBUG`.
   */
  void log_outstanding_allocations() const
  {
#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
    read_lock_t lock(mtx_);
    if (not allocations_.empty()) {
      std::ostringstream oss;
      for (auto const& al : allocations_) {
        oss << al.first << ": " << al.second.allocation_size << " B";
        if (al.second.strace != nullptr) {
          oss << " : callstack:" << std::endl << *al.second.strace;
        }
        oss << std::endl;
      }
      RMM_LOG_DEBUG("Outstanding Allocations: {}", oss.str());
    }
#endif  // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
  }

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using the upstream
   * resource and records the allocation.
   *
   * Any exception thrown by the upstream resource propagates to the caller; in that
   * case nothing is recorded.
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which to perform the allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
  {
    void* p = upstream_->allocate(bytes, stream);

    // track it.
    {
      write_lock_t lock(mtx_);
      allocations_.emplace(p, allocation_info{bytes, capture_stacks_});
    }
    // The byte counter is atomic, so it is updated outside the map lock.
    allocated_bytes_ += bytes;

    return p;
  }

  /**
   * @brief Free allocation of size `bytes` pointed to by `p` and stop tracking it.
   *
   * The byte counter is decremented by the caller-supplied `bytes`, not by the size
   * recorded at allocation time.
   *
   * @param p Pointer to be deallocated
   * @param bytes Size of the allocation
   * @param stream Stream on which to perform the deallocation
   */
  void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override
  {
    upstream_->deallocate(p, bytes, stream);
    {
      write_lock_t lock(mtx_);
      allocations_.erase(p);
    }
    allocated_bytes_ -= bytes;
  }

  /**
   * @brief Compare the upstream resource to another.
   *
   * If `other` is also a `tracking_resource_adaptor<Upstream>`, the two upstream
   * resources are compared; otherwise this adaptor's upstream is compared to `other`.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    if (this == &other) { return true; }

    auto const* cast = dynamic_cast<tracking_resource_adaptor<Upstream> const*>(&other);
    return cast != nullptr ? upstream_->is_equal(*cast->get_upstream())
                           : upstream_->is_equal(other);
  }

  /**
   * @brief Get free and available memory from upstream resource.
   *
   * @throws `rmm::cuda_error` if unable to retrieve memory info.
   *
   * @param stream Stream on which to get the mem info.
   * @return std::pair containing free_size and total_size of memory
   */
  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
  {
    return upstream_->get_mem_info(stream);
  }

  bool capture_stacks_;                           // whether or not to capture call stacks
  std::map<void*, allocation_info> allocations_;  // map of active allocations
  std::atomic<std::size_t> allocated_bytes_;      // number of bytes currently allocated
  std::shared_timed_mutex mutable mtx_;           // mutex for thread safe access to allocations_
  Upstream* upstream_;  // the upstream resource used for satisfying allocation requests
};

/**
 * @brief Convenience factory to return a `tracking_resource_adaptor` around the
 * upstream resource `upstream`.
 *
 * @tparam Upstream Type of the upstream `device_memory_resource`.
 * @param upstream Pointer to the upstream resource
 * @param capture_stacks If true, the adaptor captures a call stack for every allocation
 * @return A `tracking_resource_adaptor<Upstream>` wrapping `upstream`
 */
template <typename Upstream>
tracking_resource_adaptor<Upstream> make_tracking_adaptor(Upstream* upstream,
                                                          bool capture_stacks = false)
{
  // List-initialize the return object in place. Returning a named temporary would need a
  // usable move constructor before C++17, and the adaptor's atomic and mutex members make
  // its move constructor implicitly deleted.
  return {upstream, capture_stacks};
}

} // namespace mr
} // namespace rmm
9 changes: 8 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,21 @@ set(THRUST_ALLOCATOR_TEST_SRC

ConfigureTest(THRUST_ALLOCATOR_TEST "${THRUST_ALLOCATOR_TEST_SRC}")

###################################################################################################
# - tracking adaptor tests

# Source file list for the TRACKING_TEST target.
set(TRACKING_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/mr/device/tracking_mr_tests.cpp")

# NOTE(review): ConfigureTest is defined outside this excerpt; presumably it creates the
# TRACKING_TEST executable from the listed sources - confirm.
ConfigureTest(TRACKING_TEST "${TRACKING_TEST_SRC}")

###################################################################################################
# - limiting adaptor tests

# Source file list for the LIMITING_TEST target.
set(LIMITING_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/mr/device/limiting_mr_tests.cpp")

# NOTE(review): ConfigureTest is defined outside this excerpt; presumably it creates the
# LIMITING_TEST executable from the listed sources - confirm.
ConfigureTest(LIMITING_TEST "${LIMITING_TEST_SRC}")
# Adds the CUDA_API_PER_THREAD_DEFAULT_STREAM compile definition to LIMITING_TEST (PUBLIC scope).
target_compile_definitions(LIMITING_TEST PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM)

###################################################################################################
# - host mr tests
Expand Down
Loading

0 comments on commit b1ac445

Please sign in to comment.