Skip to content

Commit

Permalink
Reenable huge pages
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Oct 15, 2024
1 parent 4779af3 commit ff3e5df
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions cpp/src/interop/to_arrow_host.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <nanoarrow/nanoarrow.h>
#include <nanoarrow/nanoarrow.hpp>
#include <nanoarrow/nanoarrow_device.h>
#include <sys/mman.h>

#include <iostream>

Expand All @@ -52,6 +53,30 @@ namespace detail {

namespace {

/*
Enable Transparent Huge Pages (THP) for large (>4MB) allocations.
`buf` is returned untouched.
Enabling THP can improve performance of device-host memory transfers
significantly, see <https://github.com/rapidsai/cudf/pull/13914>.
*/
void enable_hugepage(ArrowBuffer* buffer)
{
if (buffer->size_bytes < (1u << 22u)) { // Smaller than 4 MB
return;
}

#ifdef MADV_HUGEPAGE
auto const pagesize = sysconf(_SC_PAGESIZE);
void* addr = const_cast<uint8_t*>(buffer->data);
auto length{static_cast<std::size_t>(buffer->size_bytes)};
if (std::align(pagesize, pagesize, addr, length)) {
// Intentionally not checking for errors that may be returned by older kernel versions;
// optimistically tries enabling huge pages.
madvise(addr, length, MADV_HUGEPAGE);
}
#endif
}

struct dispatch_to_arrow_host {
cudf::column_view column;
rmm::cuda_stream_view stream;
Expand All @@ -62,6 +87,7 @@ struct dispatch_to_arrow_host {
if (!column.has_nulls()) { return NANOARROW_OK; }

NANOARROW_RETURN_NOT_OK(ArrowBitmapResize(bitmap, static_cast<int64_t>(column.size()), 0));
enable_hugepage(&bitmap->buffer);
CUDF_CUDA_TRY(cudaMemcpyAsync(bitmap->buffer.data,
(column.offset() > 0)
? cudf::detail::copy_bitmask(column, stream, mr).data()
Expand All @@ -76,6 +102,7 @@ struct dispatch_to_arrow_host {
int populate_data_buffer(device_span<T const> input, ArrowBuffer* buffer) const
{
NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, input.size_bytes(), 1));
enable_hugepage(buffer);
CUDF_CUDA_TRY(cudaMemcpyAsync(
buffer->data, input.data(), input.size_bytes(), cudaMemcpyDefault, stream.value()));
return NANOARROW_OK;
Expand Down

0 comments on commit ff3e5df

Please sign in to comment.