Skip to content

Commit

Permalink
Remove the additional host register calls initially intended for perf…
Browse files Browse the repository at this point in the history
…ormance improvement on Grace Hopper (#17092)

On Grace Hopper, file I/O takes a special path that calls `cudaHostRegister` to circumvent a performance issue. Recent benchmark shows that this workaround is no longer necessary . This PR is making a clean-up.

Authors:
  - Tianyu Liu (https://github.com/kingcrimsontianyu)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - David Wendt (https://github.com/davidwendt)

URL: #17092
  • Loading branch information
kingcrimsontianyu authored Oct 17, 2024
1 parent 14209c1 commit 920a5f6
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 101 deletions.
19 changes: 6 additions & 13 deletions cpp/include/cudf/io/datasource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,28 +86,21 @@ class datasource {
/**
* @brief Creates a source from a file path.
*
* @note Parameters `offset`, `max_size_estimate` and `min_size_estimate` are hints to the
* `datasource` implementation about the expected range of the data that will be read. The
* implementation may use these hints to optimize the read operation. These parameters are usually
* based on the byte range option. In this case, `min_size_estimate` should be no greater than the
* byte range to avoid potential issues when reading adjacent ranges. `max_size_estimate` can
* include padding after the byte range, to include additional data that may be needed for
* processing.
*
@throws cudf::logic_error if the minimum size estimate is greater than the maximum size estimate
* Parameters `offset` and `max_size_estimate` are hints to the `datasource` implementation about
* the expected range of the data that will be read. The implementation may use these hints to
* optimize the read operation. These parameters are usually based on the byte range option. In
* this case, `max_size_estimate` can include padding after the byte range, to include additional
* data that may be needed for processing.
*
* @param[in] filepath Path to the file to use
* @param[in] offset Starting byte offset from which data will be read (the default is zero)
* @param[in] max_size_estimate Upper estimate of the data range that will be read (the default is
* zero, which means the whole file after `offset`)
* @param[in] min_size_estimate Lower estimate of the data range that will be read (the default is
* zero, which means the whole file after `offset`)
* @return Constructed datasource object
*/
static std::unique_ptr<datasource> create(std::string const& filepath,
size_t offset = 0,
size_t max_size_estimate = 0,
size_t min_size_estimate = 0);
size_t max_size_estimate = 0);

/**
* @brief Creates a source from a host memory buffer.
Expand Down
12 changes: 4 additions & 8 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,13 @@ namespace {

std::vector<std::unique_ptr<cudf::io::datasource>> make_datasources(source_info const& info,
size_t offset = 0,
size_t max_size_estimate = 0,
size_t min_size_estimate = 0)
size_t max_size_estimate = 0)
{
switch (info.type()) {
case io_type::FILEPATH: {
auto sources = std::vector<std::unique_ptr<cudf::io::datasource>>();
for (auto const& filepath : info.filepaths()) {
sources.emplace_back(
cudf::io::datasource::create(filepath, offset, max_size_estimate, min_size_estimate));
sources.emplace_back(cudf::io::datasource::create(filepath, offset, max_size_estimate));
}
return sources;
}
Expand Down Expand Up @@ -213,8 +211,7 @@ table_with_metadata read_json(json_reader_options options,

auto datasources = make_datasources(options.get_source(),
options.get_byte_range_offset(),
options.get_byte_range_size_with_padding(),
options.get_byte_range_size());
options.get_byte_range_size_with_padding());

return json::detail::read_json(datasources, options, stream, mr);
}
Expand All @@ -241,8 +238,7 @@ table_with_metadata read_csv(csv_reader_options options,

auto datasources = make_datasources(options.get_source(),
options.get_byte_range_offset(),
options.get_byte_range_size_with_padding(),
options.get_byte_range_size());
options.get_byte_range_size_with_padding());

CUDF_EXPECTS(datasources.size() == 1, "Only a single source is currently supported.");

Expand Down
84 changes: 4 additions & 80 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,27 +134,6 @@ class file_source : public datasource {
static constexpr size_t _gds_read_preferred_threshold = 128 << 10; // 128KB
};

/**
* @brief Memoized pageableMemoryAccessUsesHostPageTables device property.
*/
[[nodiscard]] bool pageableMemoryAccessUsesHostPageTables()
{
static std::unordered_map<int, bool> result_cache{};

int deviceId{};
CUDF_CUDA_TRY(cudaGetDevice(&deviceId));

if (result_cache.find(deviceId) == result_cache.end()) {
cudaDeviceProp props{};
CUDF_CUDA_TRY(cudaGetDeviceProperties(&props, deviceId));
result_cache[deviceId] = (props.pageableMemoryAccessUsesHostPageTables == 1);
CUDF_LOG_INFO(
"Device {} pageableMemoryAccessUsesHostPageTables: {}", deviceId, result_cache[deviceId]);
}

return result_cache[deviceId];
}

/**
* @brief Implementation class for reading from a file using memory mapped access.
*
Expand All @@ -163,28 +142,18 @@ class file_source : public datasource {
*/
class memory_mapped_source : public file_source {
public:
explicit memory_mapped_source(char const* filepath,
size_t offset,
size_t max_size_estimate,
size_t min_size_estimate)
explicit memory_mapped_source(char const* filepath, size_t offset, size_t max_size_estimate)
: file_source(filepath)
{
if (_file.size() != 0) {
// Memory mapping is not exclusive, so we can include the whole region we expect to read
map(_file.desc(), offset, max_size_estimate);
// Buffer registration is exclusive (can't overlap with other registered buffers) so we
// register the lower estimate; this avoids issues when reading adjacent ranges from the same
// file from multiple threads
register_mmap_buffer(offset, min_size_estimate);
}
}

~memory_mapped_source() override
{
if (_map_addr != nullptr) {
unmap();
unregister_mmap_buffer();
}
if (_map_addr != nullptr) { unmap(); }
}

std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
Expand Down Expand Up @@ -227,46 +196,6 @@ class memory_mapped_source : public file_source {
}

private:
/**
* @brief Page-locks (registers) the memory range of the mapped file.
*
* Fixes nvbugs/4215160
*/
void register_mmap_buffer(size_t offset, size_t size)
{
if (_map_addr == nullptr or not pageableMemoryAccessUsesHostPageTables()) { return; }

// Registered region must be within the mapped region
_reg_offset = std::max(offset, _map_offset);
_reg_size = std::min(size != 0 ? size : _map_size, (_map_offset + _map_size) - _reg_offset);

_reg_addr = static_cast<std::byte*>(_map_addr) - _map_offset + _reg_offset;
auto const result = cudaHostRegister(_reg_addr, _reg_size, cudaHostRegisterReadOnly);
if (result != cudaSuccess) {
_reg_addr = nullptr;
CUDF_LOG_WARN("cudaHostRegister failed with {} ({})",
static_cast<int>(result),
cudaGetErrorString(result));
}
}

/**
* @brief Unregisters the memory range of the mapped file.
*/
void unregister_mmap_buffer()
{
if (_reg_addr == nullptr) { return; }

auto const result = cudaHostUnregister(_reg_addr);
if (result == cudaSuccess) {
_reg_addr = nullptr;
} else {
CUDF_LOG_WARN("cudaHostUnregister failed with {} ({})",
static_cast<int>(result),
cudaGetErrorString(result));
}
}

void map(int fd, size_t offset, size_t size)
{
CUDF_EXPECTS(offset < _file.size(), "Offset is past end of file", std::overflow_error);
Expand Down Expand Up @@ -461,21 +390,16 @@ class user_datasource_wrapper : public datasource {

std::unique_ptr<datasource> datasource::create(std::string const& filepath,
size_t offset,
size_t max_size_estimate,
size_t min_size_estimate)
size_t max_size_estimate)
{
CUDF_EXPECTS(max_size_estimate == 0 or min_size_estimate <= max_size_estimate,
"Invalid min/max size estimates for datasource creation");

#ifdef CUFILE_FOUND
if (cufile_integration::is_always_enabled()) {
// avoid mmap as GDS is expected to be used for most reads
return std::make_unique<file_source>(filepath.c_str());
}
#endif
// Use our own memory mapping implementation for direct file reads
return std::make_unique<memory_mapped_source>(
filepath.c_str(), offset, max_size_estimate, min_size_estimate);
return std::make_unique<memory_mapped_source>(filepath.c_str(), offset, max_size_estimate);
}

std::unique_ptr<datasource> datasource::create(host_buffer const& buffer)
Expand Down

0 comments on commit 920a5f6

Please sign in to comment.