Skip to content

Commit

Permalink
Custom error messages for IO with nonexistent files (#14662)
Browse files Browse the repository at this point in the history
Closes #12311; closes #9564
We return a somewhat cryptic error when opening a file that does not exist: "Cannot query file size".

With this change, we report whether the file exists, or, if the file does exist, what the errno value is after `open`.
Also added a check for the output files' directory in `file_sink`. This check is now also included in `file_wrapper`, just in case initialization order changes at some point.
With this change, a missing output file directory and missing input files should always be reported correctly.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #14662
  • Loading branch information
vuule authored Jan 5, 2024
1 parent 4de4aae commit 0c98134
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 17 deletions.
5 changes: 3 additions & 2 deletions cpp/src/io/utilities/data_sink.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,6 +26,7 @@

namespace cudf {
namespace io {

/**
* @brief Implementation class for storing data into a local file.
*/
Expand All @@ -34,7 +35,7 @@ class file_sink : public data_sink {
explicit file_sink(std::string const& filepath)
{
_output_stream.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc);
CUDF_EXPECTS(_output_stream.is_open(), "Cannot open output file");
if (!_output_stream.is_open()) { detail::throw_on_file_open_failure(filepath, true); }

if (detail::cufile_integration::is_kvikio_enabled()) {
_kvikio_file = kvikio::FileHandle(filepath, "w");
Expand Down
43 changes: 33 additions & 10 deletions cpp/src/io/utilities/file_io_utilities.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -13,38 +13,61 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "file_io_utilities.hpp"
#include <cudf/detail/utilities/integer_utils.hpp>
#include <io/utilities/config_utils.hpp>

#include <rmm/device_buffer.hpp>

#include <dlfcn.h>
#include <errno.h>
#include <string.h>

#include <filesystem>
#include <fstream>
#include <numeric>

namespace cudf {
namespace io {
namespace detail {

size_t get_file_size(int file_descriptor)
[[noreturn]] void throw_on_file_open_failure(std::string const& filepath, bool is_create)
{
struct stat st;
CUDF_EXPECTS(fstat(file_descriptor, &st) != -1, "Cannot query file size");
return static_cast<size_t>(st.st_size);
// save errno because it may be overwritten by subsequent calls
auto const err = errno;

if (auto const path = std::filesystem::path(filepath); is_create) {
CUDF_EXPECTS(std::filesystem::exists(path.parent_path()),
"Cannot create output file; directory does not exist");

} else {
CUDF_EXPECTS(std::filesystem::exists(path), "Cannot open file; it does not exist");
}

std::array<char, 1024> error_msg_buffer;
auto const error_msg = strerror_r(err, error_msg_buffer.data(), 1024);
CUDF_FAIL("Cannot open file; failed with errno: " + std::string{error_msg});
}

/**
 * @brief Opens a file and reports a descriptive error if the open fails.
 *
 * @param filepath Path of the file to open
 * @param flags Flags forwarded to `open`
 * @param mode Mode forwarded to `open`
 * @return The file descriptor returned by `open`; never -1
 */
[[nodiscard]] int open_file_checked(std::string const& filepath, int flags, mode_t mode)
{
  auto const fd = open(filepath.c_str(), flags, mode);
  // When O_CREAT is set the file is being created, so the failure report inspects the parent
  // directory rather than the file itself.
  if (fd == -1) { throw_on_file_open_failure(filepath, (flags & O_CREAT) != 0); }

  return fd;
}

/**
 * @brief Queries the size of an already opened file.
 *
 * @param file_descriptor Descriptor of the file to query with `fstat`
 * @return The `st_size` reported by `fstat`, converted to `size_t`
 */
[[nodiscard]] size_t get_file_size(int file_descriptor)
{
  struct stat file_info;
  auto const status = fstat(file_descriptor, &file_info);
  CUDF_EXPECTS(status != -1, "Cannot query file size");
  return static_cast<size_t>(file_info.st_size);
}

/**
 * @brief Opens the file and records its size.
 *
 * The open is performed through `open_file_checked`, so a failure to open is reported with a
 * descriptive message before any other member is touched.
 *
 * @param filepath Path of the file to open
 * @param flags Flags forwarded to `open`
 * @param mode Mode forwarded to `open`
 */
file_wrapper::file_wrapper(std::string const& filepath, int flags, mode_t mode)
  : fd(open_file_checked(filepath, flags, mode))
{
  // The destructor does not run when a constructor exits with an exception, so the descriptor
  // must be closed here if the size query fails; otherwise it would leak.
  try {
    _size = get_file_size(fd);
  } catch (...) {
    close(fd);
    throw;
  }
}

/**
 * @brief Closes the file descriptor held by this wrapper.
 */
file_wrapper::~file_wrapper()
{
  close(fd);
}
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/io/utilities/file_io_utilities.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,16 +34,17 @@ namespace cudf {
namespace io {
namespace detail {

/**
 * @brief Reports a failed file open with a message that describes the likely cause.
 *
 * Never returns. Meant to be called right after the failed open attempt, since the reported
 * reason may be derived from `errno`.
 *
 * @param filepath Path of the file that could not be opened
 * @param is_create Whether the file was being created (as opposed to an existing file being
 * opened)
 */
[[noreturn]] void throw_on_file_open_failure(std::string const& filepath, bool is_create);

/**
* @brief Class that provides RAII for file handling.
*/
class file_wrapper {
int fd = -1;
size_t _size;
int fd = -1;
size_t _size = 0;

public:
explicit file_wrapper(std::string const& filepath, int flags);
explicit file_wrapper(std::string const& filepath, int flags, mode_t mode);
explicit file_wrapper(std::string const& filepath, int flags, mode_t mode = 0);
~file_wrapper();
[[nodiscard]] auto size() const { return _size; }
[[nodiscard]] auto desc() const { return fd; }
Expand Down

0 comments on commit 0c98134

Please sign in to comment.