diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp
index 6216a9ecec2..e47a8593aac 100644
--- a/cpp/benchmarks/io/csv/csv_reader_input.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -45,6 +45,7 @@ void csv_read_common(DataType const& data_types,
   cudf::io::csv_reader_options const read_options =
     cudf::io::csv_reader_options::builder(source_sink.make_source_info());
 
+  auto checker                = benchmark_roundtrip_checker{options.get_table()};
   auto const mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
@@ -52,8 +53,9 @@ void csv_read_common(DataType const& data_types,
                try_drop_l3_cache();  // Drop L3 cache for accurate measurement
 
                timer.start();
-               cudf::io::read_csv(read_options);
+               auto const res = cudf::io::read_csv(read_options);
                timer.stop();
+               checker.check_once(res.tbl->view());
              });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp
index 93ef5bed774..f6ea1f70578 100644
--- a/cpp/benchmarks/io/csv/csv_reader_options.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp
@@ -62,7 +62,12 @@ void BM_csv_read_varying_options(
 
   size_t const chunk_size             = source_sink.size() / num_chunks;
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
-  auto const mem_stats_logger         = cudf::memory_stats_logger();
+
+  std::optional<benchmark_roundtrip_checker> checker;
+  if (ColSelection == column_selection::ALL and RowSelection == row_selection::ALL) {
+    checker = benchmark_roundtrip_checker{options.get_table()};
+  }
+  auto const mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
              [&](nvbench::launch& launch, auto& timer) {
@@ -94,9 +99,11 @@ void BM_csv_read_varying_options(
                    default: CUDF_FAIL("Unsupported row selection method");
                  }
 
-                 cudf::io::read_csv(read_options);
+                 auto const res = cudf::io::read_csv(read_options);
+                 if (is_last_chunk) { timer.stop(); }
+
+                 if (checker.has_value()) { checker->check_once(res.tbl->view()); }
                }
-               timer.stop();
              });
 
   auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index 6b8af91b842..ce05d1e41c4 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -16,6 +16,7 @@
 
 #include <benchmarks/io/cuio_common.hpp>
 #include <cudf/detail/utilities/logger.hpp>
+#include <cudf_test/column_utilities.hpp>
 
 #include <cstdio>
 #include <fstream>
@@ -199,3 +200,42 @@ void try_drop_l3_cache()
                            [](auto& cmd) { return exec_cmd(cmd).empty(); }),
                "Failed to execute the drop cache command");
 }
+
+// Maps the `CUDF_BENCH_OUTPUT_DIFF` environment variable to a comparison verbosity level.
+[[nodiscard]] cudf::test::debug_output_level get_env_verbosity()
+{
+  static auto const raw_setting = getenv("CUDF_BENCH_OUTPUT_DIFF");  // looked up only once
+  if (raw_setting == nullptr) { return cudf::test::debug_output_level::QUIET; }  // not set
+  auto const setting = std::string{raw_setting};
+  if (setting == "ALL_ERRORS") { return cudf::test::debug_output_level::ALL_ERRORS; }
+  if (setting == "FIRST_ERROR") { return cudf::test::debug_output_level::FIRST_ERROR; }
+  CUDF_FAIL("Invalid CUDF_BENCH_OUTPUT_DIFF value: " + setting);  // any other value is an error
+}
+
+// Compares `output_table` to the stored `input_table`; only the first call does any work.
+void benchmark_roundtrip_checker::check_once(cudf::table_view const& output_table)
+{
+  if (is_checked) { return; }
+  is_checked = true;  // set up front so the early returns below also count as the single check
+  if (input_table.num_columns() != output_table.num_columns()) {
+    CUDF_LOG_WARN("Number of columns mismatch");
+    return;
+  }
+  if (input_table.num_rows() != output_table.num_rows()) {
+    CUDF_LOG_WARN("Number of rows mismatch");
+    return;
+  }
+
+  // With ALL_ERRORS every column is compared so that all differences are printed; any other
+  // level stops at the first mismatching column. `&=` never skips the comparison call.
+  auto const verbosity = get_env_verbosity();
+  auto const check_all = verbosity == cudf::test::debug_output_level::ALL_ERRORS;
+  auto matches         = true;
+  for (cudf::size_type i = 0; i < input_table.num_columns() and (matches or check_all); ++i) {
+    matches &= cudf::test::detail::expect_columns_equal(
+      input_table.column(i), output_table.column(i), verbosity);
+  }
+  if (not matches and verbosity == cudf::test::debug_output_level::QUIET) {
+    CUDF_LOG_WARN("Data (type) mismatch; set `CUDF_BENCH_OUTPUT_DIFF` for more details");
+  }
+}
diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp
index 34adae30505..a443f4a3e9c 100644
--- a/cpp/benchmarks/io/cuio_common.hpp
+++ b/cpp/benchmarks/io/cuio_common.hpp
@@ -138,3 +138,29 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,
  * @throw cudf::logic_error if the environment variable is set and the command fails
  */
 void try_drop_l3_cache();
+
+/**
+ * @brief Verifies, at most once, that a benchmark's output table equals its input table.
+ */
+class benchmark_roundtrip_checker {
+ public:
+  // Keeps a copy of the view only; NOTE(review): the viewed table presumably must outlive it.
+  explicit benchmark_roundtrip_checker(cudf::table_view const& input_table)
+    : input_table{input_table}
+  {
+  }
+
+  /**
+   * @brief Compares `output_table` with the stored `input_table`; only the first call has effect.
+   *
+   * Mismatches are reported with a logged warning. Setting the `CUDF_BENCH_OUTPUT_DIFF`
+   * environment variable enables printing the difference to stdout.
+   *
+   * @param output_table The table to compare against the stored `input_table`
+   */
+  void check_once(cudf::table_view const& output_table);
+
+ private:
+  cudf::table_view input_table;  // reference table the output is compared with
+  bool is_checked{false};        // becomes true on the first `check_once` call
+};
diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp
index 31bb5dafa88..ecddab91b79 100644
--- a/cpp/benchmarks/io/json/json_reader_input.cpp
+++ b/cpp/benchmarks/io/json/json_reader_input.cpp
@@ -29,11 +29,16 @@
 constexpr size_t data_size         = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
-void json_read_common(cuio_source_sink_pair& source_sink, nvbench::state& state)
+void json_read_common(cudf::io::json_writer_options const& write_opts,
+                      cuio_source_sink_pair& source_sink,
+                      nvbench::state& state)
 {
+  cudf::io::write_json(write_opts);
+
   cudf::io::json_reader_options read_opts =
     cudf::io::json_reader_options::builder(source_sink.make_source_info());
 
+  auto checker          = benchmark_roundtrip_checker{write_opts.get_table()};
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
@@ -41,8 +46,9 @@ void json_read_common(cuio_source_sink_pair& source_sink, nvbench::state& state)
                try_drop_l3_cache();
 
                timer.start();
-               cudf::io::read_json(read_opts);
+               auto const res = cudf::io::read_json(read_opts);
                timer.stop();
+               checker.check_once(res.tbl->view());
              });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
@@ -67,19 +73,16 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_typ
   auto const source_type = IO;
   cuio_source_sink_pair source_sink(source_type);
 
-  {
-    auto const tbl = create_random_table(
-      cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder());
-    auto const view = tbl->view();
+  auto const tbl = create_random_table(
+    cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder());
+  auto const view = tbl->view();
 
-    cudf::io::json_writer_options const write_opts =
-      cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view)
-        .na_rep("null")
-        .rows_per_chunk(100'000);
-    cudf::io::write_json(write_opts);
-  }
+  cudf::io::json_writer_options const write_opts =
+    cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view)
+      .na_rep("null")
+      .rows_per_chunk(100'000);
 
-  json_read_common(source_sink, state);
+  json_read_common(write_opts, source_sink, state);
 }
 
 template <data_type DataType, cudf::io::io_type IO>
@@ -89,18 +92,17 @@ void BM_json_read_data_type(
   auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
   auto const source_type = IO;
   cuio_source_sink_pair source_sink(source_type);
-  {
-    auto const tbl = create_random_table(
-      cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder());
-    auto const view = tbl->view();
-
-    cudf::io::json_writer_options const write_opts =
-      cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view)
-        .na_rep("null")
-        .rows_per_chunk(100'000);
-    cudf::io::write_json(write_opts);
-  }
-  json_read_common(source_sink, state);
+
+  auto const tbl = create_random_table(
+    cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder());
+  auto const view = tbl->view();
+
+  cudf::io::json_writer_options const write_opts =
+    cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view)
+      .na_rep("null")
+      .rows_per_chunk(100'000);
+
+  json_read_common(write_opts, source_sink, state);
 }
 
 using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index b6e15fb3923..3053a8f1684 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -38,6 +38,7 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
   cudf::io::orc_reader_options read_opts =
     cudf::io::orc_reader_options::builder(source_sink.make_source_info());
 
+  auto checker          = benchmark_roundtrip_checker{opts.get_table()};
   auto mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
@@ -45,8 +46,9 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
                try_drop_l3_cache();
 
                timer.start();
-               cudf::io::read_orc(read_opts);
+               auto const res = cudf::io::read_orc(read_opts);
                timer.stop();
+               checker.check_once(res.tbl->view());
              });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp
index 647a411c89d..a7404025a76 100644
--- a/cpp/benchmarks/io/orc/orc_reader_options.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp
@@ -90,6 +90,10 @@ void BM_orc_read_varying_options(nvbench::state& state,
     cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes();
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
 
+  std::optional<benchmark_roundtrip_checker> checker;
+  if (ColSelection == column_selection::ALL and RowSelection == row_selection::ALL) {
+    checker = benchmark_roundtrip_checker{options.get_table()};
+  }
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
@@ -113,11 +117,14 @@ void BM_orc_read_varying_options(nvbench::state& state,
           default: CUDF_FAIL("Unsupported row selection method");
         }
 
-        rows_read += cudf::io::read_orc(read_options).tbl->num_rows();
+        auto const res = cudf::io::read_orc(read_options);
+        if (is_last_chunk) { timer.stop(); }
+
+        rows_read += res.tbl->num_rows();
+        if (checker.has_value()) { checker->check_once(res.tbl->view()); }
       }
 
       CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
-      timer.stop();
     });
 
   auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
index 80303ea04af..682c897b24a 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -38,6 +38,7 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
   cudf::io::parquet_reader_options read_opts =
     cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
 
+  auto checker          = benchmark_roundtrip_checker{write_opts.get_table()};
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
@@ -45,8 +46,9 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
                try_drop_l3_cache();
 
                timer.start();
-               cudf::io::read_parquet(read_opts);
+               auto const res = cudf::io::read_parquet(read_opts);
                timer.stop();
+               checker.check_once(res.tbl->view());
              });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
index 4105f2182d7..c33b7919a8d 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
@@ -91,6 +91,10 @@ void BM_parquet_read_options(nvbench::state& state,
   auto constexpr num_row_groups = data_size / row_group_size;
   auto constexpr num_chunks     = 1;
 
+  std::optional<benchmark_roundtrip_checker> checker;
+  if (ColSelection == column_selection::ALL and RowSelection == row_selection::ALL) {
+    checker = benchmark_roundtrip_checker{options.get_table()};
+  }
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
@@ -115,11 +119,14 @@ void BM_parquet_read_options(nvbench::state& state,
           default: CUDF_FAIL("Unsupported row selection method");
         }
 
-        rows_read += cudf::io::read_parquet(read_options).tbl->num_rows();
+        auto const res = cudf::io::read_parquet(read_options);
+        if (is_last_chunk) { timer.stop(); }
+
+        rows_read += res.tbl->num_rows();
+        if (checker.has_value()) { checker->check_once(res.tbl->view()); }
       }
 
       CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
-      timer.stop();
     });
 
   auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");