diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index 3b5c1b76fa1c..e536fc0ac90b 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -406,7 +406,8 @@ void GPUHistEvaluator::EvaluateSplits(Context const *ctx, const std::vector splits_out_storage(d_inputs.size()); auto out_splits = dh::ToSpan(splits_out_storage); - bool is_passive_party = is_column_split_ && collective::IsEncrypted() && collective::GetRank() != 0; + bool is_passive_party = is_column_split_ && collective::IsEncrypted() + && collective::GetRank() != 0; bool is_active_party = !is_passive_party; // Under secure vertical setting, only the active party is able to evaluate the split // based on global histogram. Other parties will receive the final best split information @@ -421,7 +422,8 @@ void GPUHistEvaluator::EvaluateSplits(Context const *ctx, const std::vector all_candidate_storage(out_splits.size() * world_size); + dh::TemporaryArray all_candidate_storage( + out_splits.size() * world_size); auto all_candidates = dh::ToSpan(all_candidate_storage); auto current_rank = all_candidates.subspan(collective::GetRank() * out_splits.size(), out_splits.size()); diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu index d87f5cc077ac..def1b3016af3 100644 --- a/src/tree/gpu_hist/histogram.cu +++ b/src/tree/gpu_hist/histogram.cu @@ -388,11 +388,12 @@ void DeviceHistogramBuilder::BuildHistogram(Context const* ctx, common::Span ridx, common::Span histogram, GradientQuantiser rounding, MetaInfo const& info) { - - auto IsSecureVertical = !info.IsRowSplit() && collective::IsDistributed() && collective::IsEncrypted(); + auto IsSecureVertical = !info.IsRowSplit() && collective::IsDistributed() + && collective::IsEncrypted(); if (!IsSecureVertical) { // Regular training, build histogram locally - this->p_impl_->BuildHistogram(ctx->CUDACtx(), matrix, feature_groups, gpair, ridx, histogram, rounding); + this->p_impl_->BuildHistogram(ctx->CUDACtx(), matrix, feature_groups, + gpair, ridx, histogram, rounding); } else { // Encrypted vertical, build histogram using federated plugin auto const &comm = collective::GlobalCommGroup()->Ctx(ctx, DeviceOrd::CPU()); @@ -400,11 +401,12 @@ void DeviceHistogramBuilder::BuildHistogram(Context const* ctx, auto plugin = fed.EncryptionPlugin(); // Transmit matrix to plugin - if(!is_aggr_context_initialized_){ + if (!is_aggr_context_initialized_) { // Get cutptrs std::vector h_cuts_ptr(matrix.feature_segments.size()); dh::CopyDeviceSpanToVector(&h_cuts_ptr, matrix.feature_segments); - common::Span cutptrs = common::Span(h_cuts_ptr.data(), h_cuts_ptr.size()); + common::Span cutptrs = + common::Span(h_cuts_ptr.data(), h_cuts_ptr.size()); // Get bin_idx matrix auto kRows = matrix.n_rows; @@ -414,7 +416,8 @@ void DeviceHistogramBuilder::BuildHistogram(Context const* ctx, thrust::device_vector matrix_d(kRows * kCols); dh::LaunchN(kRows * kCols, ReadMatrixFunction(matrix, kCols, matrix_d.data().get())); thrust::copy(matrix_d.begin(), matrix_d.end(), h_bin_idx.begin()); - common::Span bin_idx = common::Span(h_bin_idx.data(), h_bin_idx.size()); + common::Span bin_idx = + common::Span(h_bin_idx.data(), h_bin_idx.size()); // Initialize plugin context plugin->Reset(h_cuts_ptr, h_bin_idx); @@ -443,12 +446,14 @@ void DeviceHistogramBuilder::BuildHistogram(Context const* ctx, HostDeviceVector hist_entries; std::vector recv_segments; collective::SafeColl( - collective::AllgatherV(ctx, linalg::MakeVec(hist_data), &recv_segments, &hist_entries)); + collective::AllgatherV(ctx, linalg::MakeVec(hist_data), + &recv_segments, &hist_entries)); // Call the plugin here to get the resulting histogram. Histogram from all workers are // gathered to the label owner. common::Span hist_aggr = - plugin->SyncEncryptedHistVert(common::RestoreType(hist_entries.HostSpan())); + plugin->SyncEncryptedHistVert( + common::RestoreType(hist_entries.HostSpan())); // Post process the AllGathered data auto world_size = collective::GetWorldSize(); diff --git a/tests/cpp/tree/gpu_hist/test_histogram.cu b/tests/cpp/tree/gpu_hist/test_histogram.cu index c9320f616983..f73fff58f05d 100644 --- a/tests/cpp/tree/gpu_hist/test_histogram.cu +++ b/tests/cpp/tree/gpu_hist/test_histogram.cu @@ -80,9 +80,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size, bool force_global) auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo()); DeviceHistogramBuilder builder; builder.Reset(&ctx, feature_groups.DeviceAccessor(ctx.Device()), force_global); - builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page->GetDeviceAccessor(ctx.Device()), feature_groups.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx, - d_histogram, quantiser); + d_histogram, quantiser, MetaInfo()); std::vector histogram_h(num_bins); dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(), @@ -95,9 +95,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size, bool force_global) auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo()); DeviceHistogramBuilder builder; builder.Reset(&ctx, feature_groups.DeviceAccessor(ctx.Device()), force_global); - builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page->GetDeviceAccessor(ctx.Device()), feature_groups.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx, - d_new_histogram, quantiser); + d_new_histogram, quantiser, MetaInfo()); std::vector new_histogram_h(num_bins); dh::safe_cuda(cudaMemcpy(new_histogram_h.data(), d_new_histogram.data(), @@ -119,9 +119,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size, bool force_global) dh::device_vector baseline(num_bins); DeviceHistogramBuilder builder; builder.Reset(&ctx, single_group.DeviceAccessor(ctx.Device()), force_global); - builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page->GetDeviceAccessor(ctx.Device()), single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx, - dh::ToSpan(baseline), quantiser); + dh::ToSpan(baseline), quantiser, MetaInfo()); std::vector baseline_h(num_bins); dh::safe_cuda(cudaMemcpy(baseline_h.data(), baseline.data().get(), @@ -185,9 +185,9 @@ void TestGPUHistogramCategorical(size_t num_categories) { FeatureGroups single_group(page->Cuts()); DeviceHistogramBuilder builder; builder.Reset(&ctx, single_group.DeviceAccessor(ctx.Device()), false); - builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page->GetDeviceAccessor(ctx.Device()), single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx, - dh::ToSpan(cat_hist), quantiser); + dh::ToSpan(cat_hist), quantiser, MetaInfo()); } /** @@ -201,9 +201,9 @@ void TestGPUHistogramCategorical(size_t num_categories) { FeatureGroups single_group(page->Cuts()); DeviceHistogramBuilder builder; builder.Reset(&ctx, single_group.DeviceAccessor(ctx.Device()), false); - builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page->GetDeviceAccessor(ctx.Device()), single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx, - dh::ToSpan(encode_hist), quantiser); + dh::ToSpan(encode_hist), quantiser, MetaInfo()); } std::vector h_cat_hist(cat_hist.size()); @@ -350,9 +350,9 @@ class HistogramExternalMemoryTest : public ::testing::TestWithParamDeviceAccessor(ctx.Device()), force_global); - builder.BuildHistogram(ctx.CUDACtx(), impl->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, impl->GetDeviceAccessor(ctx.Device()), fg->DeviceAccessor(ctx.Device()), gpair.ConstDeviceSpan(), ridx, - d_histogram, quantiser); + d_histogram, quantiser, MetaInfo()); ++k; } ASSERT_EQ(k, n_batches); @@ -373,9 +373,9 @@ class HistogramExternalMemoryTest : public ::testing::TestWithParamDeviceAccessor(ctx.Device()), force_global); - builder.BuildHistogram(ctx.CUDACtx(), page.GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page.GetDeviceAccessor(ctx.Device()), fg->DeviceAccessor(ctx.Device()), gpair.ConstDeviceSpan(), ridx, - d_histogram, quantiser); + d_histogram, quantiser, MetaInfo()); } std::vector h_single(single_hist.size()); diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index edd129353bdf..454d05d14df1 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -79,10 +79,10 @@ void TestBuildHist(bool use_shared_memory_histograms) { DeviceHistogramBuilder builder; builder.Reset(&ctx, maker.feature_groups->DeviceAccessor(ctx.Device()), !use_shared_memory_histograms); - builder.BuildHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()), + builder.BuildHistogram(&ctx, page->GetDeviceAccessor(ctx.Device()), maker.feature_groups->DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), maker.row_partitioner->GetRows(0), maker.hist.GetNodeHistogram(0), - *maker.quantiser); + *maker.quantiser, MetaInfo()); DeviceHistogramStorage<>& d_hist = maker.hist;