-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CUDA] L2 regression objective for cuda_exp (#5452)
* add (l2) regression objective for cuda_exp * fix lint errors * correct time tag
- Loading branch information
Showing
7 changed files
with
298 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/*! | ||
* Copyright (c) 2021 Microsoft Corporation. All rights reserved. | ||
* Licensed under the MIT License. See LICENSE file in the project root for | ||
* license information. | ||
*/ | ||
|
||
#ifdef USE_CUDA_EXP | ||
|
||
#include "cuda_regression_objective.hpp" | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
namespace LightGBM { | ||
|
||
// Builds the CUDA L2 regression objective from the training configuration.
// The owned device buffers start out null; they are allocated in Init().
CUDARegressionL2loss::CUDARegressionL2loss(const Config& config):
  RegressionL2loss(config),
  cuda_trans_label_(nullptr),
  cuda_block_buffer_(nullptr) {}
|
||
// Reconstructs the objective from serialized model strings.
// NOTE: the device-buffer members must be null-initialized here as well —
// the base-class constructor does not touch them, and the destructor
// unconditionally hands them to DeallocateCUDAMemory, so leaving them
// uninitialized would free indeterminate pointers.
CUDARegressionL2loss::CUDARegressionL2loss(const std::vector<std::string>& strs):
  RegressionL2loss(strs) {
  cuda_block_buffer_ = nullptr;
  cuda_trans_label_ = nullptr;
}
|
||
// Releases the device buffers owned by this objective. DeallocateCUDAMemory
// is presumably null-safe — cuda_trans_label_ is only allocated when sqrt_
// is set, and neither buffer is allocated before Init() runs. TODO confirm.
CUDARegressionL2loss::~CUDARegressionL2loss() {
  DeallocateCUDAMemory(&cuda_block_buffer_, __FILE__, __LINE__);
  DeallocateCUDAMemory(&cuda_trans_label_, __FILE__, __LINE__);
}
|
||
// Sets up device-side state for training: runs the host-side base Init,
// caches the device pointers to labels/weights held by the metadata, and
// allocates the per-block reduction buffer used by the init-score kernel.
void CUDARegressionL2loss::Init(const Metadata& metadata, data_size_t num_data) {
  RegressionL2loss::Init(metadata, num_data);
  cuda_labels_ = metadata.cuda_metadata()->cuda_label();
  cuda_weights_ = metadata.cuda_metadata()->cuda_weights();
  // One partial result per gradient block (ceil division).
  num_get_gradients_blocks_ = (num_data_ + GET_GRADIENTS_BLOCK_SIZE_REGRESSION - 1) / GET_GRADIENTS_BLOCK_SIZE_REGRESSION;
  AllocateCUDAMemory<double>(&cuda_block_buffer_, static_cast<size_t>(num_get_gradients_blocks_), __FILE__, __LINE__);
  if (sqrt_) {
    // sqrt mode: upload the host-side transformed labels (presumably the
    // sqrt-transformed targets prepared by the base class — confirm against
    // RegressionL2loss) and train against them instead of the raw labels.
    InitCUDAMemoryFromHostMemory<label_t>(&cuda_trans_label_, trans_label_.data(), trans_label_.size(), __FILE__, __LINE__);
    cuda_labels_ = cuda_trans_label_;
  }
}
|
||
// Computes per-row gradients and hessians on the device by delegating to
// the virtual kernel launcher; all three buffers are device pointers.
void CUDARegressionL2loss::GetGradients(const double* score, score_t* gradients, score_t* hessians) const {
  LaunchGetGradientsKernel(score, gradients, hessians);
}
|
||
// Returns the initial boosting score, computed on the device as the
// (weighted) mean label. The unnamed int parameter is ignored.
double CUDARegressionL2loss::BoostFromScore(int) const {
  return LaunchCalcInitScoreKernel();
}
|
||
// Converts raw model scores into final predictions on the device
// (identity unless the sqrt transform is enabled — see the kernel).
void CUDARegressionL2loss::ConvertOutputCUDA(const data_size_t num_data, const double* input, double* output) const {
  LaunchConvertOutputCUDAKernel(num_data, input, output);
}
|
||
// Renews the leaf outputs of a finished tree on the device. The base L2
// launcher is a no-op; loss subclasses override it. Synchronizes afterwards
// so the updated leaf values are complete before the host proceeds, and
// wraps the work in a global timer for profiling.
void CUDARegressionL2loss::RenewTreeOutputCUDA(
  const double* score,
  const data_size_t* data_indices_in_leaf,
  const data_size_t* num_data_in_leaf,
  const data_size_t* data_start_in_leaf,
  const int num_leaves,
  double* leaf_value) const {
  global_timer.Start("CUDARegressionL2loss::LaunchRenewTreeOutputCUDAKernel");
  LaunchRenewTreeOutputCUDAKernel(score, data_indices_in_leaf, num_data_in_leaf, data_start_in_leaf, num_leaves, leaf_value);
  SynchronizeCUDADevice(__FILE__, __LINE__);
  global_timer.Stop("CUDARegressionL2loss::LaunchRenewTreeOutputCUDAKernel");
}
|
||
|
||
} // namespace LightGBM | ||
|
||
#endif // USE_CUDA_EXP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/*! | ||
* Copyright (c) 2021 Microsoft Corporation. All rights reserved. | ||
* Licensed under the MIT License. See LICENSE file in the project root for | ||
* license information. | ||
*/ | ||
|
||
#ifdef USE_CUDA_EXP | ||
|
||
#include "cuda_regression_objective.hpp" | ||
#include <LightGBM/cuda/cuda_algorithms.hpp> | ||
|
||
namespace LightGBM { | ||
|
||
// Computes the initial score as the (weighted) mean label:
// sum(label * weight) / sum(weight), with implicit weight 1 when no weights
// are provided. The global reductions leave their final result in
// cuda_block_buffer_[0], which is copied back to the host.
// Precondition: num_data_ > 0 and (in the weighted case) a non-zero total
// weight — otherwise the division yields NaN/inf.
double CUDARegressionL2loss::LaunchCalcInitScoreKernel() const {
  // Use double literals: these are doubles (the original 0.0f float
  // literals were an idiom slip, though numerically equal to 0.0).
  double label_sum = 0.0;
  double weight_sum = 0.0;
  if (cuda_weights_ == nullptr) {
    ShuffleReduceSumGlobal<label_t, double>(cuda_labels_, static_cast<size_t>(num_data_), cuda_block_buffer_);
    CopyFromCUDADeviceToHost<double>(&label_sum, cuda_block_buffer_, 1, __FILE__, __LINE__);
    // Unweighted: every row carries weight 1, so the total is the row count.
    weight_sum = static_cast<double>(num_data_);
  } else {
    ShuffleReduceDotProdGlobal<label_t, double>(cuda_labels_, cuda_weights_, static_cast<size_t>(num_data_), cuda_block_buffer_);
    CopyFromCUDADeviceToHost<double>(&label_sum, cuda_block_buffer_, 1, __FILE__, __LINE__);
    ShuffleReduceSumGlobal<label_t, double>(cuda_weights_, static_cast<size_t>(num_data_), cuda_block_buffer_);
    CopyFromCUDADeviceToHost<double>(&weight_sum, cuda_block_buffer_, 1, __FILE__, __LINE__);
  }
  return label_sum / weight_sum;
}
|
||
// Maps raw scores to final predictions, one thread per element, 1-D grid.
// When sqrt is set the inverse transform sign(x) * x^2 is applied
// (presumably undoing a sqrt label transform applied at training time —
// confirm against RegressionL2loss); otherwise the score passes through.
// Out-of-range tail threads exit via the bounds guard.
__global__ void ConvertOutputCUDAKernel_Regression(const bool sqrt, const data_size_t num_data, const double* input, double* output) {
  // Use data_size_t for the index (the original stored the cast result in an
  // int, inconsistently with the gradient kernel) and double-precision
  // literals throughout, since input/output are double.
  const data_size_t data_index = static_cast<data_size_t>(blockIdx.x * blockDim.x + threadIdx.x);
  if (data_index < num_data) {
    if (sqrt) {
      const double sign = input[data_index] >= 0.0 ? 1.0 : -1.0;
      output[data_index] = sign * input[data_index] * input[data_index];
    } else {
      output[data_index] = input[data_index];
    }
  }
}
|
||
// Launches the output-conversion kernel on a 1-D grid, ceil-dividing the
// data count by the shared regression block size. The launch is
// asynchronous: no device synchronization is performed here.
void CUDARegressionL2loss::LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const {
  const int num_blocks = (num_data + GET_GRADIENTS_BLOCK_SIZE_REGRESSION - 1) / GET_GRADIENTS_BLOCK_SIZE_REGRESSION;
  ConvertOutputCUDAKernel_Regression<<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(sqrt_, num_data, input, output);
}
|
||
// L2 gradient kernel, one thread per data point (1-D grid):
//   gradient = (score - label) [* weight],  hessian = 1 [or weight].
// USE_WEIGHT selects the weighted variant at compile time; cuda_weights may
// be null when USE_WEIGHT is false, since it is never dereferenced then.
template <bool USE_WEIGHT>
__global__ void GetGradientsKernel_RegressionL2(const double* cuda_scores, const label_t* cuda_labels, const label_t* cuda_weights, const data_size_t num_data,
  score_t* cuda_out_gradients, score_t* cuda_out_hessians) {
  const data_size_t data_index = static_cast<data_size_t>(blockDim.x * blockIdx.x + threadIdx.x);
  // Guard clause for the grid tail.
  if (data_index >= num_data) {
    return;
  }
  const score_t residual = static_cast<score_t>(cuda_scores[data_index] - cuda_labels[data_index]);
  if (USE_WEIGHT) {
    const score_t weight = static_cast<score_t>(cuda_weights[data_index]);
    cuda_out_gradients[data_index] = residual * weight;
    cuda_out_hessians[data_index] = weight;
  } else {
    cuda_out_gradients[data_index] = residual;
    cuda_out_hessians[data_index] = 1.0f;
  }
}
|
||
// Launches the gradient kernel on a 1-D grid, selecting the weighted or
// unweighted template instantiation based on whether per-row weights exist.
// The launch is asynchronous: no device synchronization is performed here.
void CUDARegressionL2loss::LaunchGetGradientsKernel(const double* score, score_t* gradients, score_t* hessians) const {
  const int num_blocks = (num_data_ + GET_GRADIENTS_BLOCK_SIZE_REGRESSION - 1) / GET_GRADIENTS_BLOCK_SIZE_REGRESSION;
  if (cuda_weights_ == nullptr) {
    GetGradientsKernel_RegressionL2<false><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(score, cuda_labels_, nullptr, num_data_, gradients, hessians);
  } else {
    GetGradientsKernel_RegressionL2<true><<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(score, cuda_labels_, cuda_weights_, num_data_, gradients, hessians);
  }
}
|
||
|
||
} // namespace LightGBM | ||
|
||
#endif // USE_CUDA_EXP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/*! | ||
* Copyright (c) 2021 Microsoft Corporation. All rights reserved. | ||
* Licensed under the MIT License. See LICENSE file in the project root for | ||
* license information. | ||
*/ | ||
|
||
#ifndef LIGHTGBM_NEW_CUDA_REGRESSION_OBJECTIVE_HPP_ | ||
#define LIGHTGBM_NEW_CUDA_REGRESSION_OBJECTIVE_HPP_ | ||
|
||
#ifdef USE_CUDA_EXP | ||
|
||
#define GET_GRADIENTS_BLOCK_SIZE_REGRESSION (1024) | ||
|
||
#include <LightGBM/cuda/cuda_objective_function.hpp> | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
#include "../regression_objective.hpp" | ||
|
||
namespace LightGBM { | ||
|
||
class CUDARegressionL2loss : public CUDAObjectiveInterface, public RegressionL2loss { | ||
public: | ||
explicit CUDARegressionL2loss(const Config& config); | ||
|
||
explicit CUDARegressionL2loss(const std::vector<std::string>& strs); | ||
|
||
~CUDARegressionL2loss(); | ||
|
||
void Init(const Metadata& metadata, data_size_t num_data) override; | ||
|
||
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override; | ||
|
||
void ConvertOutputCUDA(const data_size_t num_data, const double* input, double* output) const override; | ||
|
||
double BoostFromScore(int) const override; | ||
|
||
void RenewTreeOutputCUDA(const double* score, const data_size_t* data_indices_in_leaf, const data_size_t* num_data_in_leaf, | ||
const data_size_t* data_start_in_leaf, const int num_leaves, double* leaf_value) const override; | ||
|
||
std::function<void(data_size_t, const double*, double*)> GetCUDAConvertOutputFunc() const override { | ||
return [this] (data_size_t num_data, const double* input, double* output) { | ||
ConvertOutputCUDA(num_data, input, output); | ||
}; | ||
} | ||
|
||
bool IsConstantHessian() const override { | ||
if (cuda_weights_ == nullptr) { | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
bool IsCUDAObjective() const override { return true; } | ||
|
||
protected: | ||
virtual double LaunchCalcInitScoreKernel() const; | ||
|
||
virtual void LaunchGetGradientsKernel(const double* score, score_t* gradients, score_t* hessians) const; | ||
|
||
virtual void LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const; | ||
|
||
virtual void LaunchRenewTreeOutputCUDAKernel( | ||
const double* /*score*/, const data_size_t* /*data_indices_in_leaf*/, const data_size_t* /*num_data_in_leaf*/, | ||
const data_size_t* /*data_start_in_leaf*/, const int /*num_leaves*/, double* /*leaf_value*/) const {} | ||
|
||
const label_t* cuda_labels_; | ||
const label_t* cuda_weights_; | ||
label_t* cuda_trans_label_; | ||
double* cuda_block_buffer_; | ||
data_size_t num_get_gradients_blocks_; | ||
data_size_t num_init_score_blocks_; | ||
}; | ||
|
||
|
||
} // namespace LightGBM | ||
|
||
#endif // USE_CUDA_EXP | ||
#endif // LIGHTGBM_NEW_CUDA_REGRESSION_OBJECTIVE_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters