Skip to content

Commit

Permalink
Initial work on the precision metric.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed May 30, 2023
1 parent 0462569 commit c3a15c2
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 38 deletions.
74 changes: 51 additions & 23 deletions src/metric/rank_metric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,23 +253,6 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
};

/*! \brief Precision at N, for both classification and rank */
struct EvalPrecision : public EvalRank {
 public:
  explicit EvalPrecision(const char* name, const char* param) : EvalRank(name, param) {}

  double EvalGroup(PredIndPairContainer *recptr) const override {
    PredIndPairContainer& rec = *recptr;
    // Order by predicted score; stable sort keeps tied entries in their
    // original order so the result is deterministic.
    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
    // Count relevant documents (non-zero label) among the top-n positions.
    std::size_t const n = std::min(rec.size(), static_cast<std::size_t>(this->topn));
    unsigned hits = 0;
    for (std::size_t i = 0; i < n; ++i) {
      hits += (rec[i].second != 0);
    }
    // Precision@N divides by N even when the group has fewer than N entries.
    return static_cast<double>(hits) / this->topn;
  }
};

/*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
struct EvalCox : public MetricNoCache {
public:
Expand Down Expand Up @@ -321,10 +304,6 @@ XGBOOST_REGISTER_METRIC(AMS, "ams")
.describe("AMS metric for higgs.")
.set_body([](const char* param) { return new EvalAMS(param); });

XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); });

XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char*) { return new EvalCox(); });
Expand Down Expand Up @@ -387,6 +366,8 @@ class EvalRankWithCache : public Metric {
return result;
}

[[nodiscard]] const char* Name() const override { return name_.c_str(); }

virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<Cache> p_cache) = 0;
};
Expand All @@ -408,6 +389,51 @@ double Finalize(MetaInfo const& info, double score, double sw) {
}
} // namespace

/**
 * \brief Precision@k (`pre@k`) for learning to rank.
 *
 *   Per query group, documents are ranked by predicted score and the group
 *   score is the sum of labels in the top-k positions divided by k.  Group
 *   scores are then combined as a weighted average.
 */
class EvalPrecision : public EvalRankWithCache<ltr::MAPCache> {
 public:
  using EvalRankWithCache::EvalRankWithCache;

  double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
              std::shared_ptr<ltr::MAPCache> p_cache) final {
    // FIXME(trivialfis): check whether minus is applicable here.
    if (ctx_->IsCUDA()) {
      auto pre = cuda_impl::PreScore(ctx_, info, predt, minus_, p_cache);
      return Finalize(info, pre.Residue(), pre.Weights());
    }

    auto gptr = p_cache->DataGroupPtr(ctx_);
    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
    // Indices that sort predictions within each group (best first).
    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());

    auto pre = p_cache->Map(ctx_);
    auto topk = p_cache->Param().TopK();

    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
      auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
      auto g_rank = rank_idx.subspan(gptr[g]);

      // Clamp the scan to the group size; the denominator stays k by the
      // definition of precision@k.
      auto n = std::min(static_cast<std::size_t>(topk), g_label.Size());
      double n_hits{0.0};
      for (std::size_t i = 0; i < n; ++i) {
        n_hits += g_label(g_rank[i]);
      }
      pre[g] = n_hits / static_cast<double>(topk);
    });

    // Weighted average over groups.  NOTE(review): assumes MakeOptionalWeights
    // yields a unit weight per group when `info.weights_` is empty — confirm
    // against OptionalWeights' definition.
    auto sw = 0.0;
    auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
    if (!weight.Empty()) {
      CHECK_EQ(weight.weights.size(), p_cache->Groups());
    }
    for (std::size_t i = 0; i < pre.size(); ++i) {
      pre[i] = pre[i] * weight[i];
      sw += weight[i];
    }
    auto sum = std::accumulate(pre.cbegin(), pre.cend(), 0.0);
    return Finalize(info, sum, sw);
  }
};

/**
* \brief Implement the NDCG score function for learning to rank.
*
Expand All @@ -416,7 +442,6 @@ double Finalize(MetaInfo const& info, double score, double sw) {
class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }

double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<ltr::NDCGCache> p_cache) override {
Expand Down Expand Up @@ -475,7 +500,6 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }

double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
std::shared_ptr<ltr::MAPCache> p_cache) override {
Expand Down Expand Up @@ -527,6 +551,10 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
}
};

XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); });

XGBOOST_REGISTER_METRIC(EvalMAP, "map")
.describe("map@k for ranking.")
.set_body([](char const* param) {
Expand Down
50 changes: 42 additions & 8 deletions src/metric/rank_metric.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
return EvalMetricT::EvalMetric(segment_pred_sorter, dlabels.Values().data(), *this);
}

const char* Name() const override {
[[nodiscard]] const char* Name() const override {
return name.c_str();
}

Expand Down Expand Up @@ -133,16 +133,50 @@ namespace cuda_impl {
/**
 * \brief GPU implementation of precision@k.
 *
 *   Each document contributes label/k if it falls inside the top-k of its
 *   group, 0 otherwise; contributions are segment-reduced per group, then
 *   combined as a weighted (score, weight) pair over all groups.
 */
PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
                            HostDeviceVector<float> const &predt, bool minus,
                            std::shared_ptr<ltr::MAPCache> p_cache) {
  auto d_group_ptr = p_cache->DataGroupPtr(ctx);
  auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);

  predt.SetDevice(ctx->gpu_id);
  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
  auto topk = p_cache->Param().TopK();
  auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);

  // Per-document contribution: label / k inside the top-k, 0 otherwise.  The
  // early-out precedes the label read so positions past k touch no memory.
  auto it = dh::MakeTransformIterator<double>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
        auto group_idx = dh::SegmentId(d_group_ptr, i);
        auto g_begin = d_group_ptr[group_idx];
        auto g_end = d_group_ptr[group_idx + 1];
        i -= g_begin;  // position of this document within its group
        if (i >= topk) {
          return 0.0;
        }
        auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
        auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
        return g_label(g_rank[i]) / static_cast<double>(topk);
      });

  auto cuctx = ctx->CUDACtx();
  auto pre = p_cache->Map(ctx);
  thrust::fill_n(cuctx->CTP(), pre.data(), pre.size(), 0.0);

  // CUB two-phase call: the first invocation (null workspace) only queries the
  // required size; the second performs the actual segmented reduction.
  std::size_t bytes;
  cub::DeviceSegmentedReduce::Sum(nullptr, bytes, it, pre.data(), p_cache->Groups(),
                                  d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
  dh::TemporaryArray<char> temp(bytes);
  cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(), p_cache->Groups(),
                                  d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());

  if (!d_weight.Empty()) {
    CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
  }
  // Fold per-group scores and weights into a single (residue, weight) pair.
  auto val_it = dh::MakeTransformIterator<PackedReduceResult>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) {
        return PackedReduceResult{pre[g] * d_weight[g], static_cast<double>(d_weight[g])};
      });
  auto result =
      thrust::reduce(cuctx->CTP(), val_it, val_it + pre.size(), PackedReduceResult{0.0, 0.0});
  return result;
}

PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
Expand Down
21 changes: 14 additions & 7 deletions src/metric/rank_metric.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <memory> // for shared_ptr
#include <memory> // for shared_ptr

#include "../common/common.h" // for AssertGPUSupport
#include "../common/ranking_utils.h" // for NDCGCache, MAPCache
Expand All @@ -12,9 +12,7 @@
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector

namespace xgboost {
namespace metric {
namespace cuda_impl {
namespace xgboost::metric::cuda_impl {
PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache);
Expand All @@ -23,6 +21,10 @@ PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache);

PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache);

#if !defined(XGBOOST_USE_CUDA)
inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
HostDeviceVector<float> const &, bool,
Expand All @@ -37,8 +39,13 @@ inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
common::AssertGPUSupport();
return {};
}

// CPU-only build: GPU precision@k is unavailable; abort with a helpful error.
inline PackedReduceResult PreScore(Context const *, MetaInfo const &,
                                   HostDeviceVector<float> const &, bool,
                                   std::shared_ptr<ltr::MAPCache>) {
  common::AssertGPUSupport();
  return PackedReduceResult{};
}
#endif
} // namespace cuda_impl
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric::cuda_impl
#endif // XGBOOST_METRIC_RANK_METRIC_H_

0 comments on commit c3a15c2

Please sign in to comment.