diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc
index c4549458d28f..03110e457659 100644
--- a/src/metric/rank_metric.cc
+++ b/src/metric/rank_metric.cc
@@ -253,23 +253,6 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
   virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
 };
 
-/*! \brief Precision at N, for both classification and rank */
-struct EvalPrecision : public EvalRank {
- public:
-  explicit EvalPrecision(const char* name, const char* param) : EvalRank(name, param) {}
-
-  double EvalGroup(PredIndPairContainer *recptr) const override {
-    PredIndPairContainer &rec(*recptr);
-    // calculate Precision
-    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
-    unsigned nhit = 0;
-    for (size_t j = 0; j < rec.size() && j < this->topn; ++j) {
-      nhit += (rec[j].second != 0);
-    }
-    return static_cast(nhit) / this->topn;
-  }
-};
-
 /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
 struct EvalCox : public MetricNoCache {
  public:
@@ -321,10 +304,6 @@ XGBOOST_REGISTER_METRIC(AMS, "ams")
 .describe("AMS metric for higgs.")
 .set_body([](const char* param) { return new EvalAMS(param); });
 
-XGBOOST_REGISTER_METRIC(Precision, "pre")
-.describe("precision@k for rank.")
-.set_body([](const char* param) { return new EvalPrecision("pre", param); });
-
 XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
 .describe("Negative log partial likelihood of Cox proportional hazards model.")
 .set_body([](const char*) { return new EvalCox(); });
@@ -387,6 +366,8 @@ class EvalRankWithCache : public Metric {
     return result;
   }
 
+  [[nodiscard]] const char* Name() const override { return name_.c_str(); }
+
   virtual double Eval(HostDeviceVector const& preds, MetaInfo const& info,
                       std::shared_ptr p_cache) = 0;
 };
@@ -408,6 +389,59 @@ double Finalize(MetaInfo const& info, double score, double sw) {
 }
 }  // namespace
 
+/**
+ * \brief Precision at k for learning to rank.
+ *
+ * For every query group, sums the labels at the first min(param_.TopK(), group size) entries
+ * of the index returned by SortedIdx and divides by p_cache->Param().TopK(). The per-group
+ * values are then multiplied by the group weights and accumulated before calling Finalize.
+ */
+class EvalPrecision : public EvalRankWithCache {
+ public:
+  using EvalRankWithCache::EvalRankWithCache;
+  double Eval(HostDeviceVector const& predt, MetaInfo const& info,
+              std::shared_ptr p_cache) final {
+    // Fixme: check whether minus is applicable here.
+    if (ctx_->IsCUDA()) {
+      auto pre = cuda_impl::PreScore(ctx_, info, predt, minus_, p_cache);
+      return Finalize(info, pre.Residue(), pre.Weights());
+    }
+
+    auto gptr = p_cache->DataGroupPtr(ctx_);
+    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
+    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
+
+    auto pre = p_cache->Map(ctx_);
+    auto topk = p_cache->Param().TopK();
+
+    // Each group writes only its own slot of `pre`.
+    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
+      auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
+      auto g_rank = rank_idx.subspan(gptr[g]);
+
+      auto n = std::min(static_cast(param_.TopK()), g_label.Size());
+      double n_hits{0.0};
+      for (std::size_t i = 0; i < n; ++i) {
+        n_hits += g_label(g_rank[i]);
+      }
+      pre[g] = n_hits / topk;
+    });
+
+    auto sw = 0.0;
+    auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
+    if (!weight.Empty()) {
+      CHECK_EQ(weight.weights.size(), p_cache->Groups());
+    }
+    // Weight every group score and accumulate the weights alongside.
+    for (std::size_t i = 0; i < pre.size(); ++i) {
+      pre[i] = pre[i] * weight[i];
+      sw += weight[i];
+    }
+    auto sum = std::accumulate(pre.cbegin(), pre.cend(), 0.0);
+    return Finalize(info, sum, sw);
+  }
+};
+
 /**
  * \brief Implement the NDCG score function for learning to rank.
  *
@@ -416,7 +450,6 @@ double Finalize(MetaInfo const& info, double score, double sw) {
 class EvalNDCG : public EvalRankWithCache {
  public:
   using EvalRankWithCache::EvalRankWithCache;
-  const char* Name() const override { return name_.c_str(); }
 
   double Eval(HostDeviceVector const& preds, MetaInfo const& info,
               std::shared_ptr p_cache) override {
@@ -475,7 +508,6 @@ class EvalNDCG : public EvalRankWithCache {
 class EvalMAPScore : public EvalRankWithCache {
  public:
   using EvalRankWithCache::EvalRankWithCache;
-  const char* Name() const override { return name_.c_str(); }
 
   double Eval(HostDeviceVector const& predt, MetaInfo const& info,
               std::shared_ptr p_cache) override {
@@ -527,6 +559,10 @@ class EvalMAPScore : public EvalRankWithCache {
   }
 };
 
+XGBOOST_REGISTER_METRIC(Precision, "pre")
+    .describe("precision@k for rank.")
+    .set_body([](const char* param) { return new EvalPrecision("pre", param); });
+
 XGBOOST_REGISTER_METRIC(EvalMAP, "map")
     .describe("map@k for ranking.")
     .set_body([](char const* param) {
diff --git a/src/metric/rank_metric.cu b/src/metric/rank_metric.cu
index aa0239f0e85f..6fe7ba908f3d 100644
--- a/src/metric/rank_metric.cu
+++ b/src/metric/rank_metric.cu
@@ -57,7 +57,7 @@ struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
     return EvalMetricT::EvalMetric(segment_pred_sorter, dlabels.Values().data(), *this);
   }
 
-  const char* Name() const override {
+  [[nodiscard]] const char* Name() const override {
     return name.c_str();
   }
 
@@ -133,16 +133,60 @@ namespace cuda_impl {
+/**
+ * \brief Device implementation of precision@k.
+ *
+ * Maps every element whose position inside its group is below topk to label / topk (and every
+ * other element to 0), sums these per group with a segmented reduction, then reduces the
+ * (weighted group score, group weight) pairs. The `minus` argument is not read here.
+ */
 PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
                             HostDeviceVector const &predt, bool minus,
                             std::shared_ptr p_cache) {
-  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
   auto d_group_ptr = p_cache->DataGroupPtr(ctx);
-  auto it = dh::MakeTransformIterator(
-      thrust::make_counting_iterator(0ul),
-      [=] XGBOOST_DEVICE(std::size_t i) {
+  auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+
+  predt.SetDevice(ctx->gpu_id);
+  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
+  auto topk = p_cache->Param().TopK();
+  auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
+
+  auto it = dh::MakeTransformIterator(
+      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
         auto group_idx = dh::SegmentId(d_group_ptr, i);
+        auto g_begin = d_group_ptr[group_idx];
+        auto g_end = d_group_ptr[group_idx + 1];
+        i -= g_begin;
+        auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
+        auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
+        auto y = g_label(g_rank[i]);
+        if (i >= topk) {
+          return 0.0;
+        }
+        return y / static_cast(topk);
       });
-  PackedReduceResult n_hits = thrust::reduce(it, it + info.num_row_);
-  double topk = p_cache->Param().TopK();
-  return n_hits / topk;
+
+  auto cuctx = ctx->CUDACtx();
+  auto pre = p_cache->Map(ctx);
+  thrust::fill_n(cuctx->CTP(), pre.data(), pre.size(), 0.0);
+
+  std::size_t bytes;
+  // A null storage pointer makes this call report the required temporary size in `bytes`.
+  cub::DeviceSegmentedReduce::Sum(nullptr, bytes, it, pre.data(), p_cache->Groups(),
+                                  d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
+  dh::TemporaryArray temp(bytes);
+  // The reduction only runs when the allocated storage is passed; with nullptr this call
+  // would be another size query and `pre` would stay zero-filled.
+  cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(), p_cache->Groups(),
+                                  d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
+
+  if (!d_weight.Empty()) {
+    CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
+  }
+  auto val_it = dh::MakeTransformIterator(
+      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) {
+        return PackedReduceResult{pre[g] * d_weight[g], static_cast(d_weight[g])};
+      });
+  auto result =
+      thrust::reduce(cuctx->CTP(), val_it, val_it + pre.size(), PackedReduceResult{0.0, 0.0});
+  return result;
 }
 
 PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
diff --git a/src/metric/rank_metric.h b/src/metric/rank_metric.h
index b3b121973ef8..b7761c01b6dd 100644
--- a/src/metric/rank_metric.h
+++ b/src/metric/rank_metric.h
@@ -3,7 +3,7 @@
 /**
  * Copyright 2023 by XGBoost Contributors
  */
-#include // for shared_ptr
+#include // for shared_ptr
 
 #include "../common/common.h"         // for AssertGPUSupport
 #include "../common/ranking_utils.h"  // for NDCGCache, MAPCache
@@ -12,9 +12,7 @@
 #include "xgboost/data.h"                // for MetaInfo
 #include "xgboost/host_device_vector.h"  // for HostDeviceVector
 
-namespace xgboost {
-namespace metric {
-namespace cuda_impl {
+namespace xgboost::metric::cuda_impl {  // C++17 nested namespace definition
 PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
                              HostDeviceVector const &predt, bool minus,
                              std::shared_ptr p_cache);
@@ -23,6 +21,10 @@ PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
                             HostDeviceVector const &predt, bool minus,
                             std::shared_ptr p_cache);
 
+PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,  // precision@k
+                            HostDeviceVector const &predt, bool minus,
+                            std::shared_ptr p_cache);  // defined in rank_metric.cu
+
 #if !defined(XGBOOST_USE_CUDA)
 inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
                                     HostDeviceVector const &, bool,
@@ -37,8 +39,13 @@ inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
   common::AssertGPUSupport();
   return {};
 }
+
+inline PackedReduceResult PreScore(Context const *, MetaInfo const &,
+                                   HostDeviceVector const &, bool,
+                                   std::shared_ptr) {
+  common::AssertGPUSupport();  // this stub is compiled only when XGBOOST_USE_CUDA is undefined
+  return {};
+}
 #endif
-}  // namespace cuda_impl
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric::cuda_impl
 #endif  // XGBOOST_METRIC_RANK_METRIC_H_