diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc
index c4549458d28f..03110e457659 100644
--- a/src/metric/rank_metric.cc
+++ b/src/metric/rank_metric.cc
@@ -253,23 +253,6 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
   virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
 };
 
-/*! \brief Precision at N, for both classification and rank */
-struct EvalPrecision : public EvalRank {
- public:
-  explicit EvalPrecision(const char* name, const char* param) : EvalRank(name, param) {}
-
-  double EvalGroup(PredIndPairContainer *recptr) const override {
-    PredIndPairContainer &rec(*recptr);
-    // calculate Precision
-    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
-    unsigned nhit = 0;
-    for (size_t j = 0; j < rec.size() && j < this->topn; ++j) {
-      nhit += (rec[j].second != 0);
-    }
-    return static_cast<double>(nhit) / this->topn;
-  }
-};
-
 /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
 struct EvalCox : public MetricNoCache {
  public:
@@ -321,10 +304,6 @@ XGBOOST_REGISTER_METRIC(AMS, "ams")
 .describe("AMS metric for higgs.")
 .set_body([](const char* param) { return new EvalAMS(param); });
 
-XGBOOST_REGISTER_METRIC(Precision, "pre")
-.describe("precision@k for rank.")
-.set_body([](const char* param) { return new EvalPrecision("pre", param); });
-
 XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
 .describe("Negative log partial likelihood of Cox proportional hazards model.")
 .set_body([](const char*) { return new EvalCox(); });
@@ -387,6 +366,8 @@ class EvalRankWithCache : public Metric {
     return result;
   }
 
+  [[nodiscard]] const char* Name() const override { return name_.c_str(); }  // metric name
+
   virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
                       std::shared_ptr<Cache> p_cache) = 0;
 };
@@ -408,6 +389,51 @@ double Finalize(MetaInfo const& info, double score, double sw) {
 }
 }  // namespace
 
+/*! \brief precision@k for ranking, evaluated per query group (CPU path, CUDA is delegated). */
+class EvalPrecision : public EvalRankWithCache<ltr::MAPCache> {
+ public:
+  using EvalRankWithCache::EvalRankWithCache;
+  double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
+              std::shared_ptr<ltr::MAPCache> p_cache) final {
+    // Fixme: check whether minus is applicable here.
+    if (ctx_->IsCUDA()) {
+      auto pre = cuda_impl::PreScore(ctx_, info, predt, minus_, p_cache);
+      return Finalize(info, pre.Residue(), pre.Weights());
+    }
+
+    auto gptr = p_cache->DataGroupPtr(ctx_);
+    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
+    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
+
+    auto pre = p_cache->Map(ctx_);
+    auto topk = p_cache->Param().TopK();
+    // Per group: sum labels over the first min(k, group size) ranked entries, then divide by k.
+    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
+      auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
+      auto g_rank = rank_idx.subspan(gptr[g], gptr[g + 1] - gptr[g]);
+      // Groups with fewer than k entries only contribute the entries they have.
+      auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());
+      double n_hits{0.0};
+      for (std::size_t i = 0; i < n; ++i) {
+        n_hits += g_label(g_rank[i]);
+      }
+      pre[g] = n_hits / topk;
+    });
+    // Apply the per-group weights and collect the weight total that Finalize receives.
+    auto sw = 0.0;
+    auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
+    if (!weight.Empty()) {
+      CHECK_EQ(weight.weights.size(), p_cache->Groups());
+    }
+    for (std::size_t i = 0; i < pre.size(); ++i) {
+      pre[i] = pre[i] * weight[i];
+      sw += weight[i];
+    }
+    auto sum = std::accumulate(pre.cbegin(), pre.cend(), 0.0);
+    return Finalize(info, sum, sw);
+  }
+};
+
 /**
  * \brief Implement the NDCG score function for learning to rank.
  *
@@ -416,7 +442,6 @@ double Finalize(MetaInfo const& info, double score, double sw) {
 class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
  public:
   using EvalRankWithCache::EvalRankWithCache;
-  const char* Name() const override { return name_.c_str(); }
 
   double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
               std::shared_ptr<ltr::NDCGCache> p_cache) override {
@@ -475,7 +500,6 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
 class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
  public:
   using EvalRankWithCache::EvalRankWithCache;
-  const char* Name() const override { return name_.c_str(); }
 
   double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
               std::shared_ptr<ltr::MAPCache> p_cache) override {
@@ -527,6 +551,10 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
   }
 };
 
+XGBOOST_REGISTER_METRIC(Precision, "pre")  // precision@k, implemented by EvalPrecision
+    .describe("precision@k for rank.")
+    .set_body([](const char* param) { return new EvalPrecision("pre", param); });
+
 XGBOOST_REGISTER_METRIC(EvalMAP, "map")
     .describe("map@k for ranking.")
     .set_body([](char const* param) {
diff --git a/src/metric/rank_metric.cu b/src/metric/rank_metric.cu
index aa0239f0e85f..6fe7ba908f3d 100644
--- a/src/metric/rank_metric.cu
+++ b/src/metric/rank_metric.cu
@@ -57,7 +57,7 @@ struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
     return EvalMetricT::EvalMetric(segment_pred_sorter, dlabels.Values().data(), *this);
   }
 
-  const char* Name() const override {
+  [[nodiscard]] const char* Name() const override {  // metric name as a C string
     return name.c_str();
   }
 
@@ -133,16 +133,50 @@ namespace cuda_impl {
 PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
                             HostDeviceVector<float> const &predt, bool minus,
                             std::shared_ptr<ltr::MAPCache> p_cache) {
-  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
   auto d_group_ptr = p_cache->DataGroupPtr(ctx);
-  auto it = dh::MakeTransformIterator<PackedReduceResult>(
-      thrust::make_counting_iterator(0ul),
-      [=] XGBOOST_DEVICE(std::size_t i) {
+  auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+  // `minus` is unused below. Move predt to the device before taking its device span.
+  predt.SetDevice(ctx->gpu_id);
+  auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
+  auto topk = p_cache->Param().TopK();
+  auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
+  // Per-row term: the label at in-group rank i divided by k when i < k, otherwise zero.
+  auto it = dh::MakeTransformIterator<double>(
+      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
         auto group_idx = dh::SegmentId(d_group_ptr, i);
+        auto g_begin = d_group_ptr[group_idx];
+        auto g_end = d_group_ptr[group_idx + 1];
+        i -= g_begin;
+        auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
+        auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
+        auto y = g_label(g_rank[i]);
+        if (i >= topk) {
+          return 0.0;
+        }
+        return y / static_cast<double>(topk);
       });
-  PackedReduceResult n_hits = thrust::reduce(it, it + info.num_row_);
-  double topk = p_cache->Param().TopK();
-  return n_hits / topk;
+  // A segmented sum over the group boundaries leaves one score per group in `pre`.
+  auto cuctx = ctx->CUDACtx();
+  auto pre = p_cache->Map(ctx);
+  thrust::fill_n(cuctx->CTP(), pre.data(), pre.size(), 0.0);
+  // Two-phase CUB call: the nullptr pass only sizes the scratch buffer, the second reduces.
+  std::size_t bytes{0};
+  cub::DeviceSegmentedReduce::Sum(nullptr, bytes, it, pre.data(), p_cache->Groups(),
+                                  d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
+  dh::TemporaryArray<char> temp(bytes);
+  cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(), p_cache->Groups(),
+                                  d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
+  // Combine the groups into (sum of weighted scores, sum of weights) in one reduction.
+  if (!d_weight.Empty()) {
+    CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
+  }
+  auto val_it = dh::MakeTransformIterator<PackedReduceResult>(
+      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) {
+        return PackedReduceResult{pre[g] * d_weight[g], static_cast<double>(d_weight[g])};
+      });
+  auto result =
+      thrust::reduce(cuctx->CTP(), val_it, val_it + pre.size(), PackedReduceResult{0.0, 0.0});
+  return result;
 }
 
 PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
diff --git a/src/metric/rank_metric.h b/src/metric/rank_metric.h
index b3b121973ef8..b7761c01b6dd 100644
--- a/src/metric/rank_metric.h
+++ b/src/metric/rank_metric.h
@@ -3,7 +3,7 @@
 /**
  * Copyright 2023 by XGBoost Contributors
  */
-#include <memory>                        // for shared_ptr
+#include <memory>  // for shared_ptr
 
 #include "../common/common.h"            // for AssertGPUSupport
 #include "../common/ranking_utils.h"     // for NDCGCache, MAPCache
@@ -12,9 +12,7 @@
 #include "xgboost/data.h"                // for MetaInfo
 #include "xgboost/host_device_vector.h"  // for HostDeviceVector
 
-namespace xgboost {
-namespace metric {
-namespace cuda_impl {
+namespace xgboost::metric::cuda_impl {
 PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
                              HostDeviceVector<float> const &predt, bool minus,
                              std::shared_ptr<ltr::NDCGCache> p_cache);
@@ -23,6 +21,10 @@ PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
                             HostDeviceVector<float> const &predt, bool minus,
                             std::shared_ptr<ltr::MAPCache> p_cache);
 
+PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,  // CUDA precision@k
+                            HostDeviceVector<float> const &predt, bool minus,
+                            std::shared_ptr<ltr::MAPCache> p_cache);
+
 #if !defined(XGBOOST_USE_CUDA)
 inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
                                     HostDeviceVector<float> const &, bool,
@@ -37,8 +39,13 @@ inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
   common::AssertGPUSupport();
   return {};
 }
+
+inline PackedReduceResult PreScore(Context const *, MetaInfo const &,
+                                   HostDeviceVector<float> const &, bool,
+                                   std::shared_ptr<ltr::MAPCache>) {
+  common::AssertGPUSupport();  // stub used when XGBOOST_USE_CUDA is not defined
+  return {};
+}
 #endif
-}  // namespace cuda_impl
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric::cuda_impl
 #endif  // XGBOOST_METRIC_RANK_METRIC_H_