dmlc · trivialfis · Nov 17, 2024 · Nov 13, 2024 · Nov 13, 2024 · Nov 13, 2024
diff --git a/doc/tutorials/intercept.rst b/doc/tutorials/intercept.rst
@@ -102,4 +102,38 @@ is fitted:
    E[y] = intercept
 
 For some objectives like MAE, there are close solutions, while for others it's estimated
-with one step Newton method.
+with one step Newton method.
+
+******
+Offset
+******
+
+The ``base_margin`` is a form of ``offset`` in GLM. Using the Poisson objective as an
+example, we might want to model the rate instead of the count:
+
+.. math::
+
+   rate = \frac{count}{exposure}
+
+And the offset is defined as the log link applied to the exposure variable:
+:math:`\ln{exposure}`. Let :math:`c` be the count and :math:`\beta` be the exposure.
+Substituting the response :math:`y` in our previous formulation of base margin gives:
+
+.. math::
+
+   g({\frac{E[c_i]}{\beta_i}}) = F(x_i)
+
+Substitute :math:`g` with :math:`\ln` for Poisson regression:
+
+.. math::
+
+   \ln{\frac{E[c_i]}{\beta_i}} = F(x_i)
+
+We have:
+
+.. math::
+
+   E[c_i] &= \exp(F(x_i) + \ln{\beta_i}) \\
+   E[c_i] &= g^{-1}(F(x_i) + g(\beta_i))
+
+As you can see, we can use the ``base_margin`` for modeling with offset similar to GLMs.
diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2023 by XGBoost Contributors
+ * Copyright 2014-2024, XGBoost Contributors
  * \file objective.h
  * \brief interface of objective function used by xgboost.
  * \author Tianqi Chen, Kailong Chen
@@ -17,8 +17,6 @@
 #include <cstdint>  // std::int32_t
 #include <functional>
 #include <string>
-#include <utility>
-#include <vector>
 
 namespace xgboost {
 
@@ -40,7 +38,7 @@ class ObjFunction : public Configurable {
    * \brief Configure the objective with the specified parameters.
    * \param args arguments to the objective function.
    */
-  virtual void Configure(const std::vector<std::pair<std::string, std::string> >& args) = 0;
+  virtual void Configure(Args const& args) = 0;
   /**
    * @brief Get gradient over each of predictions, given existing information.
    *
@@ -49,7 +47,7 @@ class ObjFunction : public Configurable {
    * @param iteration current iteration number.
    * @param out_gpair output of get gradient, saves gradient and second order gradient in
    */
-  virtual void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info,
+  virtual void GetGradient(HostDeviceVector<float> const& preds, MetaInfo const& info,
                            std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) = 0;
 
   /*! \return the default evaluation metric for the objective */
@@ -60,27 +58,31 @@ class ObjFunction : public Configurable {
   virtual Json DefaultMetricConfig() const { return Json{Null{}}; }
 
   // the following functions are optional, most of time default implementation is good enough
-  /*!
-   * \brief transform prediction values, this is only called when Prediction is called
-   * \param io_preds prediction values, saves to this vector as well
+  /**
+   * @brief Apply inverse link (activation) function to prediction values.
+   *
+   *   This is only called when Prediction is called
+   *
+   * @param [in,out] io_preds prediction values, saves to this vector as well.
    */
-  virtual void PredTransform(HostDeviceVector<bst_float>*) const {}
-
-  /*!
-   * \brief transform prediction values, this is only called when Eval is called,
-   *  usually it redirect to PredTransform
-   * \param io_preds prediction values, saves to this vector as well
+  virtual void PredTransform(HostDeviceVector<float>*) const {}
+  /**
+   * @brief Apply inverse link (activation) function to prediction values
+   *
+   *  This is only called when Eval is called, usually it redirects to PredTransform.
+   *
+   * @param [in,out] io_preds prediction values, saves to this vector as well.
    */
-  virtual void EvalTransform(HostDeviceVector<bst_float> *io_preds) {
-    this->PredTransform(io_preds);
-  }
-  /*!
-   * \brief transform probability value back to margin
-   * this is used to transform user-set base_score back to margin
-   * used by gradient boosting
-   * \return transformed value
+  virtual void EvalTransform(HostDeviceVector<float>* io_preds) { this->PredTransform(io_preds); }
+  /**
+   * @brief Apply link function to the intercept.
+   *
+   *   This is used to transform user-set base_score back to margin used by gradient
+   *   boosting
+   *
+   * @return transformed value
    */
-  [[nodiscard]] virtual bst_float ProbToMargin(bst_float base_score) const { return base_score; }
+  [[nodiscard]] virtual float ProbToMargin(float base_score) const { return base_score; }
   /**
    * @brief Obtain the initial estimation of prediction.
    *