Integer gradient summation for GPU histogram algorithm. (#2681)
RAMitchell authored Sep 8, 2017
1 parent 15267ee commit e6a9063
Showing 15 changed files with 181 additions and 127 deletions.
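This commit switches the GPU histogram algorithm to an integer (fixed-point) backed gradient pair: integer addition is associative, so the accumulated sum is identical regardless of the order in which GPU threads add their contributions, and histogram builds become reproducible run to run. A minimal standalone sketch of the idea (illustrative only; the 1e5 scale factor matches the specialisation added in base.h below, everything else is made up for the demo):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> grads = {1e-3f, 1e7f, -1e7f, 2e-3f};

  // Floating-point sums depend on evaluation order.
  float f1 = ((grads[0] + grads[1]) + grads[2]) + grads[3];  // small term absorbed and lost
  float f2 = ((grads[1] + grads[2]) + grads[0]) + grads[3];  // different order, different result

  // Fixed point: scale by 1e5, accumulate as int64_t, convert back once at the end.
  auto to_fixed = [](float g) { return static_cast<int64_t>(g * 1e5); };
  int64_t i1 = ((to_fixed(grads[0]) + to_fixed(grads[1])) + to_fixed(grads[2])) + to_fixed(grads[3]);
  int64_t i2 = ((to_fixed(grads[1]) + to_fixed(grads[2])) + to_fixed(grads[0])) + to_fixed(grads[3]);

  std::printf("float sums: %.6f vs %.6f\n", f1, f2);                // typically differ
  std::printf("fixed sums: %.6f vs %.6f\n", i1 * 1e-5, i2 * 1e-5);  // always equal
  return 0;
}

The trade-off is a fixed resolution of 1e-5 per value and a bounded representable range (roughly ±9.2e13 after scaling by 1e5).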
101 changes: 76 additions & 25 deletions include/xgboost/base.h
@@ -87,65 +87,116 @@ typedef uint64_t bst_ulong; // NOLINT(*)
typedef float bst_float;


/*! \brief Implementation of gradient statistics pair */
namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation
 * may be used to overload different gradient types e.g. low precision, high
* precision, integer, floating point. */
template <typename T>
struct bst_gpair_internal {
class bst_gpair_internal {
/*! \brief gradient statistics */
T grad;
T grad_;
/*! \brief second order gradient statistics */
T hess;
T hess_;

XGBOOST_DEVICE bst_gpair_internal() : grad(0), hess(0) {}
XGBOOST_DEVICE void SetGrad(float g) { grad_ = g; }
XGBOOST_DEVICE void SetHess(float h) { hess_ = h; }

XGBOOST_DEVICE bst_gpair_internal(T grad, T hess)
: grad(grad), hess(hess) {}
public:
typedef T value_t;

XGBOOST_DEVICE bst_gpair_internal() : grad_(0), hess_(0) {}

XGBOOST_DEVICE bst_gpair_internal(float grad, float hess) {
SetGrad(grad);
SetHess(hess);
}

// Copy constructor if of same value type
XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T> &g)
: grad_(g.grad_), hess_(g.hess_) {}

// Copy constructor if different value type - use getters and setters to
// perform conversion
template <typename T2>
XGBOOST_DEVICE bst_gpair_internal(bst_gpair_internal<T2>&g)
: grad(g.grad), hess(g.hess) {}
XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T2> &g) {
SetGrad(g.GetGrad());
SetHess(g.GetHess());
}

XGBOOST_DEVICE float GetGrad() const { return grad_; }
XGBOOST_DEVICE float GetHess() const { return hess_; }

XGBOOST_DEVICE bst_gpair_internal<T> &operator+=(const bst_gpair_internal<T> &rhs) {
grad += rhs.grad;
hess += rhs.hess;
XGBOOST_DEVICE bst_gpair_internal<T> &operator+=(
const bst_gpair_internal<T> &rhs) {
grad_ += rhs.grad_;
hess_ += rhs.hess_;
return *this;
}

XGBOOST_DEVICE bst_gpair_internal<T> operator+(const bst_gpair_internal<T> &rhs) const {
XGBOOST_DEVICE bst_gpair_internal<T> operator+(
const bst_gpair_internal<T> &rhs) const {
bst_gpair_internal<T> g;
g.grad = grad + rhs.grad;
g.hess = hess + rhs.hess;
g.grad_ = grad_ + rhs.grad_;
g.hess_ = hess_ + rhs.hess_;
return g;
}

XGBOOST_DEVICE bst_gpair_internal<T> &operator-=(const bst_gpair_internal<T> &rhs) {
grad -= rhs.grad;
hess -= rhs.hess;
XGBOOST_DEVICE bst_gpair_internal<T> &operator-=(
const bst_gpair_internal<T> &rhs) {
grad_ -= rhs.grad_;
hess_ -= rhs.hess_;
return *this;
}

XGBOOST_DEVICE bst_gpair_internal<T> operator-(const bst_gpair_internal<T> &rhs) const {
XGBOOST_DEVICE bst_gpair_internal<T> operator-(
const bst_gpair_internal<T> &rhs) const {
bst_gpair_internal<T> g;
g.grad = grad - rhs.grad;
g.hess = hess - rhs.hess;
g.grad_ = grad_ - rhs.grad_;
g.hess_ = hess_ - rhs.hess_;
return g;
}

XGBOOST_DEVICE bst_gpair_internal(int value) {
*this = bst_gpair_internal<T>(static_cast<float>(value), static_cast<float>(value));
*this = bst_gpair_internal<T>(static_cast<float>(value),
static_cast<float>(value));
}

friend std::ostream &operator<<(std::ostream &os,
const bst_gpair_internal<T> &g) {
os << g.grad << "/" << g.hess;
os << g.grad_ << "/" << g.hess_;
return os;
}
};

template<>
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetGrad() const {
return grad_ * 1e-5;
}
template<>
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetHess() const {
return hess_ * 1e-5;
}
template<>
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetGrad(float g) {
grad_ = g * 1e5;
}
template<>
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetHess(float h) {
hess_ = h * 1e5;
}

} // namespace detail

/*! \brief gradient statistics pair usually needed in gradient boosting */
typedef bst_gpair_internal<float> bst_gpair;
typedef detail::bst_gpair_internal<float> bst_gpair;

/*! \brief High precision gradient statistics pair */
typedef bst_gpair_internal<double> bst_gpair_precise;
typedef detail::bst_gpair_internal<double> bst_gpair_precise;

/*! \brief High precision gradient statistics pair with integer backed
* storage. Operators are associative where floating point versions are not
* associative. */
typedef detail::bst_gpair_internal<int64_t> bst_gpair_integer;

/*! \brief small eps gap for minimum split decision. */
const bst_float rt_eps = 1e-6f;
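With the fields hidden behind SetGrad/SetHess/GetGrad/GetHess, the int64_t specialisation stores gradients in fixed point (scaled by 1e5) while every caller keeps passing and receiving floats, and the templated copy constructor makes conversion between the float-backed and integer-backed pairs a plain copy. A hedged usage sketch, assuming the header above is available as xgboost/base.h and the typedefs live in namespace xgboost as elsewhere in the codebase:

#include <vector>
#include <xgboost/base.h>

// Sum float gradient pairs through the fixed-point type, then convert back.
xgboost::bst_gpair sum_deterministic(const std::vector<xgboost::bst_gpair>& in) {
  xgboost::bst_gpair_integer sum;  // grad_ and hess_ start at 0
  for (const auto& g : in) {
    sum += xgboost::bst_gpair_integer(g);  // float -> fixed point via SetGrad/SetHess
  }
  return xgboost::bst_gpair(sum);  // fixed point -> float via GetGrad/GetHess
}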
4 changes: 2 additions & 2 deletions src/common/hist_util.h
@@ -33,8 +33,8 @@ struct GHistEntry {

/*! \brief add a bst_gpair to the sum */
inline void Add(const bst_gpair& e) {
sum_grad += e.grad;
sum_hess += e.hess;
sum_grad += e.GetGrad();
sum_hess += e.GetHess();
}

/*! \brief add a GHistEntry to the sum */
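The histogram entry now reads the pair through its accessors, so it works unchanged whichever representation backs the pair. A reduced illustration of the pattern (not the real GHistEntry, which also supports adding another entry):

// Illustrative bin accumulator: works for any pair type exposing GetGrad/GetHess.
struct HistBin {
  double sum_grad = 0.0;
  double sum_hess = 0.0;
  template <typename GPair>
  void Add(const GPair& e) {
    sum_grad += e.GetGrad();
    sum_hess += e.GetHess();
  }
};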
19 changes: 10 additions & 9 deletions src/gbm/gblinear.cc
@@ -120,8 +120,9 @@ class GBLinear : public GradientBooster {
#pragma omp parallel for schedule(static) reduction(+: sum_grad, sum_hess)
for (bst_omp_uint i = 0; i < ndata; ++i) {
bst_gpair &p = gpair[rowset[i] * ngroup + gid];
if (p.hess >= 0.0f) {
sum_grad += p.grad; sum_hess += p.hess;
if (p.GetHess() >= 0.0f) {
sum_grad += p.GetGrad();
sum_hess += p.GetHess();
}
}
// remove bias effect
@@ -132,8 +133,8 @@ class GBLinear : public GradientBooster {
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
bst_gpair &p = gpair[rowset[i] * ngroup + gid];
if (p.hess >= 0.0f) {
p.grad += p.hess * dw;
if (p.GetHess() >= 0.0f) {
p += bst_gpair(p.GetHess() * dw, 0);
}
}
}
@@ -151,9 +152,9 @@ class GBLinear : public GradientBooster {
for (bst_uint j = 0; j < col.length; ++j) {
const bst_float v = col[j].fvalue;
bst_gpair &p = gpair[col[j].index * ngroup + gid];
if (p.hess < 0.0f) continue;
sum_grad += p.grad * v;
sum_hess += p.hess * v * v;
if (p.GetHess() < 0.0f) continue;
sum_grad += p.GetGrad() * v;
sum_hess += p.GetHess() * v * v;
}
bst_float &w = model[fid][gid];
bst_float dw = static_cast<bst_float>(param.learning_rate *
@@ -162,8 +163,8 @@ class GBLinear : public GradientBooster {
// update grad value
for (bst_uint j = 0; j < col.length; ++j) {
bst_gpair &p = gpair[col[j].index * ngroup + gid];
if (p.hess < 0.0f) continue;
p.grad += p.hess * col[j].fvalue * dw;
if (p.GetHess() < 0.0f) continue;
p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0);
}
}
}
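In gblinear the direct field updates (p.grad += p.hess * dw) become p += bst_gpair(p.GetHess() * dw, 0): the gradient correction is added as a pair whose hessian component is zero, so nothing outside the class touches the raw members. A condensed sketch of that residual-gradient shift after a weight moves by dw (illustrative helper; column entries are assumed to be (row index, feature value) pairs):

#include <cstddef>
#include <utility>
#include <vector>
#include <xgboost/base.h>

// After weight w_j moves by dw, each touched row's gradient shifts by h_i * x_ij * dw.
void shift_gradients(std::vector<xgboost::bst_gpair>* gpair,
                     const std::vector<std::pair<size_t, float>>& column,
                     float dw) {
  for (const auto& entry : column) {
    xgboost::bst_gpair& p = (*gpair)[entry.first];
    if (p.GetHess() < 0.0f) continue;  // negative hessian marks an excluded row
    p += xgboost::bst_gpair(p.GetHess() * entry.second * dw, 0.0f);
  }
}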
6 changes: 2 additions & 4 deletions src/objective/rank_obj.cc
@@ -109,10 +109,8 @@ class LambdaRankObj : public ObjFunction {
bst_float g = p - 1.0f;
bst_float h = std::max(p * (1.0f - p), eps);
// accumulate gradient and hessian in both pid, and nid
gpair[pos.rindex].grad += g * w;
gpair[pos.rindex].hess += 2.0f * w * h;
gpair[neg.rindex].grad -= g * w;
gpair[neg.rindex].hess += 2.0f * w * h;
gpair[pos.rindex] += bst_gpair(g * w, 2.0f*w*h);
gpair[neg.rindex] += bst_gpair(-g * w, 2.0f*w*h);
}
}
}
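The ranking objective likewise folds its two per-document field updates into a single += of a freshly constructed pair for each side of the document pair. A self-contained sketch of that pairwise update (p, w and eps are taken as inputs here; in the real objective they come from the predicted scores, the pair weight and a small constant):

#include <algorithm>
#include <cstddef>
#include <vector>
#include <xgboost/base.h>

// One (positive, negative) document pair: g = p - 1, h = max(p * (1 - p), eps).
// Both documents receive the same hessian; the gradients carry opposite signs.
void update_pair(std::vector<xgboost::bst_gpair>* gpair,
                 size_t pos_idx, size_t neg_idx,
                 float p, float w, float eps) {
  float g = p - 1.0f;
  float h = std::max(p * (1.0f - p), eps);
  (*gpair)[pos_idx] += xgboost::bst_gpair(g * w, 2.0f * w * h);
  (*gpair)[neg_idx] += xgboost::bst_gpair(-g * w, 2.0f * w * h);
}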
4 changes: 2 additions & 2 deletions src/tree/param.h
@@ -313,7 +313,7 @@ struct XGBOOST_ALIGNAS(16) GradStats {
* \brief accumulate statistics
* \param p the gradient pair
*/
inline void Add(bst_gpair p) { this->Add(p.grad, p.hess); }
inline void Add(bst_gpair p) { this->Add(p.GetGrad(), p.GetHess()); }
/*!
* \brief accumulate statistics, more complicated version
* \param gpair the vector storing the gradient statistics
@@ -323,7 +323,7 @@ struct XGBOOST_ALIGNAS(16) GradStats {
inline void Add(const std::vector<bst_gpair>& gpair, const MetaInfo& info,
bst_uint ridx) {
const bst_gpair& b = gpair[ridx];
this->Add(b.grad, b.hess);
this->Add(b.GetGrad(), b.GetHess());
}
/*! \brief calculate leaf weight */
inline double CalcWeight(const TrainParam& param) const {
4 changes: 2 additions & 2 deletions src/tree/updater_basemaker-inl.h
@@ -140,14 +140,14 @@ class BaseMaker: public TreeUpdater {
}
// mark delete for the deleted datas
for (size_t i = 0; i < position.size(); ++i) {
if (gpair[i].hess < 0.0f) position[i] = ~position[i];
if (gpair[i].GetHess() < 0.0f) position[i] = ~position[i];
}
// mark subsample
if (param.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param.subsample);
auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < position.size(); ++i) {
if (gpair[i].hess < 0.0f) continue;
if (gpair[i].GetHess() < 0.0f) continue;
if (!coin_flip(rnd)) position[i] = ~position[i];
}
}
4 changes: 2 additions & 2 deletions src/tree/updater_colmaker.cc
@@ -136,15 +136,15 @@ class ColMaker: public TreeUpdater {
// mark delete for the deleted datas
for (size_t i = 0; i < rowset.size(); ++i) {
const bst_uint ridx = rowset[i];
if (gpair[ridx].hess < 0.0f) position[ridx] = ~position[ridx];
if (gpair[ridx].GetHess() < 0.0f) position[ridx] = ~position[ridx];
}
// mark subsample
if (param.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param.subsample);
auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < rowset.size(); ++i) {
const bst_uint ridx = rowset[i];
if (gpair[ridx].hess < 0.0f) continue;
if (gpair[ridx].GetHess() < 0.0f) continue;
if (!coin_flip(rnd)) position[ridx] = ~position[ridx];
}
}
4 changes: 2 additions & 2 deletions src/tree/updater_fast_hist.cc
@@ -372,13 +372,13 @@ class FastHistMaker: public TreeUpdater {
std::bernoulli_distribution coin_flip(param.subsample);
auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < info.num_row; ++i) {
if (gpair[i].hess >= 0.0f && coin_flip(rnd)) {
if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
row_indices.push_back(i);
}
}
} else {
for (size_t i = 0; i < info.num_row; ++i) {
if (gpair[i].hess >= 0.0f) {
if (gpair[i].GetHess() >= 0.0f) {
row_indices.push_back(i);
}
}
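updater_basemaker-inl.h, updater_colmaker.cc and updater_fast_hist.cc all repeat the same pattern: a negative hessian marks a row that must be ignored, and row subsampling keeps each remaining row with probability param.subsample. A condensed sketch of that pattern, with the random engine passed in explicitly instead of using the library's common::GlobalRandom():

#include <cstddef>
#include <random>
#include <vector>
#include <xgboost/base.h>

// Collect the indices of rows that survive deletion marking and subsampling.
std::vector<size_t> sample_rows(const std::vector<xgboost::bst_gpair>& gpair,
                                float subsample, std::mt19937* rnd) {
  std::vector<size_t> row_indices;
  if (subsample < 1.0f) {
    std::bernoulli_distribution coin_flip(subsample);
    for (size_t i = 0; i < gpair.size(); ++i) {
      if (gpair[i].GetHess() >= 0.0f && coin_flip(*rnd)) row_indices.push_back(i);
    }
  } else {
    for (size_t i = 0; i < gpair.size(); ++i) {
      if (gpair[i].GetHess() >= 0.0f) row_indices.push_back(i);
    }
  }
  return row_indices;
}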
12 changes: 6 additions & 6 deletions src/tree/updater_gpu_common.cuh
@@ -82,8 +82,8 @@ struct DeviceDenseNode {
fvalue(0.f),
fidx(UNUSED_NODE),
idx(nidx) {
this->root_gain = CalcGain(param, sum_gradients.grad, sum_gradients.hess);
this->weight = CalcWeight(param, sum_gradients.grad, sum_gradients.hess);
this->root_gain = CalcGain(param, sum_gradients.GetGrad(), sum_gradients.GetHess());
this->weight = CalcWeight(param, sum_gradients.GetGrad(), sum_gradients.GetHess());
}

HOST_DEV_INLINE void SetSplit(float fvalue, int fidx, DefaultDirection dir) {
@@ -113,8 +113,8 @@ __device__ inline float device_calc_loss_chg(

gpair_t right = parent_sum - left;

float left_gain = CalcGain(param, left.grad, left.hess);
float right_gain = CalcGain(param, right.grad, right.hess);
float left_gain = CalcGain(param, left.GetGrad(), left.GetHess());
float right_gain = CalcGain(param, right.GetGrad(), right.GetHess());
return left_gain + right_gain - parent_gain;
}

@@ -181,13 +181,13 @@ inline void dense2sparse_tree(RegTree* p_tree,
tree[nid].set_split(n.fidx, n.fvalue, n.dir == LeftDir);
tree.stat(nid).loss_chg = n.root_gain;
tree.stat(nid).base_weight = n.weight;
tree.stat(nid).sum_hess = n.sum_gradients.hess;
tree.stat(nid).sum_hess = n.sum_gradients.GetHess();
tree[tree[nid].cleft()].set_leaf(0);
tree[tree[nid].cright()].set_leaf(0);
nid++;
} else if (n.IsLeaf()) {
tree[nid].set_leaf(n.weight * param.learning_rate);
tree.stat(nid).sum_hess = n.sum_gradients.hess;
tree.stat(nid).sum_hess = n.sum_gradients.GetHess();
nid++;
}
}
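device_calc_loss_chg derives the right child's statistics by subtracting the left child's sum from the parent's (right = parent_sum - left) and then scores the split with the usual second-order gain. For reference, a simplified host-side version of those formulas, ignoring the min_child_weight and L1 (alpha) terms that the real CalcGain/CalcWeight in param.h also handle:

// weight(G, H) = -G / (H + lambda);  gain(G, H) = G^2 / (H + lambda)
// loss_chg     = gain(G_L, H_L) + gain(G_R, H_R) - gain(G_parent, H_parent)
inline float calc_gain(float sum_grad, float sum_hess, float lambda) {
  return (sum_grad * sum_grad) / (sum_hess + lambda);
}
inline float calc_weight(float sum_grad, float sum_hess, float lambda) {
  return -sum_grad / (sum_hess + lambda);
}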
