Refactor GPTQPass to improve readability and maintainability
This commit refactors the QuantizeWeightsWithGPTQPass.cpp file to improve its readability and maintainability.

ONE-DCO-1.0-Signed-off-by: y01000.you <[email protected]>
y01000.you committed Nov 26, 2024
1 parent 60b9eaf commit 460f439
Showing 1 changed file with 95 additions and 92 deletions.
187 changes: 95 additions & 92 deletions compiler/luci/pass/src/QuantizeWeightsWithGPTQPass.cpp
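
In summary, the diff below extracts the previously inlined Hessian preprocessing into two named helpers (apply_damping_to_hessian and transpose_to_upper_triangular), renames channel_dim_index / channel_idx to index_channel_dim / idx_channel throughout, moves the hessian argument ahead of output_type in asymmetric_wquant_per_channel, and lets compute_asym_scale_zp derive its quantization ceiling from the target data type. As a reading aid (not part of the commit), here is a minimal sketch of how the GPTQ branch prepares the inverse Hessian after the refactor; the wrapper name prepare_hessian_inverse is hypothetical, and declarations of the called helpers are assumed to be in scope.

#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical wrapper, for illustration only. The call sequence mirrors the
// refactored GPTQ branch of asymmetric_wquant_per_channel shown below.
void prepare_hessian_inverse(std::vector<float> &hessian)
{
  const uint32_t size_hessian = static_cast<uint32_t>(std::sqrt(hessian.size()));

  apply_damping_to_hessian(hessian, size_hessian);      // add 1% of the mean diagonal to each diagonal entry
  cholesky_decomposition(hessian, size_hessian);        // H = L * L^T
  cholesky_inverse(hessian, size_hessian);              // recover H^-1 from the factor
  cholesky_decomposition(hessian, size_hessian);        // factor H^-1 itself
  transpose_to_upper_triangular(hessian, size_hessian); // keep the upper-triangular factor used for error compensation
}

The same call sequence appears inline in the hunk at @@ -339,59 +366,35 @@ below.
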
@@ -43,9 +43,9 @@ void iterate_per_channel_with_order(CircleConst *node, IterFunc func, bool rever
loco::TensorShape dimension;
dimension.rank(4);
uint32_t indices[4] = {0};
int32_t channel_dim_index{0};
int32_t index_channel_dim{0};
uint32_t num_dims[4];
if (!get_channel_dim_index(node, dimension, channel_dim_index))
if (!get_channel_dim_index(node, dimension, index_channel_dim))
{
throw std::runtime_error("GPTQPass: Failed to get channel dim index.");
}
@@ -69,28 +69,48 @@ void iterate_per_channel_with_order(CircleConst *node, IterFunc func, bool rever
indices[order[1]] = j;
indices[order[2]] = s;
indices[order[3]] = t;
func(indices, dimension, channel_dim_index);
func(indices, dimension, index_channel_dim);
}
}
}
}
}

size_t calculate_qauntized_value(CircleConst *node, uint32_t *indices, loco::TensorShape &dimension,
int channel_dim_index, std::vector<float> &scaling_factor,
int index_channel_dim, std::vector<float> &scaling_factor,
std::vector<float> &max, std::vector<float> &min)
{
assert(node != nullptr);

int channel_idx = indices[channel_dim_index];
int idx_channel = indices[index_channel_dim];

assert(scaling_factor[channel_idx] > 0);
const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
assert(scaling_factor[idx_channel] > 0);
const float scaling_factor_inv = 1.0 / scaling_factor[idx_channel];
auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
auto data_clipped = data < min[channel_idx] ? min[channel_idx] : data;
data_clipped = data_clipped > max[channel_idx] ? max[channel_idx] : data_clipped;
auto data_clipped = data < min[idx_channel] ? min[idx_channel] : data;
data_clipped = data_clipped > max[idx_channel] ? max[idx_channel] : data_clipped;

return static_cast<int32_t>(std::round((data_clipped - min[channel_idx]) * scaling_factor_inv));
return static_cast<int32_t>(std::round((data_clipped - min[idx_channel]) * scaling_factor_inv));
}

void apply_damping_to_hessian(std::vector<float> &hessian, uint32_t num_size)
{
float damp = 0;
float percdamp = .01;

for (uint32_t i = 0; i < num_size; i++)
{
damp += hessian[i * num_size + i];
}

assert(num_size != 0);
damp /= num_size;
damp *= percdamp;

for (uint32_t i = 0; i < num_size; i++)
{
hessian[i * num_size + i] += damp;
}
}

void cholesky_decomposition(std::vector<float> &src, uint32_t num_size)
@@ -220,31 +240,31 @@ void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vec
{
loco::TensorShape dimension;
dimension.rank(4);
int32_t channel_dim_index{0};
int32_t index_channel_dim{0};

if (!get_channel_dim_index(node, dimension, channel_dim_index))
if (!get_channel_dim_index(node, dimension, index_channel_dim))
{
throw std::runtime_error("GPTQPass: Failed to get channel dim index.");
}
auto size = dimension.dim(channel_dim_index).value();
auto size = dimension.dim(index_channel_dim).value();

std::vector<bool> has_min_max_value(size, false);
min.resize(size);
max.resize(size);

auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
int channel_idx = indices[channel_dim_index];
auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int index_channel_dim) {
int idx_channel = indices[index_channel_dim];
auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
if (has_min_max_value[channel_idx])
if (has_min_max_value[idx_channel])
{
min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
min[idx_channel] = data < min[idx_channel] ? data : min[idx_channel];
max[idx_channel] = data > max[idx_channel] ? data : max[idx_channel];
}
else
{
min[channel_idx] = data;
max[channel_idx] = data;
has_min_max_value[channel_idx] = true;
min[idx_channel] = data;
max[idx_channel] = data;
has_min_max_value[idx_channel] = true;
}
};

@@ -253,22 +273,16 @@ void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vec

/**
* @brief Compute the scale and zero point for the given range of values
* @param min: The minimum value in the range of values to be quantized.
* @param max: The maximum value in the range of values to be quantized.
* @param k_max_scale: The maximum value of the quantization scale.
* @param scaling_factor: The computed scaling factor for the quantization.
* @param zp: The computed zero point for the quantization.
* @param nudged_min: The nudged minimum value after applying the scaling factor and zero point.
* @param nudged_max: The nudged maximum value after applying the scaling factor and zero point.
*/
void compute_asym_scale_zp(float min, float max, int32_t k_max_scale, float &scaling_factor,
void compute_asym_scale_zp(float min, float max, loco::DataType data_type, float &scaling_factor,
int64_t &zp, float &nudged_min, float &nudged_max)
{
LOGGER(l);

assert(min <= max);
const int32_t kMinScale = 0;
const int32_t kMaxScale = k_max_scale;
const int32_t kMaxScale = data_type == loco::DataType::U4 ? 15 : 255;

const double qmin_double = kMinScale;
const double qmax_double = kMaxScale;
const double rmin = std::fmin(0, min);
@@ -325,11 +339,24 @@ void compute_asym_scale_zp(float min, float max, int32_t k_max_scale, float &sca
zp = nudged_zero_point;
}

void transpose_to_upper_triangular(std::vector<float> &matrix, uint32_t num_size)
{
for (uint32_t i = 0; i < num_size; i++)
{
for (uint32_t j = 0; j < i; j++)
{
float tmp = matrix[i * num_size + j];
matrix[i * num_size + j] = matrix[j * num_size + i];
matrix[j * num_size + i] = tmp;
}
}
}

void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
std::vector<float> &max, std::vector<float> &scaling_factor,
std::vector<int64_t> &zp, std::vector<float> &nudged_min,
std::vector<float> &nudged_max, loco::DataType output_type,
std::vector<float> &hessian)
std::vector<float> &nudged_max, std::vector<float> &hessian,
loco::DataType output_type)
{
assert(node->dtype() == loco::DataType::FLOAT32);
assert(output_type == loco::DataType::U8 || output_type != loco::DataType::U4);
@@ -339,59 +366,35 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
const int32_t kMinScale = 0;
const int32_t kMaxScale = output_type == loco::DataType::U4 ? 15 : 255;

uint32_t size = node->size<loco::DataType::FLOAT32>();
std::vector<int32_t> quantized_values(size);
uint32_t input_size = node->size<loco::DataType::FLOAT32>();
std::vector<int32_t> quantized_values(input_size);

for (size_t i = 0; i < min.size(); ++i)
{
compute_asym_scale_zp(min[i], max[i], kMaxScale, scaling_factor[i], zp[i], nudged_min[i],
compute_asym_scale_zp(min[i], max[i], output_type, scaling_factor[i], zp[i], nudged_min[i],
nudged_max[i]);
}

if (hessian.empty()) // Case where GPTQ is not applied
{
quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int index_channel_dim) {
quantized_values[cal_offset(dimension, indices)] = calculate_qauntized_value(
node, indices, dimension, channel_dim_index, scaling_factor, nudged_max, nudged_min);
node, indices, dimension, index_channel_dim, scaling_factor, nudged_max, nudged_min);
};
iterate_per_channel_with_order(node, quantize, false);
}
else // Case where GPTQ is applied
{
uint32_t size_hessian = static_cast<uint32_t>(sqrt(hessian.size()));
float percdamp = .01;
float damp = 0;

for (uint32_t i = 0; i < size_hessian; i++)
{
damp += hessian[i * size_hessian + i];
}
assert(size_hessian != 0);
damp /= size_hessian;
damp *= percdamp;

for (uint32_t i = 0; i < size_hessian; i++)
{
hessian[i * size_hessian + i] += damp;
}

// calculate hessian inverse
// Calculate hessian inverse
apply_damping_to_hessian(hessian, size_hessian);
cholesky_decomposition(hessian, size_hessian);
cholesky_inverse(hessian, size_hessian);
cholesky_decomposition(hessian, size_hessian);
transpose_to_upper_triangular(hessian, size_hessian);

// transpose hessian to make upper triangular
for (uint32_t i = 0; i < size_hessian; i++)
{
for (uint32_t j = 0; j < i; j++)
{
float tmp = hessian[i * size_hessian + j];
hessian[i * size_hessian + j] = hessian[j * size_hessian + i];
hessian[j * size_hessian + i] = tmp;
}
}

std::vector<float> error(size);
std::vector<float> error(input_size);

loco::TensorShape dimension_channel_last;
dimension_channel_last.rank(4);
@@ -401,35 +404,34 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
dimension_hessian.dim(0).set(size_hessian);
dimension_hessian.dim(1).set(size_hessian);

quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
quantized_values[cal_offset(dimension, indices)] = calculate_qauntized_value(
node, indices, dimension, channel_dim_index, scaling_factor, nudged_max, nudged_min);
quantize = [&](uint32_t *indices, loco::TensorShape &dimension_input, int index_channel_dim) {
quantized_values[cal_offset(dimension_input, indices)] = calculate_qauntized_value(
node, indices, dimension_input, index_channel_dim, scaling_factor, nudged_max, nudged_min);

uint32_t indices_channel_last[4] = {
indices[0], indices[3], indices[1], indices[2] // ohwi -> oihw
};

uint32_t dimension_channel_last[4] = {dimension.dim(0).value(), dimension.dim(3).value(),
dimension.dim(1).value(), dimension.dim(2).value()};
uint32_t dimension_channel_last[4] = {
dimension_input.dim(0).value(), dimension_input.dim(3).value(),
dimension_input.dim(1).value(), dimension_input.dim(2).value()};

uint32_t idx_quant_column =
dimension_channel_last[2] * dimension_channel_last[3] * indices_channel_last[1] +
dimension_channel_last[3] * indices_channel_last[2] + indices_channel_last[3];

uint32_t idx_channel = indices[index_channel_dim];
uint32_t indices_diag_hessian[2] = {idx_quant_column, idx_quant_column};

uint32_t channel_idx = indices[channel_dim_index];
auto idx_input_data = cal_offset(dimension_input, indices);
auto idx_hessian = cal_offset_2d(dimension_hessian, indices_diag_hessian);

auto data_indices = cal_offset(dimension, indices);
auto hessian_indices = cal_offset_2d(dimension_hessian, indices_diag_hessian);

auto data = node->at<loco::DataType::FLOAT32>(data_indices);
auto input_data = node->at<loco::DataType::FLOAT32>(idx_input_data);
auto quantized_rvalue =
(quantized_values[data_indices] - zp[channel_idx]) * scaling_factor[channel_idx];
(quantized_values[idx_input_data] - zp[idx_channel]) * scaling_factor[idx_channel];

error[data_indices] = (data - quantized_rvalue) / hessian[hessian_indices];
error[idx_input_data] = (input_data - quantized_rvalue) / hessian[idx_hessian];

if (channel_idx == (dimension.dim(channel_dim_index).value() - 1))
if (idx_channel == (dimension_input.dim(index_channel_dim).value() - 1))
{
for (uint32_t o = 0; o < dimension_channel_last[0]; o++)
{
@@ -439,19 +441,20 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
{
for (uint32_t w = 0; w < dimension_channel_last[3]; w++)
{
// convert coordination
// Convert coordination
uint32_t indices_channel_first[4] = {o, h, w, i};
uint32_t indices_error[4] = {o, indices[1], indices[2], indices[3]};
uint32_t idx_ihw = dimension_channel_last[2] * dimension_channel_last[3] * i +
dimension_channel_last[3] * h + w;
uint32_t indices_hessain[2] = {idx_quant_column, idx_ihw};

auto _h_indices = cal_offset_2d(dimension_hessian, indices_hessain);
auto _data_indices = cal_offset(dimension, indices_channel_first);
auto _error_indices = cal_offset(dimension, indices_error);
auto _idx_h = cal_offset_2d(dimension_hessian, indices_hessain);
auto _idx_input_data = cal_offset(dimension_input, indices_channel_first);
auto _idx_error = cal_offset(dimension_input, indices_error);

node->at<loco::DataType::FLOAT32>(_data_indices) -=
error[_error_indices] * hessian[_h_indices];
// Compensate quantize error
node->at<loco::DataType::FLOAT32>(_idx_input_data) -=
error[_idx_error] * hessian[_idx_h];
}
}
}
@@ -461,9 +464,9 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
iterate_per_channel_with_order(node, quantize, true);
}

node->dtype(loco::DataType::U8); // Change the type of tensor
node->size<loco::DataType::U8>(size); // Resize tensor
for (uint32_t i = 0; i < size; ++i)
node->dtype(loco::DataType::U8); // Change the type of tensor
node->size<loco::DataType::U8>(input_size); // Resize tensor
for (uint32_t i = 0; i < input_size; ++i)
{
node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
Expand All @@ -476,11 +479,11 @@ void asymmetric_wdequant_per_channel(CircleConst *node, std::vector<float> &scal
uint32_t size = node->size<loco::DataType::U8>();
std::vector<float> dequantized_values(size);

auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
int channel_idx = indices[channel_dim_index];
auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int index_channel_dim) {
int idx_channel = indices[index_channel_dim];
auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
dequantized_values[cal_offset(dimension, indices)] =
static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
static_cast<float>(data) * scaling_factor[idx_channel] + nudged_min[idx_channel];
};

iterate_per_channel_with_order(node, dequantize, false);
@@ -532,7 +535,7 @@ class QuantizeWeightsWithGPTQ final : public luci::CircleNodeMutableVisitor<void
std::vector<int64_t> zp(min.size());

asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
_output_type, hessian);
hessian, _output_type);
asymmetric_wdequant_per_channel(weights, scaling_factor, nudged_min);

auto quantparam = std::make_unique<CircleQuantParam>();
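
A note on the compute_asym_scale_zp change above: the function now receives the target loco::DataType and derives the quantization ceiling itself (15 for U4, 255 for U8) instead of taking k_max_scale from its caller. As an illustration only (not part of the commit), the following self-contained sketch walks through the standard asymmetric scale/zero-point arithmetic the function is built around; the clamping and zero-point nudging corner cases handled by the real implementation are omitted, and the numbers are made up.

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  // Example range for a U4 target (qmax = 15); use qmax = 255 for U8.
  const float rmin = std::fmin(0.0f, -0.5f); // quantized range must cover 0
  const float rmax = std::fmax(0.0f, 1.5f);
  const int32_t qmin = 0;
  const int32_t qmax = 15;

  const float scale = (rmax - rmin) / static_cast<float>(qmax - qmin);
  const int64_t zp = static_cast<int64_t>(std::round(qmin - rmin / scale));
  const float nudged_min = static_cast<float>(qmin - zp) * scale;
  const float nudged_max = static_cast<float>(qmax - zp) * scale;

  // Prints roughly: scale=0.133333 zp=4 nudged=[-0.533333, 1.466667]
  std::printf("scale=%f zp=%lld nudged=[%f, %f]\n", scale,
              static_cast<long long>(zp), nudged_min, nudged_max);
  return 0;
}

With qmax = 255 instead, the same range gives scale of roughly 0.00784 and zp = 64.
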
