Refactor QuantizationUtils functions to use array parameters

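This commit refactors cal_offset_2d in QuantizationUtils (definition and header declaration) to declare its indices parameter as a two-element array instead of a raw pointer, and removes the sizeof-based assertion on that parameter. It also fixes the output-type checks in QuantizeWeightsWithGPTQPass: the check in fake_quantize_cwq used `||` and was therefore always true, and the check in run() accepted only U8; both now accept U4 and U8 and report "GPTQ quantization supports uint4/uint8".

ONE-DCO-1.0-Signed-off-by: y01000.you <[email protected]>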
Samsung · Nov 26, 2024 · 79aa022 · 79aa022
1 parent 460f439
commit 79aa022
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 8 deletions.
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -292,11 +292,9 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
          indices[2] * dimension.dim(3).value() + indices[3];
 }
 
-uint32_t cal_offset_2d(loco::TensorShape &dimension, uint32_t *indices)
+uint32_t cal_offset_2d(loco::TensorShape &dimension, uint32_t indices[2])
 {
   assert(dimension.rank() == 2);
-  assert(sizeof(indices) / sizeof(*indices) == dimension.rank());
-
   return indices[0] * dimension.dim(1).value() + indices[1];
 }
 

diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h
@@ -53,7 +53,7 @@ bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
 
 // Calculate offset of the given indices in dimension
 uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
-uint32_t cal_offset_2d(loco::TensorShape &dimension, uint32_t *indices);
+uint32_t cal_offset_2d(loco::TensorShape &dimension, uint32_t indices[2]);
 
 // Backward propagation of concatenation qparam
 void propagate_concat_quantparam(luci::CircleConcatenation *concat);

diff --git a/compiler/luci/pass/src/QuantizeWeightsWithGPTQPass.cpp b/compiler/luci/pass/src/QuantizeWeightsWithGPTQPass.cpp
@@ -519,9 +519,9 @@ class QuantizeWeightsWithGPTQ final : public luci::CircleNodeMutableVisitor<void
 
   void fake_quantize_cwq(luci::CircleConst *weights, std::vector<float> &hessian) const
   {
-    if (_output_type != loco::DataType::U4 || _output_type != loco::DataType::U8)
+    if (_output_type != loco::DataType::U4 && _output_type != loco::DataType::U8)
     {
-      throw std::runtime_error("GPTQ quantization supports U4/U8");
+      throw std::runtime_error("GPTQ quantization supports uint4/uint8");
     }
     // Find min/max per channel
     std::vector<float> min;
@@ -657,8 +657,11 @@ bool QuantizeWeightsWithGPTQPass::run(loco::Graph *g)
   if (_ctx->input_model_dtype != loco::DataType::FLOAT32)
     throw std::runtime_error("Weights-only quantization supports float32 input only");
 
-  if (_ctx->output_model_dtype != loco::DataType::U8)
-    throw std::runtime_error("GPTQ quantization supports uint8 output only");
+  if (_ctx->output_model_dtype != loco::DataType::U8 &&
+      _ctx->output_model_dtype != loco::DataType::U4)
+  {
+    throw std::runtime_error("GPTQ quantization supports uint4/uint8");
+  }
 
   auto info_by_name = layer_info_map(g, _ctx->layers_info);