forked from Samsung/ONE
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[record-hessian] Introduce HessianComputer (Samsung#14265)
This commit introduces the Hessian computer and the Hessian vector. ONE-DCO-1.0-Signed-off-by: Banseok Lee <[email protected]>
- Loading branch information
Showing
7 changed files
with
490 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Library sources: every .cpp under src/ except the *.test.cpp unit tests.
file(GLOB_RECURSE TESTS "src/*.test.cpp")
file(GLOB_RECURSE SOURCES "src/*.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})

add_library(record-hessian STATIC ${SOURCES})

# include/ is the public API; src/ holds headers private to the library.
target_include_directories(record-hessian
  PUBLIC include
  PRIVATE src)

target_link_libraries(record-hessian
  luci_import
  luci_env
  luci_export
  luci_interpreter
  luci_log
  dio_hdf5)

install(TARGETS record-hessian DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
        FILES_MATCHING PATTERN "*.h")

# Everything below only configures the unit-test target.
if(NOT ENABLE_TEST)
  return()
endif()

nnas_find_package(GTest REQUIRED)

GTest_AddTest(record_hessian_tests ${TESTS})
target_include_directories(record_hessian_tests PRIVATE include src)
target_link_libraries(record_hessian_tests
  luci_lang
  luci_pass
  loco
  dio_hdf5
  nncc_coverage
  luci_interpreter
  record-hessian)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# record-hessian | ||
|
||
_record-hessian_ calculates the Hessian matrix of activations for quantization. |
62 changes: 62 additions & 0 deletions
62
compiler/record-hessian/include/record-hessian/HessianComputer.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef __RECORD_HESSIAN_HESSIANCOMPUTER_H__ | ||
#define __RECORD_HESSIAN_HESSIANCOMPUTER_H__ | ||
|
||
#include "record-hessian/HessianVector.h" | ||
|
||
#include <luci/IR/CircleNode.h> | ||
#include <luci_interpreter/Interpreter.h> | ||
|
||
#include <memory> | ||
#include <vector> | ||
#include <unordered_map> | ||
|
||
namespace record_hessian | ||
{ | ||
/** | ||
* @brief Record approximated hessian matrix from | ||
* GPTQ paper(https://arxiv.org/abs/2210.17323). | ||
*/ | ||
using HessianMap = std::unordered_map<const luci::CircleNode *, std::vector<float>>; | ||
using HessianVectorMap = std::unordered_map<const luci::CircleNode *, HessianVector>; | ||
|
||
class HessianComputer | ||
{ | ||
public: | ||
// Record min/max of node | ||
void recordHessian(const luci::CircleNode *node, const luci_interpreter::Tensor *input_tensor); | ||
|
||
std::unique_ptr<HessianMap> getMap(); | ||
|
||
private: | ||
HessianVectorMap _hessian_map; | ||
const luci_interpreter::Tensor *_input_tensor = nullptr; | ||
|
||
void recordHessianForConv2D(const luci::CircleNode *node); | ||
|
||
void recordHessianForFullyConnected(const luci::CircleNode *node); | ||
}; | ||
|
||
/**
 * @brief Replace buf, an NHWC activation buffer, with its unfolded
 *        (convolution patch) form; no padding is applied.
 * @param buf       activation data on entry, unfolded patches on return
 * @param input_n   batch size of the input
 * @param input_h   input height
 * @param input_w   input width
 * @param input_c   input channels; must equal kernel_ic
 * @param kernel_oc kernel output channels (does not affect the result)
 * @throws std::runtime_error if input_c differs from kernel_ic
 */
void unfold(std::vector<float> &buf, uint32_t input_n, uint32_t input_h, uint32_t input_w, | ||
            uint32_t input_c, uint32_t stride_h, uint32_t stride_w, uint32_t dilation_h, | ||
            uint32_t dilation_w, uint32_t kernel_oc, uint32_t kernel_h, uint32_t kernel_w, | ||
            uint32_t kernel_ic); | ||
|
||
} // namespace record_hessian | ||
|
||
#endif // __RECORD_HESSIAN_HESSIANCOMPUTER_H__ |
60 changes: 60 additions & 0 deletions
60
compiler/record-hessian/include/record-hessian/HessianVector.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef __RECORD_HESSIAN_HESSIANVECTOR_H__ | ||
#define __RECORD_HESSIAN_HESSIANVECTOR_H__ | ||
|
||
#include <luci/IR/CircleNodes.h> | ||
|
||
#include <cstddef> | ||
#include <vector> | ||
|
||
namespace record_hessian | ||
{ | ||
|
||
/**
 * @brief Running element-wise mean of the Hessian buffers passed to update().
 */
struct HessianVector
{
  std::vector<float> hessian; // current mean, one entry per Hessian element
  size_t count;               // number of update() calls folded into hessian

  HessianVector() : count(0) {}

  /**
   * @brief Fold new_hessian into the running mean and increment count.
   * @param new_hessian Hessian buffer of the latest sample
   * @note If new_hessian has a different size than the stored buffer, the
   *       stored buffer is resized to match: surviving elements keep their
   *       value and added elements start from zero.
   */
  void update(const std::vector<float> &new_hessian)
  {
    // resize() is a no-op when the sizes already agree, so a single call
    // covers both the first update and any later size change.
    hessian.resize(new_hessian.size());

    const size_t numel = new_hessian.size();
    const float alpha = 1.f / static_cast<float>(count + 1);

    for (size_t i = 0; i < numel; ++i)
    {
      // mean_{k+1} = (mean_k * k + x) / (k + 1)
      hessian[i] = (hessian[i] * count + new_hessian[i]) * alpha;
    }

    count++;
  }
};
|
||
} // namespace record_hessian | ||
|
||
#endif // __RECORD_HESSIAN_HESSIANVECTOR_H__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
require("luci") | ||
require("luci-interpreter") | ||
require("dio-hdf5") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
/* | ||
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "record-hessian/HessianComputer.h"

#include <luci/IR/CircleQuantParam.h>

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>
|
||
namespace record_hessian | ||
{ | ||
|
||
/**
 * @brief Unfold an NHWC activation buffer into convolution patches, replacing
 *        the contents of buf with the result. No padding is applied.
 * @note (N, H, W, C) -> (N, L, C * K_h * K_w), where L = out_h * out_w.
 *       Inside one patch the elements are ordered channel first, then kernel
 *       row, then kernel column.
 *       See details(https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html).
 * @param buf       activation data on entry, unfolded patches on return
 * @param kernel_oc kernel output channels; only sanity-checked, it does not
 *                  affect the result
 * @throws std::runtime_error if input_c differs from kernel_ic, if the dilated
 *         kernel does not fit inside the input, or if buf holds fewer elements
 *         than the given input shape requires
 */
void unfold(std::vector<float> &buf, uint32_t input_n, uint32_t input_h, uint32_t input_w,
            uint32_t input_c, uint32_t stride_h, uint32_t stride_w, uint32_t dilation_h,
            uint32_t dilation_w, uint32_t kernel_oc, uint32_t kernel_h, uint32_t kernel_w,
            uint32_t kernel_ic)
{
  assert(input_n > 0 && input_h > 0 && input_w > 0 && input_c > 0);
  assert(stride_h > 0 && stride_w > 0);
  assert(kernel_oc > 0 && kernel_h > 0 && kernel_w > 0 && kernel_ic > 0);

  if (input_c != kernel_ic)
    throw std::runtime_error("RecordHessian: Input channels do not match kernel channels.");

  // Extent covered by the dilated kernel. Computed in 64 bits so that neither
  // the product nor the later subtraction from the input size can wrap around.
  const uint64_t span_h = static_cast<uint64_t>(dilation_h) * (kernel_h - 1) + 1;
  const uint64_t span_w = static_cast<uint64_t>(dilation_w) * (kernel_w - 1) + 1;
  if (span_h > input_h || span_w > input_w)
    throw std::runtime_error("RecordHessian: Dilated kernel does not fit in the input.");

  const size_t batch = input_n;
  const size_t in_h = input_h;
  const size_t in_w = input_w;
  const size_t in_ch = input_c;

  if (buf.size() < batch * in_h * in_w * in_ch)
    throw std::runtime_error("RecordHessian: Input buffer is smaller than the given shape.");

  const size_t out_height = static_cast<size_t>((input_h - span_h) / stride_h) + 1;
  const size_t out_width = static_cast<size_t>((input_w - span_w) / stride_w) + 1;
  const size_t patch_size = static_cast<size_t>(kernel_h) * kernel_w * kernel_ic;
  std::vector<float> unfolded_buf(batch * out_height * out_width * patch_size, 0.0f);

  size_t index = 0;
  for (size_t n = 0; n < batch; ++n)
  {
    for (size_t y = 0; y < out_height; ++y)
    {
      for (size_t x = 0; x < out_width; ++x)
      {
        for (size_t c = 0; c < in_ch; ++c)
        {
          for (size_t ky = 0; ky < kernel_h; ++ky)
          {
            for (size_t kx = 0; kx < kernel_w; ++kx)
            {
              // Always inside the input: out_height/out_width were derived so
              // that the last patch ends at or before the last row/column.
              const size_t in_y = y * stride_h + ky * dilation_h;
              const size_t in_x = x * stride_w + kx * dilation_w;
              unfolded_buf[index++] = buf[((n * in_h + in_y) * in_w + in_x) * in_ch + c];
            }
          }
        }
      }
    }
  }

  buf.swap(unfolded_buf);
}
|
||
/**
 * @brief Compute the Hessian approximation for a FullyConnected input and
 *        merge it into the entry stored for node.
 *
 * The stored input tensor is read as a [length, channel] matrix X, where the
 * channel count is the last dimension and all leading dimensions are
 * flattened into length. The result is 2 * X^T * X, a channel x channel
 * matrix stored row-major.
 *
 * @throws std::runtime_error if the tensor rank is neither 2 nor 3, or if the
 *         channel dimension is zero
 */
void HessianComputer::recordHessianForFullyConnected(const luci::CircleNode *node)
{
  assert(_input_tensor->shape().num_dims() < 4);
  assert(_input_tensor->element_type() == luci_interpreter::DataType::FLOAT32);

  const auto &shape = _input_tensor->shape();

  uint32_t size_in_ch = 0;
  if (shape.num_dims() == 3)
  {
    size_in_ch = shape.dim(2); // input_tensor [batch, length, channel]
  }
  else if (shape.num_dims() == 2)
  {
    size_in_ch = shape.dim(1); // input_tensor [length, channel]
  }
  else
  {
    throw std::runtime_error("RecordHessian: Unsupported node rank");
  }

  // Checked at run time (not only by assert) because it is used as a divisor.
  if (size_in_ch == 0)
    throw std::runtime_error("RecordHessian: Input channel size is zero.");

  const float *data = _input_tensor->data<float>();
  const uint32_t num_elements = shape.num_elements();
  const uint32_t length = num_elements / size_in_ch;
  assert(data != nullptr || length == 0);

  std::vector<float> hessian(static_cast<size_t>(size_in_ch) * size_in_ch, 0);

  // X^T * X is symmetric: compute the upper triangle and mirror it.
  for (uint32_t i = 0; i < size_in_ch; ++i)
  {
    for (uint32_t j = i; j < size_in_ch; ++j)
    {
      float sum = 0;
      for (uint32_t k = 0; k < length; ++k)
      {
        const size_t row = static_cast<size_t>(k) * size_in_ch;
        sum += data[row + i] * data[row + j];
      }
      hessian[static_cast<size_t>(i) * size_in_ch + j] = 2 * sum;
      hessian[static_cast<size_t>(j) * size_in_ch + i] = 2 * sum;
    }
  }

  HessianVector &vector = _hessian_map[node];
  vector.update(hessian);
}
|
||
/**
 * @brief Compute the Hessian approximation for a Conv2D input and merge it
 *        into the entry stored for node.
 *
 * The stored 4-D input tensor is unfolded into patches with unfold(), giving
 * a [length, K] matrix X with K = kernel_h * kernel_w * kernel_ic. The result
 * is 2 * X^T * X, a K x K matrix stored row-major.
 *
 * @throws std::runtime_error if the filter has a zero-sized kernel dimension,
 *         or whatever unfold() throws for inconsistent shapes
 */
void HessianComputer::recordHessianForConv2D(const luci::CircleNode *node)
{
  assert(_input_tensor->shape().num_dims() == 4);
  assert(_input_tensor->element_type() == luci_interpreter::DataType::FLOAT32);

  const auto circle_conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
  const auto node_filter = loco::must_cast<luci::CircleConst *>((circle_conv2d)->filter());
  assert(circle_conv2d->rank() >= 4);
  assert(node_filter->dtype() == loco::DataType::FLOAT32);
  assert(node_filter->rank() == 4);

  const uint32_t input_n = _input_tensor->shape().dim(0);
  const uint32_t input_h = _input_tensor->shape().dim(1);
  const uint32_t input_w = _input_tensor->shape().dim(2);
  const uint32_t input_c = _input_tensor->shape().dim(3);

  const uint32_t stride_h = circle_conv2d->stride()->h();
  const uint32_t stride_w = circle_conv2d->stride()->w();
  const uint32_t dilation_h = circle_conv2d->dilation()->h();
  const uint32_t dilation_w = circle_conv2d->dilation()->w();

  const uint32_t kernel_oc = node_filter->dim(0).value();
  const uint32_t kernel_h = node_filter->dim(1).value();
  const uint32_t kernel_w = node_filter->dim(2).value();
  const uint32_t kernel_ic = node_filter->dim(3).value();

  // Row length of the unfolded buffer. Taken from the filter dimensions
  // because these are exactly what unfold() uses for its patch size.
  const uint32_t size_in_ch = kernel_h * kernel_w * kernel_ic;
  // Checked at run time (not only by assert) because it is used as a divisor.
  if (size_in_ch == 0)
    throw std::runtime_error("RecordHessian: Filter kernel size is zero.");

  const auto data = _input_tensor->data<float>();
  const auto num_elements = _input_tensor->shape().num_elements();
  assert(data != nullptr);
  assert(num_elements != 0);
  std::vector<float> buf(data, data + num_elements);

  unfold(buf, input_n, input_h, input_w, input_c, stride_h, stride_w, dilation_h, dilation_w,
         kernel_oc, kernel_h, kernel_w, kernel_ic);
  const size_t length = buf.size() / size_in_ch;

  std::vector<float> hessian(static_cast<size_t>(size_in_ch) * size_in_ch, 0);

  // X^T * X is symmetric: compute the upper triangle and mirror it.
  for (uint32_t i = 0; i < size_in_ch; ++i)
  {
    for (uint32_t j = i; j < size_in_ch; ++j)
    {
      float sum = 0;
      for (size_t k = 0; k < length; ++k)
      {
        const size_t row = k * size_in_ch;
        sum += buf[row + i] * buf[row + j];
      }
      hessian[static_cast<size_t>(i) * size_in_ch + j] = 2 * sum;
      hessian[static_cast<size_t>(j) * size_in_ch + i] = 2 * sum;
    }
  }

  HessianVector &vector = _hessian_map[node];
  vector.update(hessian);
}
|
||
/**
 * @brief Validate the arguments, remember input_tensor and dispatch to the
 *        worker matching the node's opcode.
 * @throws std::invalid_argument if node or input_tensor is null
 * @throws std::runtime_error if input_tensor is not FLOAT32, or if the node is
 *         neither FULLY_CONNECTED nor CONV_2D
 */
void HessianComputer::recordHessian(const luci::CircleNode *node,
                                    const luci_interpreter::Tensor *input_tensor)
{
  const bool has_null_argument = (node == nullptr) || (input_tensor == nullptr);
  if (has_null_argument)
  {
    throw std::invalid_argument("RecordHessian: node or input_tensor is null.");
  }

  const bool is_float32 = input_tensor->element_type() == luci_interpreter::DataType::FLOAT32;
  if (!is_float32)
  {
    throw std::runtime_error("RecordHessian: Unsupported dtype: only FLOAT32 is supported.");
  }

  // The per-operator workers read the tensor from this member.
  _input_tensor = input_tensor;

  const auto opcode = node->opcode();
  if (opcode == luci::CircleOpcode::FULLY_CONNECTED)
  {
    recordHessianForFullyConnected(node);
    return;
  }
  if (opcode == luci::CircleOpcode::CONV_2D)
  {
    recordHessianForConv2D(node);
    return;
  }

  throw std::runtime_error("RecordHessian: " + node->name() + " is unsupported op.");
}
|
||
std::unique_ptr<HessianMap> HessianComputer::getMap() | ||
{ | ||
auto hessian_map = std::make_unique<HessianMap>(); | ||
|
||
for (auto item : _hessian_map) | ||
{ | ||
auto &vec = (*hessian_map)[item.first]; | ||
vec = item.second.hessian; | ||
} | ||
|
||
return hessian_map; | ||
} | ||
|
||
} // namespace record_hessian |
Oops, something went wrong.