Skip to content

Commit

Permalink
[record-hessian] Introduce HessianComputer (Samsung#14265)
Browse files Browse the repository at this point in the history
This commit introduces the Hessian computer and the Hessian vector.

ONE-DCO-1.0-Signed-off-by: Banseok Lee <[email protected]>
  • Loading branch information
BLee-bot authored Nov 1, 2024
1 parent cf45dcc commit ccf6510
Show file tree
Hide file tree
Showing 7 changed files with 490 additions and 0 deletions.
36 changes: 36 additions & 0 deletions compiler/record-hessian/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Collect every .cpp file under src/, then separate the unit tests (*.test.cpp)
# so that they are not compiled into the library itself.
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})

# The library is built as a static archive from the non-test sources.
add_library(record-hessian STATIC ${SOURCES})

# include/ holds the public headers (exported to users of the library);
# src/ is only visible while building the library itself.
target_include_directories(record-hessian PUBLIC include)
target_include_directories(record-hessian PRIVATE src)

# Libraries record-hessian links against.
target_link_libraries(record-hessian luci_import)
target_link_libraries(record-hessian luci_env)
target_link_libraries(record-hessian luci_export)
target_link_libraries(record-hessian luci_interpreter)
target_link_libraries(record-hessian luci_log)
target_link_libraries(record-hessian dio_hdf5)

# Install the archive and the public headers (only *.h files under include/).
install(TARGETS record-hessian DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")

# Everything below this point is test-only; stop here when tests are disabled.
if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)

nnas_find_package(GTest REQUIRED)

# Unit-test executable built from the *.test.cpp files collected above.
GTest_AddTest(record_hessian_tests ${TESTS})
target_include_directories(record_hessian_tests PRIVATE include)
target_include_directories(record_hessian_tests PRIVATE src)
target_link_libraries(record_hessian_tests luci_lang)
target_link_libraries(record_hessian_tests luci_pass)
target_link_libraries(record_hessian_tests loco)
target_link_libraries(record_hessian_tests dio_hdf5)
target_link_libraries(record_hessian_tests nncc_coverage)
target_link_libraries(record_hessian_tests luci_interpreter)
target_link_libraries(record_hessian_tests record-hessian)
3 changes: 3 additions & 0 deletions compiler/record-hessian/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# record-hessian

_record-hessian_ calculates the Hessian matrix of activations for quantization.
62 changes: 62 additions & 0 deletions compiler/record-hessian/include/record-hessian/HessianComputer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __RECORD_HESSIAN_HESSIANCOMPUTER_H__
#define __RECORD_HESSIAN_HESSIANCOMPUTER_H__

#include "record-hessian/HessianVector.h"

#include <luci/IR/CircleNode.h>
#include <luci_interpreter/Interpreter.h>

#include <memory>
#include <vector>
#include <unordered_map>

namespace record_hessian
{
/**
* @brief Container aliases for per-node Hessian data.
*
* HessianMap maps a node to a plain vector of Hessian values (the type handed out by
* HessianComputer::getMap). HessianVectorMap maps a node to the HessianVector accumulator
* that HessianComputer keeps internally.
*/
using HessianMap = std::unordered_map<const luci::CircleNode *, std::vector<float>>;
using HessianVectorMap = std::unordered_map<const luci::CircleNode *, HessianVector>;

/**
* @brief Record approximated hessian matrix from
* GPTQ paper(https://arxiv.org/abs/2210.17323).
*
* One Hessian accumulator is kept per node; every call to recordHessian() for the same
* node folds a new sample into that node's accumulator.
*/
class HessianComputer
{
public:
/**
* @brief Update the Hessian recorded for node from the given input tensor.
* @param node         Node whose Hessian is updated (FULLY_CONNECTED or CONV_2D).
* @param input_tensor FLOAT32 tensor the Hessian is computed from.
* @throws std::invalid_argument if node or input_tensor is null.
* @throws std::runtime_error if the tensor is not FLOAT32 or the node's op is unsupported.
*/
void recordHessian(const luci::CircleNode *node, const luci_interpreter::Tensor *input_tensor);

/**
* @brief Return a newly allocated map holding a copy of the Hessian values of every
* node recorded so far.
*/
std::unique_ptr<HessianMap> getMap();

private:
// Accumulated Hessian per node.
HessianVectorMap _hessian_map;
// Tensor passed to the current recordHessian() call; read by the per-op helpers below.
// Not owned by this class.
const luci_interpreter::Tensor *_input_tensor = nullptr;

// Per-op helpers; both read _input_tensor and update _hessian_map[node].
void recordHessianForConv2D(const luci::CircleNode *node);

void recordHessianForFullyConnected(const luci::CircleNode *node);
};

/**
* @brief Unfold an NHWC buffer into convolution patches; buf is replaced by the result.
*
* @param buf                     Input values laid out as (input_n, input_h, input_w, input_c);
*                                on return it holds the unfolded patches.
* @param input_n,input_h,input_w,input_c  Shape of the input held in buf.
* @param stride_h,stride_w       Step between neighbouring kernel positions.
* @param dilation_h,dilation_w   Spacing between kernel taps.
* @param kernel_oc,kernel_h,kernel_w,kernel_ic  Shape of the convolution kernel;
*                                kernel_ic must be equal to input_c.
* @throws std::runtime_error if input_c differs from kernel_ic.
* @note No padding parameter exists: only kernel positions that lie entirely inside the
*       input are produced.
*/
void unfold(std::vector<float> &buf, uint32_t input_n, uint32_t input_h, uint32_t input_w,
uint32_t input_c, uint32_t stride_h, uint32_t stride_w, uint32_t dilation_h,
uint32_t dilation_w, uint32_t kernel_oc, uint32_t kernel_h, uint32_t kernel_w,
uint32_t kernel_ic);

} // namespace record_hessian

#endif // __RECORD_HESSIAN_HESSIANCOMPUTER_H__
60 changes: 60 additions & 0 deletions compiler/record-hessian/include/record-hessian/HessianVector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __RECORD_HESSIAN_HESSIANVECTOR_H__
#define __RECORD_HESSIAN_HESSIANVECTOR_H__

#include <luci/IR/CircleNodes.h>

#include <cstddef>
#include <vector>

namespace record_hessian
{

/**
 * @brief Running element-wise mean of the Hessians fed to update().
 *
 * hessian holds the current mean and count the number of samples folded into it.
 */
struct HessianVector
{
  std::vector<float> hessian;
  size_t count;

  HessianVector() : hessian(), count(0) {}

  /**
   * @brief Fold new_hessian into the running mean and increase count by one.
   *
   * The stored vector always takes the size of new_hessian: it is allocated on the first
   * call and, should a later sample have a different size, truncated or zero-extended
   * before averaging.
   */
  void update(const std::vector<float> &new_hessian)
  {
    const size_t numel = new_hessian.size();

    // A resize to the current size is a no-op, so one call covers both the first
    // sample and a sample whose size differs from the stored one.
    hessian.resize(numel);

    const float prev_count = static_cast<float>(count);
    const float alpha = 1.f / static_cast<float>(count + 1);

    for (size_t idx = 0; idx < numel; ++idx)
    {
      // mean_new = (mean_old * count + sample) / (count + 1)
      hessian[idx] = (hessian[idx] * prev_count + new_hessian[idx]) * alpha;
    }

    ++count;
  }
};

} // namespace record_hessian

#endif // __RECORD_HESSIAN_HESSIANVECTOR_H__
3 changes: 3 additions & 0 deletions compiler/record-hessian/requires.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Modules that record-hessian requires.
# NOTE(review): presumably read by the nncc build to resolve module dependencies - confirm.
require("luci")
require("luci-interpreter")
require("dio-hdf5")
218 changes: 218 additions & 0 deletions compiler/record-hessian/src/HessianComputer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "record-hessian/HessianComputer.h"

#include <luci/IR/CircleQuantParam.h>

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

namespace record_hessian
{

/**
 * @brief Unfold (im2col) an NHWC buffer into convolution patches; the result replaces buf.
 * @note (N, H, W, C) -> (N, L, C * K_h * K_w), where L = out_h * out_w is the number of
 *       kernel positions. Inside one patch the channel is the slowest-varying index,
 *       followed by the kernel row and then the kernel column.
 *       See details(https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html).
 * @note No padding is applied: only kernel positions that lie entirely inside the input
 *       are produced.
 *
 * @param buf                     Input laid out as (input_n, input_h, input_w, input_c);
 *                                left untouched if an exception is thrown.
 * @param input_n,input_h,input_w,input_c  Shape of the input held in buf.
 * @param stride_h,stride_w       Step between neighbouring kernel positions.
 * @param dilation_h,dilation_w   Spacing between kernel taps.
 * @param kernel_oc,kernel_h,kernel_w,kernel_ic  Kernel shape (kernel_oc is only sanity-checked).
 * @throws std::runtime_error if input_c != kernel_ic, if the dilated kernel does not fit
 *         inside the input, or if buf holds fewer elements than the given input shape.
 */
void unfold(std::vector<float> &buf, uint32_t input_n, uint32_t input_h, uint32_t input_w,
            uint32_t input_c, uint32_t stride_h, uint32_t stride_w, uint32_t dilation_h,
            uint32_t dilation_w, uint32_t kernel_oc, uint32_t kernel_h, uint32_t kernel_w,
            uint32_t kernel_ic)
{
  assert(input_n > 0 && input_h > 0 && input_w > 0 && input_c > 0);
  assert(stride_h > 0 && stride_w > 0);
  assert(kernel_oc > 0 && kernel_h > 0 && kernel_w > 0 && kernel_ic > 0);

  if (input_c != kernel_ic)
    throw std::runtime_error("RecordHessian: Input channels do not match kernel channels.");

  // Extent covered by the dilated kernel. Computed in 64 bits and checked against the
  // input before subtracting, because "input - extent" in uint32_t silently wraps around
  // when the kernel is larger than the input.
  const uint64_t extent_h = static_cast<uint64_t>(dilation_h) * (kernel_h - 1) + 1;
  const uint64_t extent_w = static_cast<uint64_t>(dilation_w) * (kernel_w - 1) + 1;
  if (extent_h > input_h || extent_w > input_w)
    throw std::runtime_error("RecordHessian: Kernel does not fit inside the input.");

  // All sizes and indices below use size_t so that large tensors do not overflow 32 bits.
  const size_t in_h = input_h;
  const size_t in_w = input_w;
  const size_t in_ch = input_c;

  if (buf.size() < static_cast<size_t>(input_n) * in_h * in_w * in_ch)
    throw std::runtime_error("RecordHessian: Input buffer is smaller than the input shape.");

  const size_t out_height = static_cast<size_t>((input_h - extent_h) / stride_h + 1);
  const size_t out_width = static_cast<size_t>((input_w - extent_w) / stride_w + 1);
  const size_t patch_size = static_cast<size_t>(kernel_h) * kernel_w * kernel_ic;

  std::vector<float> unfolded_buf(static_cast<size_t>(input_n) * out_height * out_width *
                                    patch_size,
                                  0.0f);

  size_t index = 0;
  for (size_t n = 0; n < input_n; ++n)
  {
    for (size_t y = 0; y < out_height; ++y)
    {
      for (size_t x = 0; x < out_width; ++x)
      {
        for (size_t c = 0; c < in_ch; ++c)
        {
          for (size_t ky = 0; ky < kernel_h; ++ky)
          {
            const size_t in_y = y * stride_h + ky * dilation_h;
            for (size_t kx = 0; kx < kernel_w; ++kx)
            {
              const size_t in_x = x * stride_w + kx * dilation_w;
              // in_y < input_h and in_x < input_w always hold here: the output size was
              // derived from the kernel extent, so no bounds check is needed.
              unfolded_buf[index++] = buf[((n * in_h + in_y) * in_w + in_x) * in_ch + c];
            }
          }
        }
      }
    }
  }

  buf.swap(unfolded_buf);
}

namespace
{

/**
 * @brief Compute 2 * X^T * X for a row-major matrix X of shape (length, channels).
 * @param data     Pointer to length * channels contiguous values.
 * @param length   Number of rows (samples).
 * @param channels Number of columns (input channels).
 * @return Row-major (channels x channels) matrix.
 *
 * The result is symmetric, so only the entries with j >= i are accumulated and each one
 * is mirrored. Every entry is still the same in-order float sum over the samples.
 */
std::vector<float> computeHessian(const float *data, size_t length, size_t channels)
{
  // size_t arithmetic: channels * channels overflows 32 bits from 65536 channels on.
  std::vector<float> hessian(channels * channels, 0.0f);

  for (size_t i = 0; i < channels; ++i)
  {
    for (size_t j = i; j < channels; ++j)
    {
      float sum = 0;
      for (size_t k = 0; k < length; ++k)
      {
        sum += data[i + k * channels] * data[j + k * channels];
      }
      const float value = 2 * sum;
      hessian[i * channels + j] = value;
      hessian[j * channels + i] = value;
    }
  }

  return hessian;
}

} // namespace

/**
 * @brief Fold the Hessian of the current input tensor into the accumulator of a
 *        FULLY_CONNECTED node.
 *
 * The tensor stored in _input_tensor must be FLOAT32 with rank 2 ([length, channel]) or
 * rank 3 ([batch, length, channel]); its last dimension is the channel count and all
 * leading dimensions are treated as samples.
 *
 * @throws std::runtime_error if the tensor rank is neither 2 nor 3.
 */
void HessianComputer::recordHessianForFullyConnected(const luci::CircleNode *node)
{
  assert(_input_tensor->element_type() == luci_interpreter::DataType::FLOAT32);

  const auto &shape = _input_tensor->shape();
  const auto rank = shape.num_dims();
  if (rank != 2 && rank != 3)
  {
    throw std::runtime_error("RecordHessian: Unsupported node rank");
  }

  const auto data = _input_tensor->data<float>();
  const auto num_elements = static_cast<size_t>(shape.num_elements());
  const auto size_in_ch = static_cast<size_t>(shape.dim(rank - 1));
  assert(size_in_ch != 0);
  const size_t length = num_elements / size_in_ch;

  // The tensor data is only read, so it is used in place rather than copied.
  HessianVector &vector = _hessian_map[node];
  vector.update(computeHessian(data, length, size_in_ch));
}

/**
 * @brief Fold the Hessian of the current input tensor into the accumulator of a
 *        CONV_2D node.
 *
 * The rank-4 FLOAT32 tensor stored in _input_tensor is unfolded into kernel-sized
 * patches with unfold(); each patch is one sample of size kernel_h * kernel_w * kernel_ic.
 *
 * @note The node's padding attribute is not consulted; patches are taken without padding.
 * @throws std::runtime_error propagated from unfold() (e.g. channel mismatch).
 */
void HessianComputer::recordHessianForConv2D(const luci::CircleNode *node)
{
  assert(_input_tensor->shape().num_dims() == 4);
  assert(_input_tensor->element_type() == luci_interpreter::DataType::FLOAT32);

  const auto circle_conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
  const auto node_filter = loco::must_cast<luci::CircleConst *>((circle_conv2d)->filter());
  assert(circle_conv2d->rank() >= 4);
  assert(node_filter->dtype() == loco::DataType::FLOAT32);
  assert(node_filter->rank() == 4);

  const auto &shape = _input_tensor->shape();
  const uint32_t input_n = shape.dim(0);
  const uint32_t input_h = shape.dim(1);
  const uint32_t input_w = shape.dim(2);
  const uint32_t input_c = shape.dim(3);

  const uint32_t stride_h = circle_conv2d->stride()->h();
  const uint32_t stride_w = circle_conv2d->stride()->w();
  const uint32_t dilation_h = circle_conv2d->dilation()->h();
  const uint32_t dilation_w = circle_conv2d->dilation()->w();

  const uint32_t kernel_oc = node_filter->dim(0).value();
  const uint32_t kernel_h = node_filter->dim(1).value();
  const uint32_t kernel_w = node_filter->dim(2).value();
  const uint32_t kernel_ic = node_filter->dim(3).value();

  // Number of values per unfolded patch. Taken from the filter dimensions so that it
  // always agrees with the patch size produced by unfold() and never needs a division
  // by the node's output-channel dimension.
  const size_t size_in_ch = static_cast<size_t>(kernel_h) * kernel_w * kernel_ic;

  const auto data = _input_tensor->data<float>();
  const auto num_elements = shape.num_elements();
  assert(data != nullptr);
  assert(num_elements != 0);
  std::vector<float> buf(data, data + num_elements);

  unfold(buf, input_n, input_h, input_w, input_c, stride_h, stride_w, dilation_h, dilation_w,
         kernel_oc, kernel_h, kernel_w, kernel_ic);
  assert(size_in_ch != 0);
  const size_t length = buf.size() / size_in_ch;

  HessianVector &vector = _hessian_map[node];
  vector.update(computeHessian(buf.data(), length, size_in_ch));
}

/**
 * @brief Validate the arguments and dispatch to the Hessian routine matching the node's op.
 * @param node         Node whose Hessian is updated.
 * @param input_tensor Tensor the Hessian is computed from; remembered in _input_tensor
 *                     for the per-op helpers.
 * @throws std::invalid_argument if node or input_tensor is null.
 * @throws std::runtime_error if the tensor is not FLOAT32 or the op is unsupported.
 */
void HessianComputer::recordHessian(const luci::CircleNode *node,
                                    const luci_interpreter::Tensor *input_tensor)
{
  const bool has_null_argument = (node == nullptr) || (input_tensor == nullptr);
  if (has_null_argument)
  {
    throw std::invalid_argument("RecordHessian: node or input_tensor is null.");
  }

  const auto dtype = input_tensor->element_type();
  if (dtype != luci_interpreter::DataType::FLOAT32)
  {
    throw std::runtime_error("RecordHessian: Unsupported dtype: only FLOAT32 is supported.");
  }

  _input_tensor = input_tensor;

  const auto opcode = node->opcode();
  if (opcode == luci::CircleOpcode::FULLY_CONNECTED)
  {
    recordHessianForFullyConnected(node);
    return;
  }
  if (opcode == luci::CircleOpcode::CONV_2D)
  {
    recordHessianForConv2D(node);
    return;
  }

  throw std::runtime_error("RecordHessian: " + node->name() + " is unsupported op.");
}

/**
 * @brief Build a map from every recorded node to a copy of its current Hessian values.
 * @return Newly allocated map owned by the caller; the internal accumulators are left
 *         unchanged.
 */
std::unique_ptr<HessianMap> HessianComputer::getMap()
{
  auto hessian_map = std::make_unique<HessianMap>();
  hessian_map->reserve(_hessian_map.size());

  // Iterate by const reference: iterating by value deep-copies each entry (including its
  // whole Hessian vector) before it is copied a second time into the result.
  for (const auto &item : _hessian_map)
  {
    hessian_map->emplace(item.first, item.second.hessian);
  }

  return hessian_map;
}

} // namespace record_hessian
Loading

0 comments on commit ccf6510

Please sign in to comment.