diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt index 92cdf3c18d1..b55894d7e34 100644 --- a/runtime/onert/backend/cpu/CMakeLists.txt +++ b/runtime/onert/backend/cpu/CMakeLists.txt @@ -1,6 +1,8 @@ nnfw_find_package(Ruy REQUIRED) file(GLOB_RECURSE SOURCES "*.cc") +file(GLOB_RECURSE TESTS "*.test.cc") +list(REMOVE_ITEM SOURCES ${TESTS}) add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES}) @@ -21,3 +23,25 @@ set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES INSTALL_RPATH "$ORIGIN:$ORIGIN/..") install(TARGETS ${LIB_ONERT_BACKEND_CPU} DESTINATION lib/nnfw/backend) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +# Unit Tests +set(TEST_ONERT_CPU_BACKEND test_onert_cpu_backend) + +add_executable(${TEST_ONERT_CPU_BACKEND} ${TESTS}) + +target_link_libraries(${TEST_ONERT_CPU_BACKEND} ${LIB_ONERT_BACKEND_CPU}) +# Requires linking nnfw_coverage: check header coverage +target_link_libraries(${TEST_ONERT_CPU_BACKEND} nnfw_coverage) +target_link_libraries(${TEST_ONERT_CPU_BACKEND} onert_core) +target_link_libraries(${TEST_ONERT_CPU_BACKEND} gtest gtest_main dl ${LIB_PTHREAD}) + +# Set install rpath to find onert_core, onert_backend_cpu, etc +set_target_properties(${TEST_ONERT_CPU_BACKEND} PROPERTIES + INSTALL_RPATH "$ORIGIN/../lib/nnfw:$ORIGIN/../lib/nnfw/backend") + +add_test(${TEST_ONERT_CPU_BACKEND} ${TEST_ONERT_CPU_BACKEND}) +install(TARGETS ${TEST_ONERT_CPU_BACKEND} DESTINATION unittest) diff --git a/runtime/onert/backend/cpu/SharedMemoryOperands.cc b/runtime/onert/backend/cpu/SharedMemoryOperands.cc new file mode 100644 index 00000000000..2f357b6b380 --- /dev/null +++ b/runtime/onert/backend/cpu/SharedMemoryOperands.cc @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SharedMemoryOperands.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+namespace
+{
+// Collapses chains such as Reshape->Reshape->Reshape, whose tensors all share one buffer:
+// every shared operand is re-mapped directly to the first input of its chain.
+void reassign_indexes_to_single_sources(
+  ir::OperandIndexMap &shared_memory_operand_map)
+{
+  for (auto &shared_and_source : shared_memory_operand_map)
+  {
+    auto &source_ind = shared_and_source.second;
+    // While the source is itself a shared operand, step up to the operand it shares with.
+    auto it = shared_memory_operand_map.find(source_ind);
+    while (it != shared_memory_operand_map.end())
+    {
+      source_ind = it->second;
+      it = shared_memory_operand_map.find(source_ind);
+    }
+  }
+}
+
+// Tells whether the first output of `op` may reuse the buffer of its first input.
+// Requires a Reshape, ExpandDims or Squeeze whose first input and first output are both
+// static and are neither inputs nor outputs of the whole graph.
+bool is_memory_sharing_allowed(const ir::IGraph &graph, const ir::IOperation &op)
+{
+  const auto opcode = op.opcode();
+  if (opcode != ir::OpCode::Reshape && opcode != ir::OpCode::ExpandDims &&
+      opcode != ir::OpCode::Squeeze)
+  {
+    return false;
+  }
+  const ir::OperandIndex input = op.getInputs().at(0);
+  const ir::OperandIndex output = op.getOutputs().at(0);
+  if (graph.operands().at(input).info().isDynamic() ||
+      graph.operands().at(output).info().isDynamic())
+  {
+    return false;
+  }
+  const auto is_model_input_output = [&graph](const ir::OperandIndex &ind) {
+    return graph.getInputs().contains(ind) || graph.getOutputs().contains(ind);
+  };
+  return !is_model_input_output(input) && !is_model_input_output(output);
+}
+
+} // namespace
+
+// Maps the first output of every operation accepted by is_memory_sharing_allowed() to the
+// first input of that operation, then collapses chains of such operations so that every
+// entry refers to the operand at the head of its chain.
+ir::OperandIndexMap findSharedMemoryOperandIndexes(const ir::IGraph &graph)
+{
+  ir::OperandIndexMap shared_memory_operand_map;
+  graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) {
+    if (is_memory_sharing_allowed(graph, op))
+    {
+      assert(op.getInputs().size() == 1 || op.getInputs().size() == 2);
+      assert(op.getOutputs().size() == 1);
+      shared_memory_operand_map[op.getOutputs().at(0)] = op.getInputs().at(0);
+    }
+  });
+  reassign_indexes_to_single_sources(shared_memory_operand_map);
+  return shared_memory_operand_map;
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/SharedMemoryOperands.h b/runtime/onert/backend/cpu/SharedMemoryOperands.h
new file mode 100644
index 00000000000..3e4289592ef
--- /dev/null
+++ b/runtime/onert/backend/cpu/SharedMemoryOperands.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
+#define __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
+
+#include "ir/IGraph.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+/* Find indexes of operands assigned to tensors which can share memory (point at the same buffer).
+ * Each key is an operand that can reuse the memory of the operand stored as its value.
+ * Note that it's applicable for operations that do NOT change data but only shape, like Reshape.
+ */
+ir::OperandIndexMap findSharedMemoryOperandIndexes(const ir::IGraph &graph);
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_SHARED_MEMORY_OPERANDS_H__
diff --git a/runtime/onert/backend/cpu/SharedMemoryOperands.test.cc b/runtime/onert/backend/cpu/SharedMemoryOperands.test.cc
new file mode 100644
index 00000000000..9760fb55e03
--- /dev/null
+++ b/runtime/onert/backend/cpu/SharedMemoryOperands.test.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "SharedMemoryOperands.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "ir/operation/Squeeze.h"
+#include "ir/operation/Reshape.h"
+
+using namespace onert::backend::cpu;
+using namespace onert::ir;
+
+// Adds an operation other than Reshape/ExpandDims/Squeeze.
+// It is used so that the Reshape input/output is not an input/output of the whole model.
+namespace
+{
+void addNotOptimizedNode(Graph *graph, const OperandIndex &input, const OperandIndex &output)
+{ // Permute is not one of the operations considered for memory sharing.
+  graph->addOperation(std::make_unique<operation::Permute>(input, output));
+}
+} // namespace
+
+TEST(SharedMemoryOperands, no_shared_memory_graph)
+{ // A graph holding only a non-sharing operation yields an empty map.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto not_optim_in = graph->addOperand({4}, data_type);
+  const auto not_optim_out = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), not_optim_in, not_optim_out);
+  graph->addInput(not_optim_in);
+  graph->addOutput(not_optim_out);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 0);
+}
+
+TEST(SharedMemoryOperands, single_reshape_graph)
+{ // An inner Reshape maps its output to its data input.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto not_optim_in = graph->addOperand({4}, data_type);
+  const auto reshape_input = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), not_optim_in, reshape_input);
+  const auto reshape_output = graph->addOperand({2, 2}, data_type);
+  operation::Reshape::Param shape;
+  shape.new_shape = {2, 2};
+  TypeInfo shape_type{DataType::INT32};
+  const auto reshape_shape = graph->addOperand({2}, shape_type);
+  graph->addOperation(
+    std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
+                                         OperandIndexSequence{reshape_output}, shape));
+  const auto not_optim_out_2 = graph->addOperand({2, 2}, data_type);
+  addNotOptimizedNode(graph.get(), reshape_output, not_optim_out_2);
+  graph->addInput(not_optim_in);
+  graph->addOutput(not_optim_out_2);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 1);
+  EXPECT_EQ(indexes_map.begin()->first, reshape_output);
+  EXPECT_EQ(indexes_map.begin()->second, reshape_input);
+}
+
+TEST(SharedMemoryOperands, double_reshape_graph)
+{ // Two chained Reshapes: both outputs must map to the input of the first one.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto not_optim_in = graph->addOperand({4}, data_type);
+  const auto reshape1_input = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), not_optim_in, reshape1_input);
+  const auto reshape1_output = graph->addOperand({2, 2}, data_type);
+  operation::Reshape::Param shape;
+  shape.new_shape = {2, 2};
+  TypeInfo shape_type{DataType::INT32};
+  const auto reshape_shape = graph->addOperand({2}, shape_type);
+  graph->addOperation(
+    std::make_unique<operation::Reshape>(OperandIndexSequence{reshape1_input, reshape_shape},
+                                         OperandIndexSequence{reshape1_output}, shape));
+  const auto reshape2_output = graph->addOperand({2, 2}, data_type);
+  graph->addOperation(
+    std::make_unique<operation::Reshape>(OperandIndexSequence{reshape1_output, reshape_shape},
+                                         OperandIndexSequence{reshape2_output}, shape));
+  const auto not_optim_out_2 = graph->addOperand({2, 2}, data_type);
+  addNotOptimizedNode(graph.get(), reshape2_output, not_optim_out_2);
+  graph->addInput(not_optim_in);
+  graph->addOutput(not_optim_out_2);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 2);
+  // Look entries up by key so the check does not depend on the map's iteration order.
+  const auto first_it = indexes_map.find(reshape1_output);
+  const auto second_it = indexes_map.find(reshape2_output);
+  ASSERT_TRUE(first_it != indexes_map.end() && second_it != indexes_map.end());
+  EXPECT_EQ(first_it->second, reshape1_input);
+  EXPECT_EQ(second_it->second, reshape1_input);
+}
+
+TEST(SharedMemoryOperands, dyn_output_reshape_graph)
+{ // A Reshape with a dynamic output must not be reported.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto not_optim_in = graph->addOperand({4}, data_type);
+  const auto reshape_input = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), not_optim_in, reshape_input);
+  const auto reshape_output = graph->addOperand({}, data_type);
+  graph->operands().at(reshape_output).info().setDynamic();
+  operation::Reshape::Param shape;
+  TypeInfo shape_type{DataType::INT32};
+  const auto reshape_shape = graph->addOperand({2}, shape_type);
+  graph->addOperation(
+    std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
+                                         OperandIndexSequence{reshape_output}, shape));
+  const auto not_optim_out_2 = graph->addOperand({}, data_type);
+  addNotOptimizedNode(graph.get(), reshape_output, not_optim_out_2);
+  graph->addInput(not_optim_in);
+  graph->addOutput(not_optim_out_2);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 0);
+}
+
+TEST(SharedMemoryOperands, model_input_reshape_graph)
+{ // A Reshape fed directly by a model input must not be reported.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto reshape_input = graph->addOperand({4}, data_type);
+  const auto reshape_output = graph->addOperand({2, 2}, data_type);
+  operation::Reshape::Param shape;
+  shape.new_shape = {2, 2};
+  TypeInfo shape_type{DataType::INT32};
+  const auto reshape_shape = graph->addOperand({2}, shape_type);
+  graph->addOperation(
+    std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
+                                         OperandIndexSequence{reshape_output}, shape));
+  const auto not_optim_out = graph->addOperand({2, 2}, data_type);
+  addNotOptimizedNode(graph.get(), reshape_output, not_optim_out);
+  graph->addInput(reshape_input);
+  graph->addOutput(not_optim_out);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 0);
+}
+
+TEST(SharedMemoryOperands, single_squeeze_graph)
+{ // An inner Squeeze maps its output to its input.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto not_optim_in = graph->addOperand({4, 1}, data_type);
+  const auto squeeze_input = graph->addOperand({4, 1}, data_type);
+  addNotOptimizedNode(graph.get(), not_optim_in, squeeze_input);
+  const auto squeeze_output = graph->addOperand({4}, data_type);
+  operation::Squeeze::Param axes;
+  axes.dims[0] = 1;
+  axes.ndim = 1;
+  graph->addOperation(std::make_unique<operation::Squeeze>(
+    OperandIndexSequence{squeeze_input}, OperandIndexSequence{squeeze_output}, axes));
+  const auto not_optim_out_2 = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), squeeze_output, not_optim_out_2);
+  graph->addInput(not_optim_in);
+  graph->addOutput(not_optim_out_2);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 1);
+  EXPECT_EQ(indexes_map.begin()->first, squeeze_output);
+  EXPECT_EQ(indexes_map.begin()->second, squeeze_input);
+}
+
+TEST(SharedMemoryOperands, multiple_reshape_input_consumers)
+{ // The Reshape input also feeds a second operation; the Reshape entry is still reported.
+  auto graph = std::make_unique<Graph>();
+  TypeInfo data_type{DataType::FLOAT32};
+  const auto not_optim_in = graph->addOperand({4}, data_type);
+  const auto reshape_input = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), not_optim_in, reshape_input);
+  const auto reshape_output = graph->addOperand({2, 2}, data_type);
+  operation::Reshape::Param shape;
+  shape.new_shape = {2, 2};
+  TypeInfo shape_type{DataType::INT32};
+  const auto reshape_shape = graph->addOperand({2}, shape_type);
+  graph->addOperation(
+    std::make_unique<operation::Reshape>(OperandIndexSequence{reshape_input, reshape_shape},
+                                         OperandIndexSequence{reshape_output}, shape));
+  const auto not_optim_out_2 = graph->addOperand({2, 2}, data_type);
+  addNotOptimizedNode(graph.get(), reshape_output, not_optim_out_2);
+  const auto not_optim_out_3 = graph->addOperand({4}, data_type);
+  addNotOptimizedNode(graph.get(), reshape_input, not_optim_out_3);
+  graph->addInput(not_optim_in);
+  graph->addOutput(not_optim_out_2);
+  graph->addOutput(not_optim_out_3);
+  graph->verify();
+
+  const auto indexes_map = findSharedMemoryOperandIndexes(*graph);
+
+  ASSERT_EQ(indexes_map.size(), 1);
+  EXPECT_EQ(indexes_map.begin()->first, reshape_output);
+  EXPECT_EQ(indexes_map.begin()->second, reshape_input);
+}