Skip to content

Commit

Permalink
[onert] Propagate shared memory operand indexes to cpu backend (Samsu…
Browse files Browse the repository at this point in the history
…ng#14230)

This commit adds propagation of shared memory operand indexes to the cpu backend.
Note that the propagated indexes map is not filled yet.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer [email protected]
  • Loading branch information
mbencer authored Nov 29, 2024
1 parent 4f30a00 commit a5e770a
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 40 deletions.
3 changes: 2 additions & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
// TODO: Use findSharedMemoryOperandIndexes method here
auto tb = std::make_shared<TensorBuilder>(tr, ir::OperandIndexMap<ir::OperandIndex>{});
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/backend/cpu/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ namespace backend
namespace cpu
{

ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
ITensorRegistry *BackendContext::genTensors()
{
  // Forward to the basic backend helper with this context's components spelled
  // out explicitly, including the tensor builder's shared-memory operand index
  // map (per the commit note, that map is not filled yet upstream).
  return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry,
                           data().op_order, tensor_builder->getSharedMemoryOperandIndexes());
}

FunctionMap BackendContext::genKernels()
{
Expand All @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels()
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(*this);
basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
Expand Down
47 changes: 28 additions & 19 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,18 @@ namespace basic
{

// TODO Remove the template param BackendContext once unification of cpu backend context is done
template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
template <typename T_TensorBuilder>
void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::vector<onert::ir::OperationIndex> &op_order)
{
const ir::Graph &graph = *ctx.graph();
const auto &order = ctx.data().op_order;
auto tensor_builder = ctx.tensor_builder;

ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;

// Prepare scanning
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;

// TODO Check if we need to handle unused tensors
Expand Down Expand Up @@ -95,7 +94,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
for (const auto &op_ind : order)
for (const auto &op_ind : op_order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
Expand All @@ -104,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// Define outputs
for (const auto &ind : op_outputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -121,7 +120,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// non-constant because of less memory usage by memory planning in here
for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -138,7 +137,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct

for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand Down Expand Up @@ -177,21 +176,24 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
[](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}

template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
{
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

// TODO Get compiler options from compiler, and use it rather than getting it from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
basic::planTensors(ctx);
basic::planTensors(tensor_builder, graph, external_operands, op_order);
}
else
{
Expand All @@ -205,12 +207,19 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

tensor_builder->allocate();

return ctx.tensor_registry.get();
return tensor_registry.get();
}

// Context-based overload kept for backward compatibility: unpacks the backend
// context and forwards to the explicit-parameter genTensors with an empty
// shared-memory operand index map.
template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
{
  return genTensors(ctx.tensor_builder, *ctx.graph(), ctx.external_operands(), ctx.tensor_registry,
                    ctx.data().op_order, {});
}

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
Expand All @@ -234,7 +243,7 @@ inline void initConsts(const ir::Operands &operands,

// Context-based overload kept for backward compatibility: forwards the
// context's operands, external-operand set and tensor registry to the
// explicit-parameter initConsts, passing an empty shared-memory operand map.
// (Span resolved to the post-change 4-argument call; the diff rendering
// carried both the old 3-argument and new 4-argument call lines.)
inline void initConsts(BackendContext &ctx)
{
  initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(), {});
}

} // namespace basic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
Expand All @@ -57,6 +59,7 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});

/**
* @brief Register tensor information to allocate on CPU backend
Expand All @@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getSharedMemoryOperandIndexes() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
18 changes: 11 additions & 7 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
// Creates a static tensor manager using the default MemoryManager.
// The shared-memory operand index map is stored for later use (per the commit
// note, the map is not filled/consumed yet -- TODO confirm once wired up).
StaticTensorManager::StaticTensorManager(
  const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
    _dynamic_tensor_manager{dynamic_tensor_manager},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  // DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
// Same as the default constructor, but selects the memory planner by
// planner_id (forwarded to MemoryManager).
StaticTensorManager::StaticTensorManager(
  const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
  DynamicTensorManager *dynamic_tensor_manager,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
    _dynamic_tensor_manager{dynamic_tensor_manager},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  // DO NOTHING
}
Expand Down
21 changes: 16 additions & 5 deletions runtime/onert/core/src/backend/basic/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,23 @@ namespace backend
namespace basic
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
// Builds the tensor-management stack for the basic backend: a dynamic tensor
// manager plus a static one that receives the shared-memory operand index
// map. The map is also kept in _shared_memory_operand_indexes so that
// getSharedMemoryOperandIndexes() can expose it.
TensorBuilder::TensorBuilder(
  const std::shared_ptr<TensorRegistry> &tensor_reg,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(),
                                               shared_memory_operand_indexes)},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  /* empty */
}

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::string planner_id)
// Planner-selecting variant of the constructor.
// Fix: also initialize _shared_memory_operand_indexes -- the sibling
// constructor stores the map, but this overload did not, so
// getSharedMemoryOperandIndexes() would return an empty map for builders
// created through this overload even when a non-empty map was supplied.
TensorBuilder::TensorBuilder(
  const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
                                               shared_memory_operand_indexes)},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  /* empty */
}
Expand Down Expand Up @@ -83,6 +89,11 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const

// Allocates memory for the registered static tensors by delegating to
// StaticTensorManager::allocateNonconsts().
void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }

// Returns the shared-memory operand index map supplied at construction time.
// Note: per the commit message, callers do not fill this map yet.
const ir::OperandIndexMap<ir::OperandIndex> &TensorBuilder::getSharedMemoryOperandIndexes() const
{
  return _shared_memory_operand_indexes;
}

} // namespace basic
} // namespace backend
} // namespace onert
4 changes: 2 additions & 2 deletions runtime/onert/core/src/backend/builtin/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ namespace builtin

// builtin backend tensor builder: wires a dynamic and a static tensor manager
// on top of the base registry. An explicitly empty shared-memory operand
// index map is passed to basic::StaticTensorManager, matching its new
// constructor signature. (Span resolved to the post-change constructor; the
// diff rendering carried both the old 2-argument and new 3-argument forms.)
TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
  : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
    _static_tensor_mgr{new basic::StaticTensorManager(
        _tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), ir::OperandIndexMap<ir::OperandIndex>{})}
{
  /* empty */
}
Expand Down

0 comments on commit a5e770a

Please sign in to comment.