Skip to content

Commit

Permalink
[onert] Propagate shared memory operand indexes to cpu backend (Samsu…
Browse files Browse the repository at this point in the history
…ng#14230)

This commit adds propagation of shared memory operand indexes to the cpu backend.
Note that the propagated indexes map is not filled yet.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer [email protected]
  • Loading branch information
mbencer authored Nov 29, 2024
1 parent 4f30a00 commit a5e770a
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 40 deletions.
3 changes: 2 additions & 1 deletion runtime/onert/backend/cpu/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
// TODO: Use findSharedMemoryOperandIndexes method here
auto tb = std::make_shared<TensorBuilder>(tr, ir::OperandIndexMap<ir::OperandIndex>{});
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/backend/cpu/BackendContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ namespace backend
namespace cpu
{

ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
ITensorRegistry *BackendContext::genTensors()
{
  // Forward to the basic backend helper with this context's components spelled
  // out explicitly, including the tensor builder's shared-memory operand index
  // map (per the commit note, that map is not filled yet upstream).
  return basic::genTensors(tensor_builder, *graph(), external_operands(), tensor_registry,
                           data().op_order, tensor_builder->getSharedMemoryOperandIndexes());
}

FunctionMap BackendContext::genKernels()
{
Expand All @@ -43,7 +47,8 @@ FunctionMap BackendContext::genKernels()
ret.emplace(op_ind, std::move(fn_seq));
}

basic::initConsts(*this);
basic::initConsts(graph()->operands(), external_operands(), tensor_registry.get(),
tensor_builder->getSharedMemoryOperandIndexes());

// NOTE For memory optimization, we want to free some operand data
const_cast<ir::Graph &>(*_data.graph)
Expand Down
47 changes: 28 additions & 19 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,18 @@ namespace basic
{

// TODO Remove the template param BackendContext once unification of cpu backend context is done
template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
template <typename T_TensorBuilder>
void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::vector<onert::ir::OperationIndex> &op_order)
{
const ir::Graph &graph = *ctx.graph();
const auto &order = ctx.data().op_order;
auto tensor_builder = ctx.tensor_builder;

ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;

// Prepare scanning
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;

// TODO Check if we need to handle unused tensors
Expand Down Expand Up @@ -95,7 +94,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
for (const auto &op_ind : order)
for (const auto &op_ind : op_order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
Expand All @@ -104,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// Define outputs
for (const auto &ind : op_outputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -121,7 +120,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// non-constant because of less memory usage by memory planning in here
for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand All @@ -138,7 +137,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct

for (const auto &ind : op_inputs)
{
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
continue;
if (!tensor_builder->isRegistered(ind))
continue;
Expand Down Expand Up @@ -177,21 +176,24 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
[](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}

template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
{
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
if (external_operands.contains(ind))
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

// TODO Get compiler options from compiler, and use it rather than getting it from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
basic::planTensors(ctx);
basic::planTensors(tensor_builder, graph, external_operands, op_order);
}
else
{
Expand All @@ -205,12 +207,19 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

tensor_builder->allocate();

return ctx.tensor_registry.get();
return tensor_registry.get();
}

// Context-based overload kept for backward compatibility: unpacks the backend
// context and forwards to the explicit-parameter genTensors with an empty
// shared-memory operand index map.
template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
{
  return genTensors(ctx.tensor_builder, *ctx.graph(), ctx.external_operands(), ctx.tensor_registry,
                    ctx.data().op_order, {});
}

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
Expand All @@ -234,7 +243,7 @@ inline void initConsts(const ir::Operands &operands,

// Context-based overload kept for backward compatibility: forwards the
// context's operands, external-operand set and tensor registry to the
// explicit-parameter initConsts, passing an empty shared-memory operand map.
// (Span resolved to the post-change 4-argument call; the diff rendering
// carried both the old 3-argument and new 4-argument call lines.)
inline void initConsts(BackendContext &ctx)
{
  initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(), {});
}

} // namespace basic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
Expand All @@ -57,6 +59,7 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes = {});

/**
* @brief Register tensor information to allocate on CPU backend
Expand All @@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getSharedMemoryOperandIndexes() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
};

} // namespace basic
Expand Down
18 changes: 11 additions & 7 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
// Creates a static tensor manager using the default MemoryManager.
// The shared-memory operand index map is stored for later use (per the commit
// note, the map is not filled/consumed yet -- TODO confirm once wired up).
StaticTensorManager::StaticTensorManager(
  const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
    _dynamic_tensor_manager{dynamic_tensor_manager},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  // DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
// Same as the default constructor, but selects the memory planner by
// planner_id (forwarded to MemoryManager).
StaticTensorManager::StaticTensorManager(
  const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
  DynamicTensorManager *dynamic_tensor_manager,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
    _dynamic_tensor_manager{dynamic_tensor_manager},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  // DO NOTHING
}
Expand Down
21 changes: 16 additions & 5 deletions runtime/onert/core/src/backend/basic/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,23 @@ namespace backend
namespace basic
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
// Builds the tensor-management stack for the basic backend: a dynamic tensor
// manager plus a static one that receives the shared-memory operand index
// map. The map is also kept in _shared_memory_operand_indexes so that
// getSharedMemoryOperandIndexes() can expose it.
TensorBuilder::TensorBuilder(
  const std::shared_ptr<TensorRegistry> &tensor_reg,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(),
                                               shared_memory_operand_indexes)},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  /* empty */
}

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::string planner_id)
// Planner-selecting variant of the constructor.
// Fix: also initialize _shared_memory_operand_indexes -- the sibling
// constructor stores the map, but this overload did not, so
// getSharedMemoryOperandIndexes() would return an empty map for builders
// created through this overload even when a non-empty map was supplied.
TensorBuilder::TensorBuilder(
  const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
  const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_indexes)
  : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
                                               shared_memory_operand_indexes)},
    _shared_memory_operand_indexes{shared_memory_operand_indexes}
{
  /* empty */
}
Expand Down Expand Up @@ -83,6 +89,11 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const

// Allocates memory for the registered static tensors by delegating to
// StaticTensorManager::allocateNonconsts().
void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }

// Returns the shared-memory operand index map supplied at construction time.
// Note: per the commit message, callers do not fill this map yet.
const ir::OperandIndexMap<ir::OperandIndex> &TensorBuilder::getSharedMemoryOperandIndexes() const
{
  return _shared_memory_operand_indexes;
}

} // namespace basic
} // namespace backend
} // namespace onert
4 changes: 2 additions & 2 deletions runtime/onert/core/src/backend/builtin/TensorBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ namespace builtin

// builtin backend tensor builder: wires a dynamic and a static tensor manager
// on top of the base registry. An explicitly empty shared-memory operand
// index map is passed to basic::StaticTensorManager, matching its new
// constructor signature. (Span resolved to the post-change constructor; the
// diff rendering carried both the old 2-argument and new 3-argument forms.)
TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
  : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
    _static_tensor_mgr{new basic::StaticTensorManager(
        _tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), ir::OperandIndexMap<ir::OperandIndex>{})}
{
  /* empty */
}
Expand Down

0 comments on commit a5e770a

Please sign in to comment.