diff --git a/src/tim/transform/layout_infer_context.h b/src/tim/transform/layout_infer_context.h index d63960c91..6a490777f 100644 --- a/src/tim/transform/layout_infer_context.h +++ b/src/tim/transform/layout_infer_context.h @@ -1,16 +1,18 @@ #ifndef TIM_VX_LAYOUT_INFER_CONTEXT_H_ #define TIM_VX_LAYOUT_INFER_CONTEXT_H_ + #include "permute_vector.h" #include "tim/transform/layout_inference.h" +#include + namespace tim { namespace transform { namespace layout_inference_impl { class LayoutInferContext { public: LayoutInferContext(const std::shared_ptr& src_graph, - std::shared_ptr& infer_graph) - : src_graph_(src_graph), infer_graph_(infer_graph) {} + std::shared_ptr& infer_graph); void SetPermuteVector(std::shared_ptr tensor, std::shared_ptr pv); const std::shared_ptr GetPermuteVector( @@ -22,12 +24,16 @@ class LayoutInferContext { const std::shared_ptr& t_layout); std::shared_ptr GetMapedTensor( const std::shared_ptr& t_src) const; + std::shared_ptr GetMappedGraphInputTensor( + const std::shared_ptr& t_src) const; + std::shared_ptr GetMappedGraphOutputTensor( + const std::shared_ptr& t_src) const; void UpdateGraphInputMap(const std::shared_ptr& i_src, const std::shared_ptr& i_layout); void UpdateGraphOutputMap(const std::shared_ptr& o_src, - const std::shared_ptr& o_layout); + const std::shared_ptr& o_layout); std::map, std::shared_ptr> GetGraphInputMap() const { @@ -44,7 +50,7 @@ class LayoutInferContext { private: std::map, std::shared_ptr> tensor_pv_; - std::vector> visited_op_; + std::unordered_map, bool> op_visited_; // tensor_in_src -> tensor_in_layout std::map, std::shared_ptr> tensor_map_; diff --git a/src/tim/transform/layout_inference.cc b/src/tim/transform/layout_inference.cc index fd2f16366..c517e3cc2 100644 --- a/src/tim/transform/layout_inference.cc +++ b/src/tim/transform/layout_inference.cc @@ -73,7 +73,7 @@ #include "ops/roi_pool_layout_inference.h" #include -#include +#include #include "tim/vx/context.h" #include "tim/vx/graph.h" @@ -87,7 +87,16 @@ 
std::vector> HandleLayoutInfer( std::shared_ptr& ctx, const std::shared_ptr& op); -// Implemention for LayoutInferContext +// Implementation for LayoutInferContext +LayoutInferContext::LayoutInferContext( + const std::shared_ptr& src_graph, + std::shared_ptr& infer_graph) + : src_graph_(src_graph), infer_graph_(infer_graph) { + for (const auto& op : src_graph->OpVector()) { + op_visited_[op] = false; + } +} + void LayoutInferContext::SetPermuteVector(std::shared_ptr tensor, std::shared_ptr pv) { if (tensor_pv_.end() != tensor_pv_.find(tensor)) { @@ -110,27 +119,19 @@ const std::shared_ptr LayoutInferContext::GetPermuteVector( } void LayoutInferContext::MarkVisited(const std::shared_ptr& op) { - if (visited_op_.end() != - std::find(visited_op_.begin(), visited_op_.end(), op)) { - VSILOGW("The operation has been mark as visited."); - } else { - visited_op_.push_back(op); - } + op_visited_[op] = true; } -bool LayoutInferContext::IsVisited(const std::shared_ptr& op) const { - if (visited_op_.end() != - std::find(visited_op_.begin(), visited_op_.end(), op)) { - return true; - } else { - return false; - } +bool LayoutInferContext::IsVisited( + const std::shared_ptr& op) const { + return op_visited_.at(op); } bool LayoutInferContext::IsReadyForInfer( const std::shared_ptr& op) const { for (const auto& tensor : op->impl()->InputsTensor()) { - if (!tensor->IsConstTensor() && tensor->GetId() != (uint32_t)-1 && + if (!tensor->IsConstTensor() && + tensor->GetId() != static_cast(-1) && (tensor_pv_.end() == tensor_pv_.find(tensor))) { return false; } @@ -149,21 +150,43 @@ std::shared_ptr LayoutInferContext::GetMapedTensor( auto it = tensor_map_.find(t_src); if (it != tensor_map_.end()) { return it->second; - } else { - VSILOGE("Tensor has not beed inserted in tensor map."); - assert(false); } + VSILOGE("Tensor has not been inserted in tensor map."); + return nullptr; +} + +std::shared_ptr LayoutInferContext::GetMappedGraphInputTensor( + const std::shared_ptr& t_src) const { + 
auto it = graph_input_map_.find(t_src); + if (it != graph_input_map_.end()) { + return it->second; + } + + VSILOGE("Tensor has not been inserted in graph input tensor map."); return nullptr; } -void LayoutInferContext::UpdateGraphInputMap(const std::shared_ptr& i_src, - const std::shared_ptr& i_layout) { +std::shared_ptr LayoutInferContext::GetMappedGraphOutputTensor( + const std::shared_ptr& t_src) const { + auto it = graph_output_map_.find(t_src); + if (it != graph_output_map_.end()) { + return it->second; + } + + VSILOGE("Tensor has not been inserted in graph output tensor map."); + return nullptr; +} + +void LayoutInferContext::UpdateGraphInputMap( + const std::shared_ptr& i_src, + const std::shared_ptr& i_layout) { graph_input_map_[i_src] = i_layout; } -void LayoutInferContext::UpdateGraphOutputMap(const std::shared_ptr& o_src, - const std::shared_ptr& o_layout) { +void LayoutInferContext::UpdateGraphOutputMap( + const std::shared_ptr& o_src, + const std::shared_ptr& o_layout) { graph_output_map_[o_src] = o_layout; } @@ -173,39 +196,40 @@ void LayoutInferContext::UpdateGraphOutputMap(const std::shared_ptr& op_infer->OnInputs(next_tensors); \ op_infer->OnOutputs(next_tensors); \ break; \ - } \ - -#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx) \ - case op_idx: { \ - auto reduce_type = op->impl()->node()->nn_param.reduce.type; \ - switch (reduce_type) { \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll); \ - default: \ - VSILOGW("Op %d: Default layout inference pass for reduce.", reduce_type);\ - assert(false); \ - } \ - break; \ - } \ - -#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx) \ - case op_idx: { \ - auto 
logical_type = op->impl()->node()->nn_param.relational_ops.op; \ - switch (logical_type) \ - { \ - REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd); \ - REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr); \ - default: \ - VSILOGW("Op %d: Default layout inference pass for logical.", logical_type);\ - assert(false); \ - } \ - break; \ - } \ + } + +#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx) \ + case op_idx: { \ + auto reduce_type = op->impl()->node()->nn_param.reduce.type; \ + switch (reduce_type) { \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll); \ + default: \ + VSILOGW("Op %d: Default layout inference pass for reduce.", \ + reduce_type); \ + assert(false); \ + } \ + break; \ + } + +#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx) \ + case op_idx: { \ + auto logical_type = op->impl()->node()->nn_param.relational_ops.op; \ + switch (logical_type) { \ + REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd); \ + REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr); \ + default: \ + VSILOGW("Op %d: Default layout inference pass for logical.", \ + logical_type); \ + assert(false); \ + } \ + break; \ + } std::vector> HandleLayoutInfer( std::shared_ptr& ctx, @@ -279,8 +303,10 @@ std::vector> HandleLayoutInfer( REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d); REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm); REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast); - REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN, UnidirectionalRnn); - REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN, BidirectionalRnn); + 
REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN, + UnidirectionalRnn); + REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN, + BidirectionalRnn); #ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4); #endif @@ -312,13 +338,13 @@ LayoutInference( std::make_shared(src_graph, infer_graph); - std::deque> tensor_queue; + std::queue> tensor_queue; auto graph_inputs = src_graph->InputsTensor(); for (const auto& t_src : graph_inputs) { auto input = infer_graph->CreateTensor(t_src->GetSpec()); layout_infer_ctx->UpdateTensorMap(t_src, input); layout_infer_ctx->UpdateGraphInputMap(t_src, input); - tensor_queue.push_back(t_src); + tensor_queue.push(t_src); layout_infer_ctx->SetPermuteVector( t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end() ? tensor_pv_map[t_src] @@ -329,27 +355,39 @@ LayoutInference( for (auto const_in : const_inputs) { std::vector dataRef(const_in->GetSpec().GetByteSize()); const_in->CopyDataFromTensor(dataRef.data()); - auto input = - infer_graph->CreateTensor(const_in->GetSpec(), (const void*)dataRef.data()); + auto input = infer_graph->CreateTensor(const_in->GetSpec(), + (const void*)dataRef.data()); layout_infer_ctx->UpdateTensorMap(const_in, input); - tensor_queue.push_back(const_in); + tensor_queue.push(const_in); layout_infer_ctx->SetPermuteVector( const_in, tensor_pv_map.find(const_in) != tensor_pv_map.end() - ? tensor_pv_map[const_in] - : MakeShared(const_in->GetShape().size())); + ? tensor_pv_map[const_in] + : MakeShared(const_in->GetShape().size())); + } + + auto graph_outputs = src_graph->OutputsTensor(); + for (const auto& t_src : graph_outputs) { + auto output = infer_graph->CreateTensor(t_src->GetSpec()); + layout_infer_ctx->UpdateTensorMap(t_src, output); + layout_infer_ctx->UpdateGraphOutputMap(t_src, output); + tensor_queue.push(t_src); + layout_infer_ctx->SetPermuteVector( + t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end() + ? 
tensor_pv_map[t_src] + : MakeShared(t_src->GetShape().size())); } while (!tensor_queue.empty()) { auto tensor = tensor_queue.front(); - tensor_queue.pop_front(); + tensor_queue.pop(); const auto& consumers = src_graph->GetConsumersOp(tensor); for (const auto& op : consumers) { - if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ !=-1 && + if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ != -1 && layout_infer_ctx->IsReadyForInfer(op)) { auto next_tensors = layout_inference_impl::HandleLayoutInfer(layout_infer_ctx, op); for (const auto& t : next_tensors) { - tensor_queue.push_back(t); + tensor_queue.push(t); } } } diff --git a/src/tim/transform/ops/op_layout_inference.cc b/src/tim/transform/ops/op_layout_inference.cc index 7275a2873..d3df200d0 100644 --- a/src/tim/transform/ops/op_layout_inference.cc +++ b/src/tim/transform/ops/op_layout_inference.cc @@ -38,16 +38,13 @@ void OpLayoutInfer::OnOutputs( auto graph_outputs = context_->src_graph_->OutputsTensor(); auto op_outputs = op_->impl()->OutputsTensor(); for (const auto& out : op_outputs) { - if (graph_outputs.end() != - std::find(graph_outputs.begin(), graph_outputs.end(), out)) { - context_->UpdateGraphOutputMap(out, context_->GetMapedTensor(out)); + if (graph_outputs.cend() != + std::find(graph_outputs.cbegin(), graph_outputs.cend(), out)) { auto pv = context_->GetPermuteVector(out); if (!pv->IsAligned()) { auto perm_out = InsertPermute(context_->GetMapedTensor(out), pv->Reverse(), true, out); - // Update graph out tensor context_->UpdateTensorMap(out, perm_out); - context_->UpdateGraphOutputMap(out, perm_out); } if (!context_->src_graph_->GetConsumersOp(out).empty()) { // The tensor is output of graph, but it also is the input of other operations @@ -65,19 +62,18 @@ void OpLayoutInfer::OnOutputs( std::shared_ptr OpLayoutInfer::InsertPermute( std::shared_ptr input, std::shared_ptr perm, bool is_graph_output, std::shared_ptr src_out) { - auto out_spec = input->GetSpec(); + std::shared_ptr 
out_tensor; if (is_graph_output) { - auto out_shape = src_out->GetShape(); - out_spec.SetShape(out_shape); - out_spec.SetAttribute(vx::TensorAttribute::OUTPUT); + out_tensor = context_->GetMappedGraphOutputTensor(src_out); } else { - out_spec.SetAttribute(vx::TensorAttribute::TRANSIENT); - } - if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) { - out_spec.quantization_.SetChannelDim( - MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim())); + auto out_spec = input->GetSpec().AsTransientSpec(); + if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) { + out_spec.quantization_.SetChannelDim( + MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim())); + } + out_tensor = context_->infer_graph_->CreateTensor(out_spec); } - auto out_tensor = context_->infer_graph_->CreateTensor(out_spec); + auto perm_op = context_->infer_graph_->CreateOperation( perm->AsStdVec()); (*perm_op).BindInput(input).BindOutput(out_tensor); @@ -88,20 +84,28 @@ std::vector> OpLayoutInfer::CreateOutputsTensor( std::shared_ptr required_pv) { std::vector> outputs_tensor; - if (op_->impl()->OutputsTensor().size() > 1) { + auto op_outputs = op_->impl()->OutputsTensor(); + if (op_outputs.size() > 1) { // todo(sven): potential bug here if node have multi-output and require layout inference std::cout << "warning at " << __FUNCTION__ << ", #" << __LINE__ << std::endl; } - for (const auto& o : op_->impl()->OutputsTensor()) { + for (const auto& o : op_outputs) { auto in_shape = o->GetShape(); auto out_spec = o->GetSpec(); - if (!(required_pv->IsAligned())) { + if (!required_pv->IsAligned()) { out_spec = out_spec.AsTransientSpec(); } - auto t_infer = context_->infer_graph_->CreateTensor(out_spec); - context_->UpdateTensorMap(o, t_infer); + + std::shared_ptr t_infer; + if (out_spec.GetTensorAttribute() == vx::OUTPUT) { + t_infer = context_->GetMapedTensor(o); + } else { + t_infer = context_->infer_graph_->CreateTensor(out_spec); + 
context_->UpdateTensorMap(o, t_infer); + } + outputs_tensor.push_back(t_infer); } return outputs_tensor; @@ -111,19 +115,26 @@ std::vector> OpLayoutInfer::CreateOutputsTensor( const std::vector>& required_pv) { std::vector> outputs_tensor; - assert(required_pv.size() == (op_->impl()->OutputsTensor().size())); + auto op_outputs = op_->impl()->OutputsTensor(); + assert(required_pv.size() == (op_outputs.size())); - uint32_t i = 0; - for (const auto& o : op_->impl()->OutputsTensor()) { + for (size_t i = 0; i < op_outputs.size(); i++) { + const auto& o = op_outputs[i]; auto in_shape = o->GetShape(); auto out_spec = o->GetSpec(); - if (!(required_pv[i]->IsAligned())) { + if (!required_pv[i]->IsAligned()) { out_spec = out_spec.AsTransientSpec(); } - auto t_infer = context_->infer_graph_->CreateTensor(out_spec); - context_->UpdateTensorMap(o, t_infer); + + std::shared_ptr t_infer; + if (out_spec.GetTensorAttribute() == vx::OUTPUT) { + t_infer = context_->GetMapedTensor(o); + } else { + t_infer = context_->infer_graph_->CreateTensor(out_spec); + context_->UpdateTensorMap(o, t_infer); + } + outputs_tensor.push_back(t_infer); - i++; } return outputs_tensor; } @@ -198,8 +209,8 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() { std::vector dataRef(i_src->GetSpec().GetByteSize()); i_src->CopyDataFromTensor(dataRef.data()); context_->UpdateTensorMap( - i_src, context_->infer_graph_->CreateTensor(i_src->GetSpec(), - (const void*)dataRef.data())); + i_src, context_->infer_graph_->CreateTensor( + i_src->GetSpec(), (const void*)dataRef.data())); context_->SetPermuteVector(i_src, MakeShared(i_src->GetShape().size())); } } else { @@ -247,8 +258,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() { if (required_pv->IsAligned()) { std::vector dataRef(i_src->GetSpec().GetByteSize()); i_src->CopyDataFromTensor(dataRef.data()); - perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(), - (const void*)dataRef.data()); + perm_out = context_->infer_graph_->CreateTensor( + 
i_src->GetSpec(), (const void*)dataRef.data()); } else if (i_src->GetShape().size() == required_pv->Rank()) { perm_out = PermuteConstTensor(i_src, required_pv); // need shape expansion @@ -280,8 +291,8 @@ void OpLayoutInfer::ReverseInputsPermuteVector() { if (i_src->IsConstTensor()) { std::vector dataRef(i_src->GetSpec().GetByteSize()); i_src->CopyDataFromTensor(dataRef.data()); - perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(), - (const void*)dataRef.data()); + perm_out = context_->infer_graph_->CreateTensor( + i_src->GetSpec(), (const void*)dataRef.data()); input_pv = MakeShared(i_src->GetShape().size()); } else { perm_out = context_->GetMapedTensor(i_src);