From 0e81f3a299835d2b519e273ab366d628aaf23e87 Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Sat, 9 Oct 2021 23:03:08 +0800
Subject: [PATCH 01/10] Remove unnecessary compiler flags

---
 src/tim/vx/internal/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tim/vx/internal/CMakeLists.txt b/src/tim/vx/internal/CMakeLists.txt
index 0e19d53f5..13089c184 100644
--- a/src/tim/vx/internal/CMakeLists.txt
+++ b/src/tim/vx/internal/CMakeLists.txt
@@ -3,7 +3,6 @@ message("src/tim/vx/internal")
 set(lib_name "tim_internal")
 set(OVXLIB_API_ATTR "__attribute__\(\(visibility\(\"default\"\)\)\)")
 add_definitions(-DOVXLIB_API=${OVXLIB_API_ATTR})
-add_compile_options(-Wno-strict-aliasing -Wno-unused-but-set-variable -Wno-maybe-uninitialized)
 
 aux_source_directory(src INTERNAL_SRC)
 aux_source_directory(src/kernel INTERNAL_KERNEL)

From dd06583f86e10f69e21778f2968c123c68b5f47c Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Sat, 9 Oct 2021 23:55:49 +0800
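Subject: [PATCH 02/10] Refactor CMakeLists.txt

- Build a single tim-vx library target whose type follows the new
  BUILD_SHARED_LIBS option (default OFF); the separate tim-vx-static
  target is removed and tim_internal is always built STATIC.
- Rename TIM_VX_ENABLE_NB_PARSER_EXAMPLE to TIM_VX_ENABLE_NBG_PARSER and
  move nbg_parser into its own CMakeLists.txt.
- Only build samples when the new TIM_VX_BUILD_EXAMPLES option is ON
  (default OFF); nbg_runner additionally requires
  TIM_VX_ENABLE_NBG_PARSER.
- Stop forcing TIM_VX_ENABLE_TEST to ON in the default X86_64_linux
  configuration branch.
- Use target_link_libraries/target_include_directories with PRIVATE
  visibility in the samples, and Threads::Threads instead of pthread.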

---
 CMakeLists.txt                           | 20 ++++++++++++--------
 samples/CMakeLists.txt                   |  5 ++++-
 samples/benchmark_test/CMakeLists.txt    | 12 +++++-------
 samples/lenet/CMakeLists.txt             | 15 ++++++++-------
 samples/multi_thread_test/CMakeLists.txt | 13 ++++++++-----
 samples/nbg_runner/CMakeLists.txt        | 13 +++++++------
 src/tim/CMakeLists.txt                   | 16 +++++++---------
 src/tim/utils/CMakeLists.txt             |  7 ++-----
 src/tim/utils/nbg_parser/CMakeLists.txt  |  6 ++++++
 src/tim/vx/internal/CMakeLists.txt       |  2 +-
 10 files changed, 60 insertions(+), 49 deletions(-)
 create mode 100644 src/tim/utils/nbg_parser/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b75d7361..217cfbd31 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,11 +1,13 @@
 cmake_minimum_required (VERSION 3.14)
 project(tim-vx LANGUAGES C CXX)
 
-OPTION(TIM_VX_ENABLE_TEST               "Build the unit test"                   ON)
-OPTION(TIM_VX_ENABLE_LAYOUT_INFER       "Enable layout inference support"       ON)
-OPTION(TIM_VX_CODE_COVERAGE             "Run code coverage with gconv(gcc only" OFF)
-OPTION(TIM_VX_USE_EXTERNAL_OVXLIB       "Use external OVXLIB"                   OFF)
-OPTION(TIM_VX_ENABLE_NB_PARSER_EXAMPLE  "Demo shows nbg parser usage"           OFF)
+option(BUILD_SHARED_LIBS                "Build using shared libraries"          OFF)
+option(TIM_VX_ENABLE_TEST               "Build the unit test"                   ON)
+option(TIM_VX_ENABLE_LAYOUT_INFER       "Enable layout inference support"       ON)
+option(TIM_VX_ENABLE_NBG_PARSER         "Enable NBG parser"                     OFF)
+option(TIM_VX_CODE_COVERAGE             "Run code coverage with gconv(gcc only" OFF)
+option(TIM_VX_USE_EXTERNAL_OVXLIB       "Use external OVXLIB"                   OFF)
+option(TIM_VX_BUILD_EXAMPLES            "Build demos show general usage"        OFF)
 
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -37,7 +39,6 @@ else()
     elseif("${CONFIG}" STREQUAL "YOCTO")
         include(cmake/YOCTO.cmake)
     else()
-        set(TIM_VX_ENABLE_TEST ON)
         include(cmake/X86_64_linux.cmake)
     endif()
 endif()
@@ -62,5 +63,8 @@ endif()
 
 include_directories(${OVXDRV_INCLUDE_DIRS})
 
-add_subdirectory("src/tim/")
-add_subdirectory("samples")
+add_subdirectory("src/tim")
+
+if(TIM_VX_BUILD_EXAMPLES)
+    add_subdirectory("samples")
+endif()
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index f6cadfcf7..87dfffa78 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -1,7 +1,10 @@
 add_subdirectory("benchmark_test")
 add_subdirectory("lenet")
-add_subdirectory("nbg_runner")
 
 if(NOT ANDROID_TOOLCHAIN)
     add_subdirectory("multi_thread_test")
 endif()
+
+if(TIM_VX_ENABLE_NBG_PARSER)
+    add_subdirectory("nbg_runner")
+endif()
diff --git a/samples/benchmark_test/CMakeLists.txt b/samples/benchmark_test/CMakeLists.txt
index b600ee72f..5248c6f22 100644
--- a/samples/benchmark_test/CMakeLists.txt
+++ b/samples/benchmark_test/CMakeLists.txt
@@ -1,11 +1,9 @@
-message("benchmark_test")
+message("samples/benchmark_test")
 
 set(TARGET_NAME "benchmark_test")
 
-aux_source_directory(. SRC)
+aux_source_directory(. ${TARGET_NAME}_SRCS)
+add_executable(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
 
-include_directories(${PROJECT_SOURCE_DIR}/include)
-include_directories(./)
-
-add_executable(${TARGET_NAME} ${SRC})
-target_link_libraries(${TARGET_NAME} ${OVXDRV_LIBRARIES}  tim-vx-static)
\ No newline at end of file
+target_link_libraries(${TARGET_NAME} PRIVATE tim-vx)
+target_include_directories(${TARGET_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file
diff --git a/samples/lenet/CMakeLists.txt b/samples/lenet/CMakeLists.txt
index 0def5fc79..d8a170ea1 100644
--- a/samples/lenet/CMakeLists.txt
+++ b/samples/lenet/CMakeLists.txt
@@ -2,10 +2,11 @@ message("samples/lenet")
 
 set(TARGET_NAME "lenet")
 
-aux_source_directory(. SRC)
-
-include_directories(${PROJECT_SOURCE_DIR}/include)
-include_directories(./)
-
-add_executable(${TARGET_NAME} ${SRC})
-target_link_libraries(${TARGET_NAME} ${OVXDRV_LIBRARIES}  tim-vx-static)
\ No newline at end of file
+aux_source_directory(. ${TARGET_NAME}_SRCS)
+add_executable(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
+
+target_link_libraries(${TARGET_NAME} PRIVATE tim-vx)
+target_include_directories(${TARGET_NAME} PRIVATE 
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${PROJECT_SOURCE_DIR}/include
+)
\ No newline at end of file
diff --git a/samples/multi_thread_test/CMakeLists.txt b/samples/multi_thread_test/CMakeLists.txt
index 3575a5947..a1c18f1ad 100644
--- a/samples/multi_thread_test/CMakeLists.txt
+++ b/samples/multi_thread_test/CMakeLists.txt
@@ -2,10 +2,13 @@ message("samples/multi_thread_test")
 
 set(TARGET_NAME "multi_thread_test")
 
-aux_source_directory(. SRC)
+find_package(Threads REQUIRED)
 
-include_directories(${PROJECT_SOURCE_DIR}/include)
-include_directories(./)
+aux_source_directory(. ${TARGET_NAME}_SRCS)
+add_executable(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
 
-add_executable(${TARGET_NAME} ${SRC})
-target_link_libraries(${TARGET_NAME} ${OVXDRV_LIBRARIES}  tim-vx-static pthread)
\ No newline at end of file
+target_link_libraries(${TARGET_NAME} PRIVATE tim-vx Threads::Threads)
+target_include_directories(${TARGET_NAME} PRIVATE 
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${PROJECT_SOURCE_DIR}/include
+)
\ No newline at end of file
diff --git a/samples/nbg_runner/CMakeLists.txt b/samples/nbg_runner/CMakeLists.txt
index 0cb323536..32b918eba 100644
--- a/samples/nbg_runner/CMakeLists.txt
+++ b/samples/nbg_runner/CMakeLists.txt
@@ -1,8 +1,9 @@
-if(TIM_VX_ENABLE_NB_PARSER_EXAMPLE)
+message("samples/nbg_runner")
 
-include_directories(${PROJECT_SOURCE_DIR}/include)
-aux_source_directory(. nbg_runner_src)
-add_executable(nbg_runner ${nbg_runner_src})
-target_link_libraries(nbg_runner tim-vx-static nbg_parser)
+set(TARGET_NAME "nbg_runner")
 
-endif()
+aux_source_directory(. ${TARGET_NAME}_SRCS)
+add_executable(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
+
+target_link_libraries(${TARGET_NAME} PRIVATE tim-vx nbg_parser)
+target_include_directories(${TARGET_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file
diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt
index ab99ed3c1..6ecb02a6c 100644
--- a/src/tim/CMakeLists.txt
+++ b/src/tim/CMakeLists.txt
@@ -55,28 +55,26 @@ foreach(src_file ${SRC})
     endif()
 endforeach()
 
-add_library(${TARGET_NAME} SHARED ${SRC})
+add_library(${TARGET_NAME} ${SRC})
 target_link_libraries(${TARGET_NAME} PRIVATE
     -Wl,--whole-archive tim_internal -Wl,--no-whole-archive)
 
-add_library(${TARGET_NAME}-static STATIC ${SRC})
-target_link_libraries(${TARGET_NAME}-static PRIVATE
-	-Wl,--whole-archive tim_internal -Wl,--no-whole-archive)
-
-install(TARGETS ${TARGET_NAME} ${TARGET_NAME}-static
+install(TARGETS ${TARGET_NAME} ${TARGET_NAME}
         DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
 
-install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx DESTINATION ${CMAKE_INSTALL_PREFIX}/include/tim/)
+install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx 
+        DESTINATION ${CMAKE_INSTALL_PREFIX}/include/tim)
 
 if(TIM_VX_ENABLE_LAYOUT_INFER)
-    install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/transform DESTINATION ${CMAKE_INSTALL_PREFIX}/include/tim/)
+    install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/transform 
+            DESTINATION ${CMAKE_INSTALL_PREFIX}/include/tim)
 endif()
 
 if (TIM_VX_ENABLE_TEST)
     include(GoogleTest)
 
     add_executable(unit_test ${UT_SRC})
-    target_link_libraries(unit_test gtest gtest_main gmock gmock_main ${TARGET_NAME}-static)
+    target_link_libraries(unit_test gtest gtest_main gmock gmock_main ${TARGET_NAME})
 
     install(TARGETS unit_test DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/)
 endif()
diff --git a/src/tim/utils/CMakeLists.txt b/src/tim/utils/CMakeLists.txt
index ac8c3aba8..f6353b9eb 100644
--- a/src/tim/utils/CMakeLists.txt
+++ b/src/tim/utils/CMakeLists.txt
@@ -1,6 +1,3 @@
-if(TIM_VX_ENABLE_NB_PARSER_EXAMPLE)
-
-aux_source_directory(./nbg_parser nbg_parser_src)
-add_library(nbg_parser STATIC ${nbg_parser_src})
-
+if(TIM_VX_ENABLE_NBG_PARSER)
+    add_subdirectory("nbg_parser")
 endif()
\ No newline at end of file
diff --git a/src/tim/utils/nbg_parser/CMakeLists.txt b/src/tim/utils/nbg_parser/CMakeLists.txt
new file mode 100644
index 000000000..7f14c0baa
--- /dev/null
+++ b/src/tim/utils/nbg_parser/CMakeLists.txt
@@ -0,0 +1,6 @@
+message("src/tim/vx/utils/nbg_parser")
+
+set(TARGET_NAME "nbg_parser")
+
+aux_source_directory(. ${TARGET_NAME}_SRCS)
+add_library(${TARGET_NAME} STATIC ${${TARGET_NAME}_SRCS})
\ No newline at end of file
diff --git a/src/tim/vx/internal/CMakeLists.txt b/src/tim/vx/internal/CMakeLists.txt
index 13089c184..1c538e0f0 100644
--- a/src/tim/vx/internal/CMakeLists.txt
+++ b/src/tim/vx/internal/CMakeLists.txt
@@ -36,5 +36,5 @@ list(APPEND SRC
 include_directories(include)
 include_directories(${OVXDRV_INCLUDE_DIRS})
 
-add_library(${lib_name} ${SRC})
+add_library(${lib_name} STATIC ${SRC})
 target_link_libraries(${lib_name} PRIVATE ${OVXDRV_LIBRARIES})

From 932592f81b0ae9092785d2da6b3fbccf7bffb82c Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Sun, 10 Oct 2021 00:04:05 +0800
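Subject: [PATCH 03/10] Tweak CMakeLists.txt for libtim_internal

Rename lib_name to TARGET_NAME and SRC to ${TARGET_NAME}_SRCS, and
replace the directory-scoped include_directories() calls with a PRIVATE
target_include_directories() on the tim_internal target.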

---
 src/tim/vx/internal/CMakeLists.txt | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/tim/vx/internal/CMakeLists.txt b/src/tim/vx/internal/CMakeLists.txt
index 1c538e0f0..a707d6583 100644
--- a/src/tim/vx/internal/CMakeLists.txt
+++ b/src/tim/vx/internal/CMakeLists.txt
@@ -1,6 +1,6 @@
 message("src/tim/vx/internal")
 
-set(lib_name "tim_internal")
+set(TARGET_NAME "tim_internal")
 set(OVXLIB_API_ATTR "__attribute__\(\(visibility\(\"default\"\)\)\)")
 add_definitions(-DOVXLIB_API=${OVXLIB_API_ATTR})
 
@@ -17,8 +17,8 @@ aux_source_directory(src/custom/ops INTERNAL_CUSTOM_OPS)
 aux_source_directory(src/custom/ops/kernel INTERNAL_CUSTOM_OPS_KERNEL)
 aux_source_directory(src/utils INTERNAL_UTILS)
 
-set(SRC)
-list(APPEND SRC
+set(${TARGET_NAME}_SRCS)
+list(APPEND ${TARGET_NAME}_SRCS
     ${INTERNAL_SRC}
     ${INTERNAL_KERNEL}
     ${INTERNAL_KERNEL_CL}
@@ -33,8 +33,9 @@ list(APPEND SRC
     ${INTERNAL_UTILS}
 )
 
-include_directories(include)
-include_directories(${OVXDRV_INCLUDE_DIRS})
-
-add_library(${lib_name} STATIC ${SRC})
-target_link_libraries(${lib_name} PRIVATE ${OVXDRV_LIBRARIES})
+add_library(${TARGET_NAME} STATIC ${${TARGET_NAME}_SRCS})
+target_link_libraries(${TARGET_NAME} PRIVATE ${OVXDRV_LIBRARIES})
+target_include_directories(${TARGET_NAME} PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+    ${OVXDRV_INCLUDE_DIRS}
+)

From 3f44c73d84595b6cf032bd8edbd961205a83363f Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Sun, 10 Oct 2021 00:17:45 +0800
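Subject: [PATCH 04/10] Tweak CMakeLists.txt for libtim-vx

Rename the SRC and UT_SRC list variables to ${TARGET_NAME}_SRCS and
${TARGET_NAME}_TEST_SRCS, and tidy whitespace. No functional change
intended.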

---
 src/tim/CMakeLists.txt | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt
index 6ecb02a6c..8e38a6225 100644
--- a/src/tim/CMakeLists.txt
+++ b/src/tim/CMakeLists.txt
@@ -15,11 +15,11 @@ endif()
 aux_source_directory(./vx VX_SRC)
 aux_source_directory(./vx/ops OPS_SRC)
 
-set(SRC)
-list(APPEND SRC
+set(${TARGET_NAME}_SRCS)
+list(APPEND ${TARGET_NAME}_SRCS
     ${VX_SRC}
     ${OPS_SRC}
-    )
+)
 
 include_directories(${PROJECT_SOURCE_DIR}/include)
 include_directories(${PROJECT_SOURCE_DIR}/include/tim/vx)
@@ -42,20 +42,20 @@ if(TIM_VX_ENABLE_LAYOUT_INFER)
     aux_source_directory(./transform LAYOUT_INFER_FRAMEWORK_SRCS)
     aux_source_directory(./transform/ops LAYOUT_INFER_OP_SRCS)
 
-    list(APPEND SRC
+    list(APPEND ${TARGET_NAME}_SRCS
         ${LAYOUT_INFER_FRAMEWORK_SRCS}
         ${LAYOUT_INFER_OP_SRCS}
     )
 endif()
 
-foreach(src_file ${SRC})
+foreach(src_file ${${TARGET_NAME}_SRCS})
     if(${src_file} MATCHES ".*_test\.cc")
-        list(REMOVE_ITEM SRC ${src_file})
-        list(APPEND UT_SRC ${src_file})
+        list(REMOVE_ITEM ${TARGET_NAME}_SRCS ${src_file})
+        list(APPEND ${TARGET_NAME}_TEST_SRCS ${src_file})
     endif()
 endforeach()
 
-add_library(${TARGET_NAME} ${SRC})
+add_library(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
 target_link_libraries(${TARGET_NAME} PRIVATE
     -Wl,--whole-archive tim_internal -Wl,--no-whole-archive)
 
@@ -70,10 +70,10 @@ if(TIM_VX_ENABLE_LAYOUT_INFER)
             DESTINATION ${CMAKE_INSTALL_PREFIX}/include/tim)
 endif()
 
-if (TIM_VX_ENABLE_TEST)
+if(TIM_VX_ENABLE_TEST)
     include(GoogleTest)
 
-    add_executable(unit_test ${UT_SRC})
+    add_executable(unit_test ${${TARGET_NAME}_TEST_SRCS})
     target_link_libraries(unit_test gtest gtest_main gmock gmock_main ${TARGET_NAME})
 
     install(TARGETS unit_test DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/)

From f2c29c8a3fea0ded988f7dc7aa3c361b067d664b Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Sun, 10 Oct 2021 00:19:48 +0800
Subject: [PATCH 05/10] Make TIM_VX_ENABLE_TEST default to OFF

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 217cfbd31..3aed4aff4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 3.14)
 project(tim-vx LANGUAGES C CXX)
 
 option(BUILD_SHARED_LIBS                "Build using shared libraries"          OFF)
-option(TIM_VX_ENABLE_TEST               "Build the unit test"                   ON)
+option(TIM_VX_ENABLE_TEST               "Build the unit test"                   OFF)
 option(TIM_VX_ENABLE_LAYOUT_INFER       "Enable layout inference support"       ON)
 option(TIM_VX_ENABLE_NBG_PARSER         "Enable NBG parser"                     OFF)
 option(TIM_VX_CODE_COVERAGE             "Run code coverage with gconv(gcc only" OFF)

From 34634298c20301e14794fc54c5df09c48bf75da1 Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Sun, 10 Oct 2021 01:05:11 +0800
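Subject: [PATCH 06/10] Eliminate usage of include_directories

Replace the directory-scoped include_directories() calls in the root and
src/tim CMakeLists.txt with PRIVATE target_include_directories() on the
library and unit_test targets.

Because the repository root is no longer an include directory, the
layout-inference sources now include private headers relative to
src/tim/transform and src/tim/vx (for example "ops/op_layout_inference.h"
and "operation_private.h") instead of using "src/tim/..." paths.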

---
 CMakeLists.txt                                |  2 --
 src/tim/CMakeLists.txt                        | 22 +++++++++++--------
 .../ops/activation_layout_inference.h         |  6 ++---
 src/tim/transform/ops/addn_layout_inference.h |  4 ++--
 src/tim/transform/ops/arg_layout_inference.h  |  4 ++--
 .../ops/batch2space_layout_inference.h        |  6 ++---
 .../transform/ops/concat_layout_inferene.h    |  6 ++---
 .../transform/ops/conv2d_layout_inference.h   |  6 ++---
 .../transform/ops/deconv2d_layout_inference.h |  6 ++---
 .../transform/ops/default_layout_inference.h  |  6 ++---
 .../ops/depth2space_layout_inference.h        |  6 ++---
 .../ops/elementwise_layout_inference.h        |  6 ++---
 .../ops/fullyconnected_layout_inference.h     |  6 ++---
 .../transform/ops/gather_layout_inference.h   |  4 ++--
 .../ops/gather_nd_layout_inference.h          |  4 ++--
 .../ops/l2normalization_layout_inference.h    |  4 ++--
 .../transform/ops/logical_layout_inference.h  |  4 ++--
 src/tim/transform/ops/lrn_layout_inference.h  |  4 ++--
 src/tim/transform/ops/op_layout_inference.cc  |  6 ++---
 src/tim/transform/ops/pad_layout_inference.h  |  6 ++---
 .../transform/ops/pool2d_layout_inference.h   |  6 ++---
 .../transform/ops/reduce_layout_inference.h   |  6 ++---
 .../transform/ops/resize_layout_inference.h   |  6 ++---
 .../transform/ops/reverse_layout_inference.h  |  4 ++--
 .../transform/ops/select_layout_inference.h   |  4 ++--
 .../ops/simple_ops_layout_inference.h         |  6 ++---
 .../transform/ops/slice_layout_inference.h    |  4 ++--
 .../transform/ops/softmax_layout_inference.h  |  6 ++---
 .../ops/space2batch_layout_inference.h        |  6 ++---
 .../ops/space2depth_layout_inference.h        |  6 ++---
 .../transform/ops/split_layout_inference.h    |  6 ++---
 .../transform/ops/squeeze_layout_inference.h  |  6 ++---
 .../transform/ops/stack_layout_inference.h    |  6 ++---
 .../ops/stridedslice_layout_inference.h       |  6 ++---
 src/tim/utils/nbg_parser/CMakeLists.txt       |  5 ++++-
 src/tim/vx/ops/activations_test.cc            |  2 +-
 src/tim/vx/ops/addn_test.cc                   |  2 +-
 src/tim/vx/ops/avg_pool_test.cc               |  2 +-
 src/tim/vx/ops/conv1d_test.cc                 |  2 +-
 src/tim/vx/ops/conv2d_test.cc                 |  2 +-
 src/tim/vx/ops/depthwiseConv_test.cc          |  2 +-
 src/tim/vx/ops/groupedconv2d_test.cc          |  2 +-
 src/tim/vx/ops/instancenormalization_test.cc  |  2 +-
 src/tim/vx/ops/layernormalization_test.cc     |  2 +-
 src/tim/vx/ops/logsoftmax_test.cc             |  2 +-
 src/tim/vx/ops/matmul_test.cc                 |  2 +-
 src/tim/vx/ops/moments_test.cc                |  2 +-
 src/tim/vx/ops/resize1d_test.cc               |  2 +-
 src/tim/vx/ops/shuffle_channel_test.cc        |  2 +-
 src/tim/vx/ops/transposeConv_test.cc          |  2 +-
 50 files changed, 118 insertions(+), 113 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3aed4aff4..ebe671cb3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,8 +61,6 @@ if(TIM_VX_ENABLE_TEST)
     endif()
 endif()
 
-include_directories(${OVXDRV_INCLUDE_DIRS})
-
 add_subdirectory("src/tim")
 
 if(TIM_VX_BUILD_EXAMPLES)
diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt
index 8e38a6225..d42c26788 100644
--- a/src/tim/CMakeLists.txt
+++ b/src/tim/CMakeLists.txt
@@ -21,10 +21,6 @@ list(APPEND ${TARGET_NAME}_SRCS
     ${OPS_SRC}
 )
 
-include_directories(${PROJECT_SOURCE_DIR}/include)
-include_directories(${PROJECT_SOURCE_DIR}/include/tim/vx)
-include_directories(${PROJECT_SOURCE_DIR}/src/tim/vx)
-
 if(${TIM_VX_USE_EXTERNAL_OVXLIB})
     if(NOT OVXLIB_INC)
         message(FATAL_ERROR "Set OVXLIB_INC if using external OVXLIB (TIM_VX_USE_EXTERNAL_OVXLIB)")
@@ -34,11 +30,8 @@ else()
     set(OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/include")
 endif()
 message(STATUS "OVXLIB include directory: ${OVXLIB_INCLUDE_DIR}")
-include_directories(${OVXLIB_INCLUDE_DIR})
 
 if(TIM_VX_ENABLE_LAYOUT_INFER)
-    include_directories(${PROJECT_SOURCE_DIR}/)
-
     aux_source_directory(./transform LAYOUT_INFER_FRAMEWORK_SRCS)
     aux_source_directory(./transform/ops LAYOUT_INFER_OP_SRCS)
 
@@ -58,6 +51,13 @@ endforeach()
 add_library(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
 target_link_libraries(${TARGET_NAME} PRIVATE
     -Wl,--whole-archive tim_internal -Wl,--no-whole-archive)
+target_include_directories(${TARGET_NAME} PRIVATE
+    ${PROJECT_SOURCE_DIR}/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/vx
+    ${CMAKE_CURRENT_SOURCE_DIR}/transform
+    ${OVXLIB_INCLUDE_DIR}
+    ${OVXDRV_INCLUDE_DIRS}
+)
 
 install(TARGETS ${TARGET_NAME} ${TARGET_NAME}
         DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
@@ -74,9 +74,13 @@ if(TIM_VX_ENABLE_TEST)
     include(GoogleTest)
 
     add_executable(unit_test ${${TARGET_NAME}_TEST_SRCS})
-    target_link_libraries(unit_test gtest gtest_main gmock gmock_main ${TARGET_NAME})
+    target_link_libraries(unit_test PRIVATE gtest gtest_main gmock gmock_main ${TARGET_NAME})
+    target_include_directories(unit_test PRIVATE
+        ${PROJECT_SOURCE_DIR}/include
+        ${CMAKE_CURRENT_SOURCE_DIR}/vx
+    )
 
-    install(TARGETS unit_test DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/)
+    install(TARGETS unit_test DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
 endif()
 
 add_subdirectory("utils")
diff --git a/src/tim/transform/ops/activation_layout_inference.h b/src/tim/transform/ops/activation_layout_inference.h
index a8d06a38c..c1e22fb3e 100644
--- a/src/tim/transform/ops/activation_layout_inference.h
+++ b/src/tim/transform/ops/activation_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/activations.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/addn_layout_inference.h b/src/tim/transform/ops/addn_layout_inference.h
index 1ace58154..51d2af389 100644
--- a/src/tim/transform/ops/addn_layout_inference.h
+++ b/src/tim/transform/ops/addn_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_ADDN_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_ADDN_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/addn.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/arg_layout_inference.h b/src/tim/transform/ops/arg_layout_inference.h
index 3c69f37f2..18138d807 100644
--- a/src/tim/transform/ops/arg_layout_inference.h
+++ b/src/tim/transform/ops/arg_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_ARG_OPS_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_ARG_OPS_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/arg.h"
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/batch2space_layout_inference.h b/src/tim/transform/ops/batch2space_layout_inference.h
index 5ae1b354c..876b88500 100644
--- a/src/tim/transform/ops/batch2space_layout_inference.h
+++ b/src/tim/transform/ops/batch2space_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/batch2space.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 namespace tim {
 namespace transform {
 class Batch2SpaceLayoutInfer : public OpLayoutInfer {
diff --git a/src/tim/transform/ops/concat_layout_inferene.h b/src/tim/transform/ops/concat_layout_inferene.h
index bdc73b1b3..9643ddf70 100644
--- a/src/tim/transform/ops/concat_layout_inferene.h
+++ b/src/tim/transform/ops/concat_layout_inferene.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/concat.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/conv2d_layout_inference.h b/src/tim/transform/ops/conv2d_layout_inference.h
index 87b753aec..b24cdc510 100644
--- a/src/tim/transform/ops/conv2d_layout_inference.h
+++ b/src/tim/transform/ops/conv2d_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/conv2d.h"
 
-#include "src/tim/vx/operation_private.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/transform/ops/op_layout_inference.h"
+#include "operation_private.h"
+#include "permute_vector.h"
+#include "ops/op_layout_inference.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/deconv2d_layout_inference.h b/src/tim/transform/ops/deconv2d_layout_inference.h
index cdf9068ab..b91be4161 100644
--- a/src/tim/transform/ops/deconv2d_layout_inference.h
+++ b/src/tim/transform/ops/deconv2d_layout_inference.h
@@ -24,9 +24,9 @@
 #ifndef TIM_LAYOUT_INFER_DECONV2D_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_DECONV2D_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 #include "tim/vx/ops/deconv.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/default_layout_inference.h b/src/tim/transform/ops/default_layout_inference.h
index fd8bb6e2f..9f54c4b83 100644
--- a/src/tim/transform/ops/default_layout_inference.h
+++ b/src/tim/transform/ops/default_layout_inference.h
@@ -31,9 +31,9 @@
 #include "tim/vx/ops/clip.h"
 
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/depth2space_layout_inference.h b/src/tim/transform/ops/depth2space_layout_inference.h
index 7b92fb732..84ec4d55b 100644
--- a/src/tim/transform/ops/depth2space_layout_inference.h
+++ b/src/tim/transform/ops/depth2space_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/depth2space.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/elementwise_layout_inference.h b/src/tim/transform/ops/elementwise_layout_inference.h
index 43821bac3..ab6b6ff7b 100644
--- a/src/tim/transform/ops/elementwise_layout_inference.h
+++ b/src/tim/transform/ops/elementwise_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/elementwise.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/fullyconnected_layout_inference.h b/src/tim/transform/ops/fullyconnected_layout_inference.h
index 3708dfbdc..a49febc6e 100644
--- a/src/tim/transform/ops/fullyconnected_layout_inference.h
+++ b/src/tim/transform/ops/fullyconnected_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/fullyconnected.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/gather_layout_inference.h b/src/tim/transform/ops/gather_layout_inference.h
index 429458840..72b145a99 100644
--- a/src/tim/transform/ops/gather_layout_inference.h
+++ b/src/tim/transform/ops/gather_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_GATHER_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_GATHER_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/gather.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/gather_nd_layout_inference.h b/src/tim/transform/ops/gather_nd_layout_inference.h
index c70ec174d..9c8fc2399 100644
--- a/src/tim/transform/ops/gather_nd_layout_inference.h
+++ b/src/tim/transform/ops/gather_nd_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_GATHER_ND_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_GATHER_ND_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/gathernd.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/l2normalization_layout_inference.h b/src/tim/transform/ops/l2normalization_layout_inference.h
index a9c5f6e18..027cecc49 100644
--- a/src/tim/transform/ops/l2normalization_layout_inference.h
+++ b/src/tim/transform/ops/l2normalization_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_L2_NORMALIZATION_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_L2_NORMALIZATION_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/l2normalization.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/logical_layout_inference.h b/src/tim/transform/ops/logical_layout_inference.h
index f9885c108..848f0eb36 100644
--- a/src/tim/transform/ops/logical_layout_inference.h
+++ b/src/tim/transform/ops/logical_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_LOGICAL_OPS_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_LOGICAL_OPS_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/logical.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/lrn_layout_inference.h b/src/tim/transform/ops/lrn_layout_inference.h
index c3cb2f8bd..c541007ff 100644
--- a/src/tim/transform/ops/lrn_layout_inference.h
+++ b/src/tim/transform/ops/lrn_layout_inference.h
@@ -26,8 +26,8 @@
 
 #include "tim/vx/ops/localresponsenormalization.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/op_layout_inference.cc b/src/tim/transform/ops/op_layout_inference.cc
index 3a8ee10f4..545729034 100644
--- a/src/tim/transform/ops/op_layout_inference.cc
+++ b/src/tim/transform/ops/op_layout_inference.cc
@@ -23,10 +23,10 @@
  *****************************************************************************/
 
 #include "op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 #include "tim/vx/ops/transpose.h"
-#include "src/tim/vx/type_utils.h"
+#include "type_utils.h"
 
 #include <algorithm>
 #include <vector>
diff --git a/src/tim/transform/ops/pad_layout_inference.h b/src/tim/transform/ops/pad_layout_inference.h
index 338860435..607927ce3 100644
--- a/src/tim/transform/ops/pad_layout_inference.h
+++ b/src/tim/transform/ops/pad_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/pad.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 namespace tim {
 namespace transform {
 class PadLayoutInfer : public OpLayoutInfer {
diff --git a/src/tim/transform/ops/pool2d_layout_inference.h b/src/tim/transform/ops/pool2d_layout_inference.h
index aea557c06..9954a7e5f 100644
--- a/src/tim/transform/ops/pool2d_layout_inference.h
+++ b/src/tim/transform/ops/pool2d_layout_inference.h
@@ -24,9 +24,9 @@
 #ifndef TIM_LAYOUT_INFER_POOL2D_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_POOL2D_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 #include "tim/vx/ops/pool2d.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/reduce_layout_inference.h b/src/tim/transform/ops/reduce_layout_inference.h
index e773c8f36..e8ec67635 100644
--- a/src/tim/transform/ops/reduce_layout_inference.h
+++ b/src/tim/transform/ops/reduce_layout_inference.h
@@ -28,9 +28,9 @@
 
 #include <set>
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/resize_layout_inference.h b/src/tim/transform/ops/resize_layout_inference.h
index ca71cec08..465351336 100644
--- a/src/tim/transform/ops/resize_layout_inference.h
+++ b/src/tim/transform/ops/resize_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/resize.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 namespace tim {
 namespace transform {
 class ResizeLayoutInfer : public OpLayoutInfer {
diff --git a/src/tim/transform/ops/reverse_layout_inference.h b/src/tim/transform/ops/reverse_layout_inference.h
index abfc412df..f999e27a1 100644
--- a/src/tim/transform/ops/reverse_layout_inference.h
+++ b/src/tim/transform/ops/reverse_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_REVERSE_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_REVERSE_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/reverse.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/select_layout_inference.h b/src/tim/transform/ops/select_layout_inference.h
index 3fbde95a5..60dd898e1 100644
--- a/src/tim/transform/ops/select_layout_inference.h
+++ b/src/tim/transform/ops/select_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_SELECT_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_SELECT_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/select.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/simple_ops_layout_inference.h b/src/tim/transform/ops/simple_ops_layout_inference.h
index 90c953de5..a1e92d224 100644
--- a/src/tim/transform/ops/simple_ops_layout_inference.h
+++ b/src/tim/transform/ops/simple_ops_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/simple_operations.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/slice_layout_inference.h b/src/tim/transform/ops/slice_layout_inference.h
index 1b15f949a..9b6079eaf 100644
--- a/src/tim/transform/ops/slice_layout_inference.h
+++ b/src/tim/transform/ops/slice_layout_inference.h
@@ -24,8 +24,8 @@
 #ifndef TIM_LAYOUT_INFER_SLICE_LAYOUT_INFERENCE_H_
 #define TIM_LAYOUT_INFER_SLICE_LAYOUT_INFERENCE_H_
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "operation_private.h"
 #include "tim/vx/ops/slice.h"
 
 namespace tim {
diff --git a/src/tim/transform/ops/softmax_layout_inference.h b/src/tim/transform/ops/softmax_layout_inference.h
index 2aa798e88..1b8a21f79 100644
--- a/src/tim/transform/ops/softmax_layout_inference.h
+++ b/src/tim/transform/ops/softmax_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/softmax.h"
 
-#include "src/tim/vx/operation_private.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/transform/ops/op_layout_inference.h"
+#include "operation_private.h"
+#include "permute_vector.h"
+#include "ops/op_layout_inference.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/space2batch_layout_inference.h b/src/tim/transform/ops/space2batch_layout_inference.h
index 2b2572052..77fdfd989 100644
--- a/src/tim/transform/ops/space2batch_layout_inference.h
+++ b/src/tim/transform/ops/space2batch_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/space2batch.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 namespace tim {
 namespace transform {
 class Space2BatchLayoutInfer : public OpLayoutInfer {
diff --git a/src/tim/transform/ops/space2depth_layout_inference.h b/src/tim/transform/ops/space2depth_layout_inference.h
index 222098d6c..fbc89cb2b 100644
--- a/src/tim/transform/ops/space2depth_layout_inference.h
+++ b/src/tim/transform/ops/space2depth_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/space2depth.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 namespace tim {
 namespace transform {
 class SpaceToDepthLayoutInfer : public OpLayoutInfer {
diff --git a/src/tim/transform/ops/split_layout_inference.h b/src/tim/transform/ops/split_layout_inference.h
index d76d10e11..6fceec9f6 100644
--- a/src/tim/transform/ops/split_layout_inference.h
+++ b/src/tim/transform/ops/split_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/split.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/squeeze_layout_inference.h b/src/tim/transform/ops/squeeze_layout_inference.h
index 0e3372509..0fe8d678d 100644
--- a/src/tim/transform/ops/squeeze_layout_inference.h
+++ b/src/tim/transform/ops/squeeze_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/squeeze.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/stack_layout_inference.h b/src/tim/transform/ops/stack_layout_inference.h
index 5df8a7d83..416124581 100644
--- a/src/tim/transform/ops/stack_layout_inference.h
+++ b/src/tim/transform/ops/stack_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/stack.h"
 
-#include "src/tim/vx/operation_private.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/transform/ops/op_layout_inference.h"
+#include "operation_private.h"
+#include "permute_vector.h"
+#include "ops/op_layout_inference.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/transform/ops/stridedslice_layout_inference.h b/src/tim/transform/ops/stridedslice_layout_inference.h
index 04bea4245..afbdebf09 100644
--- a/src/tim/transform/ops/stridedslice_layout_inference.h
+++ b/src/tim/transform/ops/stridedslice_layout_inference.h
@@ -26,9 +26,9 @@
 
 #include "tim/vx/ops/stridedslice.h"
 
-#include "src/tim/transform/ops/op_layout_inference.h"
-#include "src/tim/transform/permute_vector.h"
-#include "src/tim/vx/operation_private.h"
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "operation_private.h"
 
 namespace tim {
 namespace transform {
diff --git a/src/tim/utils/nbg_parser/CMakeLists.txt b/src/tim/utils/nbg_parser/CMakeLists.txt
index 7f14c0baa..5a5bcb450 100644
--- a/src/tim/utils/nbg_parser/CMakeLists.txt
+++ b/src/tim/utils/nbg_parser/CMakeLists.txt
@@ -3,4 +3,7 @@ message("src/tim/vx/utils/nbg_parser")
 set(TARGET_NAME "nbg_parser")
 
 aux_source_directory(. ${TARGET_NAME}_SRCS)
-add_library(${TARGET_NAME} STATIC ${${TARGET_NAME}_SRCS})
\ No newline at end of file
+add_library(${TARGET_NAME} STATIC ${${TARGET_NAME}_SRCS})
+target_include_directories(${TARGET_NAME} PRIVATE
+    ${PROJECT_SOURCE_DIR}/include
+)
\ No newline at end of file
diff --git a/src/tim/vx/ops/activations_test.cc b/src/tim/vx/ops/activations_test.cc
index 5cde1e7a5..fad39ae34 100644
--- a/src/tim/vx/ops/activations_test.cc
+++ b/src/tim/vx/ops/activations_test.cc
@@ -26,7 +26,7 @@
 #include "tim/vx/ops/activations.h"
 
 #include "gtest/gtest.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 
 TEST(Linear, shape_5_1_fp32) {
   auto ctx = tim::vx::Context::Create();
diff --git a/src/tim/vx/ops/addn_test.cc b/src/tim/vx/ops/addn_test.cc
index ae50b27a4..67afbc324 100644
--- a/src/tim/vx/ops/addn_test.cc
+++ b/src/tim/vx/ops/addn_test.cc
@@ -25,7 +25,7 @@
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/addn.h"
 #include "tim/vx/types.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 
 #include "gtest/gtest.h"
 
diff --git a/src/tim/vx/ops/avg_pool_test.cc b/src/tim/vx/ops/avg_pool_test.cc
index 0465302bd..807975ff1 100644
--- a/src/tim/vx/ops/avg_pool_test.cc
+++ b/src/tim/vx/ops/avg_pool_test.cc
@@ -26,7 +26,7 @@
 #include "tim/vx/ops/pool2d.h"
 #include <iostream>
 #include "gtest/gtest.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 
 TEST(AVG, shape_3_3_1_2_fp32_kernel_2_stride_1) {
     auto ctx = tim::vx::Context::Create();
diff --git a/src/tim/vx/ops/conv1d_test.cc b/src/tim/vx/ops/conv1d_test.cc
index b024c670c..1bf1ae75e 100644
--- a/src/tim/vx/ops/conv1d_test.cc
+++ b/src/tim/vx/ops/conv1d_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/conv1d.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(Conv1d, shape_3_6_1_float_ksize_1_stride_1_weights_3_no_bias_whcn) {
diff --git a/src/tim/vx/ops/conv2d_test.cc b/src/tim/vx/ops/conv2d_test.cc
index 9533f51b8..08b709ddb 100644
--- a/src/tim/vx/ops/conv2d_test.cc
+++ b/src/tim/vx/ops/conv2d_test.cc
@@ -1,7 +1,7 @@
 #include "tim/vx/ops/conv2d.h"
 
 #include "gtest/gtest.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/types.h"
diff --git a/src/tim/vx/ops/depthwiseConv_test.cc b/src/tim/vx/ops/depthwiseConv_test.cc
index 63137ab81..9be4bd5e5 100644
--- a/src/tim/vx/ops/depthwiseConv_test.cc
+++ b/src/tim/vx/ops/depthwiseConv_test.cc
@@ -1,5 +1,5 @@
 #include "gtest/gtest.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/conv2d.h"
diff --git a/src/tim/vx/ops/groupedconv2d_test.cc b/src/tim/vx/ops/groupedconv2d_test.cc
index b6ae3cab5..3cd6cfd31 100644
--- a/src/tim/vx/ops/groupedconv2d_test.cc
+++ b/src/tim/vx/ops/groupedconv2d_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/groupedconv2d.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(GroupedConv2d, shape_3_3_6_1_float_group_1_no_bias_whcn) {
diff --git a/src/tim/vx/ops/instancenormalization_test.cc b/src/tim/vx/ops/instancenormalization_test.cc
index 7111057c8..e5013f551 100644
--- a/src/tim/vx/ops/instancenormalization_test.cc
+++ b/src/tim/vx/ops/instancenormalization_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/instancenormalization.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(InstanceNorm, shape_3_6_1_float) {
diff --git a/src/tim/vx/ops/layernormalization_test.cc b/src/tim/vx/ops/layernormalization_test.cc
index e74990368..ba6a9f243 100644
--- a/src/tim/vx/ops/layernormalization_test.cc
+++ b/src/tim/vx/ops/layernormalization_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/layernormalization.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(LayerNorm, axis_0_shape_3_6_1_float) {
diff --git a/src/tim/vx/ops/logsoftmax_test.cc b/src/tim/vx/ops/logsoftmax_test.cc
index 681621d01..781d9f394 100644
--- a/src/tim/vx/ops/logsoftmax_test.cc
+++ b/src/tim/vx/ops/logsoftmax_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/logsoftmax.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(LogSoftmax, shape_6_1_float_axis_0) {
diff --git a/src/tim/vx/ops/matmul_test.cc b/src/tim/vx/ops/matmul_test.cc
index 60f63952c..903c0f05b 100644
--- a/src/tim/vx/ops/matmul_test.cc
+++ b/src/tim/vx/ops/matmul_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/matmul.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(Matmul, shape_2_6_shape_6_2_float) {
diff --git a/src/tim/vx/ops/moments_test.cc b/src/tim/vx/ops/moments_test.cc
index 7e2c34b3c..b109f235d 100644
--- a/src/tim/vx/ops/moments_test.cc
+++ b/src/tim/vx/ops/moments_test.cc
@@ -25,7 +25,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/moments.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(Moments, shape_6_3_1_float_axes_0_1) {
diff --git a/src/tim/vx/ops/resize1d_test.cc b/src/tim/vx/ops/resize1d_test.cc
index 357dc4220..f57c1889e 100644
--- a/src/tim/vx/ops/resize1d_test.cc
+++ b/src/tim/vx/ops/resize1d_test.cc
@@ -24,7 +24,7 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/resize1d.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "gtest/gtest.h"
 
 TEST(Resize1d, shape_4_2_1_float_nearest_whcn) {
diff --git a/src/tim/vx/ops/shuffle_channel_test.cc b/src/tim/vx/ops/shuffle_channel_test.cc
index 03ec4974f..9e1e028a1 100644
--- a/src/tim/vx/ops/shuffle_channel_test.cc
+++ b/src/tim/vx/ops/shuffle_channel_test.cc
@@ -25,7 +25,7 @@
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/shuffle_channel.h"
 #include "tim/vx/types.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 
 #include "gtest/gtest.h"
 
diff --git a/src/tim/vx/ops/transposeConv_test.cc b/src/tim/vx/ops/transposeConv_test.cc
index 59e91315c..83cdd3eb5 100644
--- a/src/tim/vx/ops/transposeConv_test.cc
+++ b/src/tim/vx/ops/transposeConv_test.cc
@@ -1,6 +1,6 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include "src/tim/vx/test_utils.h"
+#include "test_utils.h"
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/ops/deconv.h"

From 1a44737363c460ea4cbe527c0856145497cae7ab Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Mon, 11 Oct 2021 15:42:35 +0800
Subject: [PATCH 07/10] Fix CI unit test

---
 .github/workflows/x86_vsim_unit_test.yml | 2 +-
 CMakeLists.txt                           | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/x86_vsim_unit_test.yml b/.github/workflows/x86_vsim_unit_test.yml
index 1a5fd9d30..d6e6bd8cb 100644
--- a/.github/workflows/x86_vsim_unit_test.yml
+++ b/.github/workflows/x86_vsim_unit_test.yml
@@ -24,7 +24,7 @@ jobs:
     - name: Configure CMake
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
       # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DTIM_VX_ENABLE_TEST=ON
 
     - name: Build
       # Build your program with the given configuration
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ebe671cb3..348a05a36 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,6 @@ if(EXTERNAL_VIV_SDK AND EXISTS ${EXTERNAL_VIV_SDK})
     # this is for internal development purpose
     include(cmake/local_sdk.cmake)
 else()
-    set(TIM_VX_ENABLE_TEST OFF)
     if("${CONFIG}" STREQUAL "A311D")
         include(cmake/A311D.cmake)
     elseif("${CONFIG}" STREQUAL "S905D3")

From 745a87abff7bd628338f828107e32f1bdab0d1b2 Mon Sep 17 00:00:00 2001
From: Goose-Bomb <goose_bomb@outlook.com>
Date: Tue, 4 Jan 2022 14:07:09 +0800
Subject: [PATCH 08/10] Fix warnings relating to inheritance

---
 include/tim/vx/ops/rnn_cell.h    | 21 +++++++++--------
 src/tim/vx/direct_map_op_impl.cc | 32 ++++++++++++-------------
 src/tim/vx/direct_map_op_impl.h  |  9 ++++---
 src/tim/vx/op_impl.cc            | 10 ++++----
 src/tim/vx/op_impl.h             |  5 ++--
 src/tim/vx/ops/rnn_cell.cc       | 40 ++++++++++++++++----------------
 6 files changed, 59 insertions(+), 58 deletions(-)

diff --git a/include/tim/vx/ops/rnn_cell.h b/include/tim/vx/ops/rnn_cell.h
index 1419803b6..d5f1e0d35 100644
--- a/include/tim/vx/ops/rnn_cell.h
+++ b/include/tim/vx/ops/rnn_cell.h
@@ -29,19 +29,20 @@ namespace tim {
 namespace vx {
 namespace ops {
 
-class RNNCell : public Operation{
+class RNNCell : public Operation {
  public:
   enum ActivationType {
-        kNONE = 0,
-        kRELU = 1,
-        kRELU1 = 2,
-        kRELU6 = 3,
-        kTANH = 4,
-        kSIGMOID = 6,
-        kHARDSIGMOID = 31,  /* temporary use 31*/
-      };
+    kNONE = 0,
+    kRELU = 1,
+    kRELU1 = 2,
+    kRELU6 = 3,
+    kTANH = 4,
+    kSIGMOID = 6,
+    kHARDSIGMOID = 31, /* temporary use 31 */
+  };
   RNNCell(Graph* graph, ActivationType activation);
-  std::shared_ptr<Operation> Clone(std::shared_ptr<Graph>& graph) const override;
+  std::shared_ptr<Operation> Clone(
+      std::shared_ptr<Graph>& graph) const override;
 
  protected:
   const ActivationType activation_;
diff --git a/src/tim/vx/direct_map_op_impl.cc b/src/tim/vx/direct_map_op_impl.cc
index 170160440..7ea0bcbbd 100644
--- a/src/tim/vx/direct_map_op_impl.cc
+++ b/src/tim/vx/direct_map_op_impl.cc
@@ -24,11 +24,11 @@
 #include "direct_map_op_impl.h"
 #include "type_utils.h"
 
-namespace tim{
-namespace vx{
+namespace tim {
+namespace vx {
 
 DirectMapOpImpl::DirectMapOpImpl(Graph* graph, uint32_t kind, int input_cnt,
-                             int output_cnt, DataLayout layout)
+                                 int output_cnt, DataLayout layout)
     : OpImpl(graph, kind, input_cnt, output_cnt, layout),
       node_(vsi_nn_AddNode(graph_->graph(), kind_, input_cnt_, output_cnt_,
                            NULL)) {
@@ -36,7 +36,8 @@ DirectMapOpImpl::DirectMapOpImpl(Graph* graph, uint32_t kind, int input_cnt,
   node_->uid = graph_->graph()->cur_nid;
 }
 
-DirectMapOpImpl& DirectMapOpImpl::BindInput(const std::shared_ptr<Tensor>& tensor) {
+DirectMapOpImpl& DirectMapOpImpl::BindInput(
+    const std::shared_ptr<Tensor>& tensor) {
   inputs_tensor_.push_back(tensor);
   uint32_t tensor_id = tensor->GetId();
   node_->input.tensors[input_tensor_index++] = tensor_id;
@@ -59,17 +60,16 @@ DirectMapOpImpl& DirectMapOpImpl::BindOutput(
   return *this;
 }
 
-void DirectMapOpImpl::SetRoundingPolicy(
-      OverflowPolicy overflow_policy,
-      RoundingPolicy rounding_policy,
-      RoundType down_scale_size_rounding,
-      uint32_t accumulator_bits) {
-    node_->vx_param.overflow_policy = TranslateOverflowPolicy(overflow_policy);
-    node_->vx_param.rounding_policy = TranslateRoundingPolicy(rounding_policy);
-    node_->vx_param.down_scale_size_rounding =
-        TranslateDownScaleSizeRounding(down_scale_size_rounding);
-    node_->vx_param.accumulator_bits = accumulator_bits;
+void DirectMapOpImpl::SetRoundingPolicy(OverflowPolicy overflow_policy,
+                                        RoundingPolicy rounding_policy,
+                                        RoundType down_scale_size_rounding,
+                                        uint32_t accumulator_bits) {
+  node_->vx_param.overflow_policy = TranslateOverflowPolicy(overflow_policy);
+  node_->vx_param.rounding_policy = TranslateRoundingPolicy(rounding_policy);
+  node_->vx_param.down_scale_size_rounding =
+      TranslateDownScaleSizeRounding(down_scale_size_rounding);
+  node_->vx_param.accumulator_bits = accumulator_bits;
 }
 
-}
-}
\ No newline at end of file
+}  // namespace vx
+}  // namespace tim
\ No newline at end of file
diff --git a/src/tim/vx/direct_map_op_impl.h b/src/tim/vx/direct_map_op_impl.h
index e4ff4329f..88c49c5e9 100644
--- a/src/tim/vx/direct_map_op_impl.h
+++ b/src/tim/vx/direct_map_op_impl.h
@@ -24,7 +24,6 @@
 #ifndef TIM_VX_DIRECT_MAP_OP_IMPL_H_
 #define TIM_VX_DIRECT_MAP_OP_IMPL_H_
 
-
 #include "vsi_nn_pub.h"
 #include "graph_private.h"
 
@@ -38,7 +37,7 @@ class DirectMapOpImpl : public OpImpl {
   // DirectMapOpImpl(Graph* graph, uint32_t kind, int input_cnt = 0,
   //               int output_cnt = 0);
   DirectMapOpImpl(Graph* graph, uint32_t kind, int input_cnt = 0,
-                int output_cnt = 0, DataLayout layout = DataLayout::ANY);
+                  int output_cnt = 0, DataLayout layout = DataLayout::ANY);
   ~DirectMapOpImpl() {}
 
   DirectMapOpImpl& BindInput(const std::shared_ptr<Tensor>& tensor) override;
@@ -50,12 +49,12 @@ class DirectMapOpImpl : public OpImpl {
       OverflowPolicy overflow_policy = OverflowPolicy::SATURATE,
       RoundingPolicy rounding_policy = RoundingPolicy::RTNE,
       RoundType down_scale_size_rounding = RoundType::FLOOR,
-      uint32_t accumulator_bits =0);
+      uint32_t accumulator_bits = 0);
 
-  std::vector<std::shared_ptr<Tensor>> InputsTensor() {
+  std::vector<std::shared_ptr<Tensor>> InputsTensor() override {
     return inputs_tensor_;
   }
-  std::vector<std::shared_ptr<Tensor>> OutputsTensor() {
+  std::vector<std::shared_ptr<Tensor>> OutputsTensor() override {
     return outputs_tensor_;
   }
 
diff --git a/src/tim/vx/op_impl.cc b/src/tim/vx/op_impl.cc
index 3d6037554..a0366b2cb 100644
--- a/src/tim/vx/op_impl.cc
+++ b/src/tim/vx/op_impl.cc
@@ -23,15 +23,15 @@
 *****************************************************************************/
 #include "op_impl.h"
 
-namespace tim{
-namespace vx{
+namespace tim {
+namespace vx {
 
 OpImpl::OpImpl(Graph* graph, uint32_t kind, int input_cnt, int output_cnt,
-                DataLayout layout)
+               DataLayout layout)
     : graph_(reinterpret_cast<GraphImpl*>(graph)),
       kind_(kind),
       input_cnt_(input_cnt),
       output_cnt_(output_cnt),
       layout_(layout) {}
-}
-}
+}  // namespace vx
+}  // namespace tim
diff --git a/src/tim/vx/op_impl.h b/src/tim/vx/op_impl.h
index 582ba798f..637deee33 100644
--- a/src/tim/vx/op_impl.h
+++ b/src/tim/vx/op_impl.h
@@ -34,14 +34,15 @@ namespace vx {
 class OpImpl {
  public:
   OpImpl(Graph* graph, uint32_t kind, int input_cnt, int output_cnt,
-                DataLayout layout);
+         DataLayout layout);
+  virtual ~OpImpl() = default;
   virtual OpImpl& BindInput(const std::shared_ptr<Tensor>& tensor) = 0;
   virtual OpImpl& BindOutput(const std::shared_ptr<Tensor>& tensor) = 0;
   virtual std::vector<std::shared_ptr<Tensor>> InputsTensor() = 0;
   virtual std::vector<std::shared_ptr<Tensor>> OutputsTensor() = 0;
   virtual vsi_nn_node_t* node() = 0;
 
-  GraphImpl* graph_;
+  GraphImpl* graph_{nullptr};
   uint32_t kind_{0};
   int32_t input_cnt_{0};
   int32_t output_cnt_{0};
diff --git a/src/tim/vx/ops/rnn_cell.cc b/src/tim/vx/ops/rnn_cell.cc
index 6b784a7b4..e5ec74faa 100644
--- a/src/tim/vx/ops/rnn_cell.cc
+++ b/src/tim/vx/ops/rnn_cell.cc
@@ -30,9 +30,8 @@ namespace tim {
 namespace vx {
 namespace ops {
 
-class RNNCellImpl : public OpImpl{
+class RNNCellImpl : public OpImpl {
  public:
-
   enum {
     // signature
     FULLY_CONNECTED_0_IN = 0,
@@ -49,20 +48,19 @@ class RNNCellImpl : public OpImpl{
     // signature end
   };
 
-  RNNCellImpl(Graph* graph, int input_cnt,
-                int output_cnt, DataLayout layout = DataLayout::ANY)
-                : OpImpl(graph, -1, input_cnt, output_cnt, layout){
-      fc0_ = graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 4);
-      fc1_ = graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 4);
-      add_ = graph->CreateOperation<tim::vx::ops::Add>();
-      tanh_ = graph->CreateOperation<tim::vx::ops::Tanh>();
-      data_convert_ = graph->CreateOperation<tim::vx::ops::DataConvert>();
+  RNNCellImpl(Graph* graph, int input_cnt, int output_cnt,
+              DataLayout layout = DataLayout::ANY)
+      : OpImpl(graph, -1, input_cnt, output_cnt, layout) {
+    fc0_ = graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 4);
+    fc1_ = graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 4);
+    add_ = graph->CreateOperation<tim::vx::ops::Add>();
+    tanh_ = graph->CreateOperation<tim::vx::ops::Tanh>();
+    data_convert_ = graph->CreateOperation<tim::vx::ops::DataConvert>();
   }
 
   ~RNNCellImpl() {}
 
-  RNNCellImpl& BindInput(const std::shared_ptr<Tensor>& tensor) override
-  {
+  RNNCellImpl& BindInput(const std::shared_ptr<Tensor>& tensor) override {
     in_tensors_[input_tensor_index] = tensor;
 
     if (this->input_tensor_index == INPUT_CNT - 1) {
@@ -75,7 +73,6 @@ class RNNCellImpl : public OpImpl{
       tim::vx::TensorSpec add_spec(tim::vx::DataType::FLOAT32, shape,
                                    tim::vx::TensorAttribute::TRANSIENT);
 
-
       auto FC0_tensor = graph_->CreateTensor(FC0_spec);
       auto FC1_tensor = graph_->CreateTensor(FC1_spec);
       auto add_tensor = graph_->CreateTensor(add_spec);
@@ -99,22 +96,24 @@ class RNNCellImpl : public OpImpl{
     return *this;
   }
 
-  RNNCellImpl& BindOutput(const std::shared_ptr<Tensor>& tensor) override{
+  RNNCellImpl& BindOutput(const std::shared_ptr<Tensor>& tensor) override {
     out_tensors_[output_tensor_index] = tensor;
 
     tanh_->BindOutput(out_tensors_[OUT]);
     data_convert_->BindInput(out_tensors_[OUT]);
-    if (this->output_tensor_index == OUT_CNT - 1){
+    if (this->output_tensor_index == OUT_CNT - 1) {
       data_convert_->BindOutput(out_tensors_[STATE_OUT]);
     }
     this->output_tensor_index++;
     return *this;
   }
 
-  vsi_nn_node_t* node() override{ return nullptr; }
+  vsi_nn_node_t* node() override { return nullptr; }
 
-  std::vector<std::shared_ptr<Tensor>> InputsTensor() { return inputs_tensor_; }
-  std::vector<std::shared_ptr<Tensor>> OutputsTensor() {
+  std::vector<std::shared_ptr<Tensor>> InputsTensor() override {
+    return inputs_tensor_;
+  }
+  std::vector<std::shared_ptr<Tensor>> OutputsTensor() override {
     return outputs_tensor_;
   }
 
@@ -129,8 +128,9 @@ class RNNCellImpl : public OpImpl{
   std::array<std::shared_ptr<tim::vx::Tensor>, OUT_CNT> out_tensors_;
 };
 
-RNNCell::RNNCell(Graph* graph,  ActivationType activation) : activation_(activation){
-    impl_ = std::make_unique<RNNCellImpl>(graph, 0, 0, DataLayout::ANY);
+RNNCell::RNNCell(Graph* graph, ActivationType activation)
+    : activation_(activation) {
+  impl_ = std::make_unique<RNNCellImpl>(graph, 0, 0, DataLayout::ANY);
 }
 
 std::shared_ptr<Operation> RNNCell::Clone(std::shared_ptr<Graph>& graph) const {

From d2bfb4331e8f9cb6c59d15cd1f0c0797de21ff2f Mon Sep 17 00:00:00 2001
From: Xiaoran Weng <Xiaoran.Weng@verisilicon.com>
Date: Wed, 20 Dec 2023 17:24:26 +0800
Subject: [PATCH 09/10] Keep graph output order in layout inference

Type: Code Improvement
---
 src/tim/transform/layout_infer_context.h     |  14 +-
 src/tim/transform/layout_inference.cc        | 174 +++++++++++--------
 src/tim/transform/ops/op_layout_inference.cc |  77 ++++----
 3 files changed, 160 insertions(+), 105 deletions(-)

diff --git a/src/tim/transform/layout_infer_context.h b/src/tim/transform/layout_infer_context.h
index d63960c91..6a490777f 100644
--- a/src/tim/transform/layout_infer_context.h
+++ b/src/tim/transform/layout_infer_context.h
@@ -1,16 +1,18 @@
 #ifndef TIM_VX_LAYOUT_INFER_CONTEXT_H_
 #define TIM_VX_LAYOUT_INFER_CONTEXT_H_
+
 #include "permute_vector.h"
 #include "tim/transform/layout_inference.h"
 
+#include <unordered_map>
+
 namespace tim {
 namespace transform {
 namespace layout_inference_impl {
 class LayoutInferContext {
  public:
   LayoutInferContext(const std::shared_ptr<vx::Graph>& src_graph,
-                     std::shared_ptr<vx::Graph>& infer_graph)
-      : src_graph_(src_graph), infer_graph_(infer_graph) {}
+                     std::shared_ptr<vx::Graph>& infer_graph);
   void SetPermuteVector(std::shared_ptr<vx::Tensor> tensor,
                         std::shared_ptr<IPermuteVector> pv);
   const std::shared_ptr<IPermuteVector> GetPermuteVector(
@@ -22,12 +24,16 @@ class LayoutInferContext {
                        const std::shared_ptr<vx::Tensor>& t_layout);
   std::shared_ptr<vx::Tensor> GetMapedTensor(
       const std::shared_ptr<vx::Tensor>& t_src) const;
+  std::shared_ptr<vx::Tensor> GetMappedGraphInputTensor(
+      const std::shared_ptr<vx::Tensor>& t_src) const;
+  std::shared_ptr<vx::Tensor> GetMappedGraphOutputTensor(
+      const std::shared_ptr<vx::Tensor>& t_src) const;
 
   void UpdateGraphInputMap(const std::shared_ptr<vx::Tensor>& i_src,
                            const std::shared_ptr<vx::Tensor>& i_layout);
 
   void UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>& o_src,
-                           const std::shared_ptr<vx::Tensor>& o_layout);
+                            const std::shared_ptr<vx::Tensor>& o_layout);
 
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<vx::Tensor>>
   GetGraphInputMap() const {
@@ -44,7 +50,7 @@ class LayoutInferContext {
  private:
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<IPermuteVector>>
       tensor_pv_;
-  std::vector<std::shared_ptr<vx::Operation>> visited_op_;
+  std::unordered_map<std::shared_ptr<vx::Operation>, bool> op_visited_;
   // tensor_in_src -> tensor_in_layout
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<vx::Tensor>>
       tensor_map_;
diff --git a/src/tim/transform/layout_inference.cc b/src/tim/transform/layout_inference.cc
index fd2f16366..c517e3cc2 100644
--- a/src/tim/transform/layout_inference.cc
+++ b/src/tim/transform/layout_inference.cc
@@ -73,7 +73,7 @@
 #include "ops/roi_pool_layout_inference.h"
 
 #include <algorithm>
-#include <deque>
+#include <queue>
 
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
@@ -87,7 +87,16 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     std::shared_ptr<layout_inference_impl::LayoutInferContext>& ctx,
     const std::shared_ptr<vx::Operation>& op);
 
-// Implemention for LayoutInferContext
+// Implementation for LayoutInferContext
+LayoutInferContext::LayoutInferContext(
+    const std::shared_ptr<vx::Graph>& src_graph,
+    std::shared_ptr<vx::Graph>& infer_graph)
+    : src_graph_(src_graph), infer_graph_(infer_graph) {
+  for (const auto& op : src_graph->OpVector()) {
+    op_visited_[op] = false;  // IsVisited() reads via at(): pre-seed each op
+  }
+}
+
 void LayoutInferContext::SetPermuteVector(std::shared_ptr<vx::Tensor> tensor,
                                           std::shared_ptr<IPermuteVector> pv) {
   if (tensor_pv_.end() != tensor_pv_.find(tensor)) {
@@ -110,27 +119,19 @@ const std::shared_ptr<IPermuteVector> LayoutInferContext::GetPermuteVector(
 }
 
 void LayoutInferContext::MarkVisited(const std::shared_ptr<vx::Operation>& op) {
-  if (visited_op_.end() !=
-      std::find(visited_op_.begin(), visited_op_.end(), op)) {
-    VSILOGW("The operation has been mark as visited.");
-  } else {
-    visited_op_.push_back(op);
-  }
+  op_visited_[op] = true;
 }
 
-bool LayoutInferContext::IsVisited(const std::shared_ptr<vx::Operation>& op) const {
-  if (visited_op_.end() !=
-      std::find(visited_op_.begin(), visited_op_.end(), op)) {
-    return true;
-  } else {
-    return false;
-  }
+bool LayoutInferContext::IsVisited(
+    const std::shared_ptr<vx::Operation>& op) const {
+  return op_visited_.at(op);
 }
 
 bool LayoutInferContext::IsReadyForInfer(
     const std::shared_ptr<vx::Operation>& op) const {
   for (const auto& tensor : op->impl()->InputsTensor()) {
-    if (!tensor->IsConstTensor() && tensor->GetId() != (uint32_t)-1 &&
+    if (!tensor->IsConstTensor() &&
+        tensor->GetId() != static_cast<uint32_t>(-1) &&
         (tensor_pv_.end() == tensor_pv_.find(tensor))) {
       return false;
     }
@@ -149,21 +150,43 @@ std::shared_ptr<vx::Tensor> LayoutInferContext::GetMapedTensor(
   auto it = tensor_map_.find(t_src);
   if (it != tensor_map_.end()) {
     return it->second;
-  } else {
-    VSILOGE("Tensor has not beed inserted in tensor map.");
-    assert(false);
   }
 
+  VSILOGE("Tensor has not beed inserted in tensor map.");
+  return nullptr;
+}
+
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedGraphInputTensor(
+    const std::shared_ptr<vx::Tensor>& t_src) const {
+  auto it = graph_input_map_.find(t_src);
+  if (it != graph_input_map_.end()) {  // end() of the map that was searched
+    return it->second;
+  }
+
+  VSILOGE("Tensor has not beed inserted in graph input tensor map.");
   return nullptr;
 }
 
-void LayoutInferContext::UpdateGraphInputMap(const std::shared_ptr<vx::Tensor>& i_src,
-                           const std::shared_ptr<vx::Tensor>& i_layout) {
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedGraphOutputTensor(
+    const std::shared_ptr<vx::Tensor>& t_src) const {
+  auto it = graph_output_map_.find(t_src);
+  if (it != graph_output_map_.end()) {  // end() of the map that was searched
+    return it->second;
+  }
+
+  VSILOGE("Tensor has not beed inserted in graph output tensor map.");
+  return nullptr;
+}
+
+void LayoutInferContext::UpdateGraphInputMap(
+    const std::shared_ptr<vx::Tensor>& i_src,
+    const std::shared_ptr<vx::Tensor>& i_layout) {
   graph_input_map_[i_src] = i_layout;
 }
 
-void LayoutInferContext::UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>& o_src,
-                           const std::shared_ptr<vx::Tensor>& o_layout) {
+void LayoutInferContext::UpdateGraphOutputMap(
+    const std::shared_ptr<vx::Tensor>& o_src,
+    const std::shared_ptr<vx::Tensor>& o_layout) {
   graph_output_map_[o_src] = o_layout;
 }
 
@@ -173,39 +196,40 @@ void LayoutInferContext::UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>&
     op_infer->OnInputs(next_tensors);                             \
     op_infer->OnOutputs(next_tensors);                            \
     break;                                                        \
-  }                                                               \
-
-#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx)                                 \
-  case op_idx: {                                                               \
-    auto reduce_type = op->impl()->node()->nn_param.reduce.type;               \
-    switch (reduce_type) {                                                     \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);                 \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);                 \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);                   \
-    default:                                                                   \
-      VSILOGW("Op %d: Default layout inference pass for reduce.", reduce_type);\
-      assert(false);                                                           \
-    }                                                                          \
-    break;                                                                     \
-  }                                                                            \
-
-#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx)                                  \
-  case op_idx: {                                                                 \
-    auto logical_type = op->impl()->node()->nn_param.relational_ops.op;          \
-    switch (logical_type)                                                        \
-    {                                                                            \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);                     \
-    default:                                                                     \
-      VSILOGW("Op %d: Default layout inference pass for logical.", logical_type);\
-      assert(false);                                                             \
-    }                                                                            \
-    break;                                                                       \
-  }                                                                              \
+  }
+
+#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx)                      \
+  case op_idx: {                                                    \
+    auto reduce_type = op->impl()->node()->nn_param.reduce.type;    \
+    switch (reduce_type) {                                          \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);      \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);        \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);        \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);      \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);        \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);        \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);        \
+      default:                                                      \
+        VSILOGW("Op %d: Default layout inference pass for reduce.", \
+                reduce_type);                                       \
+        assert(false);                                              \
+    }                                                               \
+    break;                                                          \
+  }
+
+#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx)                         \
+  case op_idx: {                                                        \
+    auto logical_type = op->impl()->node()->nn_param.relational_ops.op; \
+    switch (logical_type) {                                             \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);          \
+      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);            \
+      default:                                                          \
+        VSILOGW("Op %d: Default layout inference pass for logical.",    \
+                logical_type);                                          \
+        assert(false);                                                  \
+    }                                                                   \
+    break;                                                              \
+  }
 
 std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     std::shared_ptr<layout_inference_impl::LayoutInferContext>& ctx,
@@ -279,8 +303,10 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN, UnidirectionalRnn);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN, BidirectionalRnn);
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN,
+                            UnidirectionalRnn);
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN,
+                            BidirectionalRnn);
 #ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
 #endif
@@ -312,13 +338,13 @@ LayoutInference(
       std::make_shared<layout_inference_impl::LayoutInferContext>(src_graph,
                                                                   infer_graph);
 
-  std::deque<std::shared_ptr<vx::Tensor>> tensor_queue;
+  std::queue<std::shared_ptr<vx::Tensor>> tensor_queue;
   auto graph_inputs = src_graph->InputsTensor();
   for (const auto& t_src : graph_inputs) {
     auto input = infer_graph->CreateTensor(t_src->GetSpec());
     layout_infer_ctx->UpdateTensorMap(t_src, input);
     layout_infer_ctx->UpdateGraphInputMap(t_src, input);
-    tensor_queue.push_back(t_src);
+    tensor_queue.push(t_src);
     layout_infer_ctx->SetPermuteVector(
         t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end()
                    ? tensor_pv_map[t_src]
@@ -329,27 +355,39 @@ LayoutInference(
   for (auto const_in : const_inputs) {
     std::vector<uint8_t> dataRef(const_in->GetSpec().GetByteSize());
     const_in->CopyDataFromTensor(dataRef.data());
-    auto input =
-        infer_graph->CreateTensor(const_in->GetSpec(), (const void*)dataRef.data());
+    auto input = infer_graph->CreateTensor(const_in->GetSpec(),
+                                           (const void*)dataRef.data());
     layout_infer_ctx->UpdateTensorMap(const_in, input);
-    tensor_queue.push_back(const_in);
+    tensor_queue.push(const_in);
     layout_infer_ctx->SetPermuteVector(
         const_in, tensor_pv_map.find(const_in) != tensor_pv_map.end()
-                   ? tensor_pv_map[const_in]
-                   : MakeShared(const_in->GetShape().size()));
+                      ? tensor_pv_map[const_in]
+                      : MakeShared(const_in->GetShape().size()));
+  }
+
+  auto graph_outputs = src_graph->OutputsTensor();
+  for (const auto& t_src : graph_outputs) {
+    auto output = infer_graph->CreateTensor(t_src->GetSpec());
+    layout_infer_ctx->UpdateTensorMap(t_src, output);
+    layout_infer_ctx->UpdateGraphOutputMap(t_src, output);
+    tensor_queue.push(t_src);
+    layout_infer_ctx->SetPermuteVector(
+        t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end()
+                   ? tensor_pv_map[t_src]
+                   : MakeShared(t_src->GetShape().size()));
   }
 
   while (!tensor_queue.empty()) {
     auto tensor = tensor_queue.front();
-    tensor_queue.pop_front();
+    tensor_queue.pop();
     const auto& consumers = src_graph->GetConsumersOp(tensor);
     for (const auto& op : consumers) {
-      if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ !=-1 &&
+      if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ != -1 &&
           layout_infer_ctx->IsReadyForInfer(op)) {
         auto next_tensors =
             layout_inference_impl::HandleLayoutInfer(layout_infer_ctx, op);
         for (const auto& t : next_tensors) {
-          tensor_queue.push_back(t);
+          tensor_queue.push(t);
         }
       }
     }
diff --git a/src/tim/transform/ops/op_layout_inference.cc b/src/tim/transform/ops/op_layout_inference.cc
index 7275a2873..d3df200d0 100644
--- a/src/tim/transform/ops/op_layout_inference.cc
+++ b/src/tim/transform/ops/op_layout_inference.cc
@@ -38,16 +38,13 @@ void OpLayoutInfer::OnOutputs(
   auto graph_outputs = context_->src_graph_->OutputsTensor();
   auto op_outputs = op_->impl()->OutputsTensor();
   for (const auto& out : op_outputs) {
-    if (graph_outputs.end() !=
-        std::find(graph_outputs.begin(), graph_outputs.end(), out)) {
-      context_->UpdateGraphOutputMap(out, context_->GetMapedTensor(out));
+    if (graph_outputs.cend() !=
+        std::find(graph_outputs.cbegin(), graph_outputs.cend(), out)) {
       auto pv = context_->GetPermuteVector(out);
       if (!pv->IsAligned()) {
         auto perm_out = InsertPermute(context_->GetMapedTensor(out),
                                       pv->Reverse(), true, out);
-        // Update graph out tensor
         context_->UpdateTensorMap(out, perm_out);
-        context_->UpdateGraphOutputMap(out, perm_out);
       }
       if (!context_->src_graph_->GetConsumersOp(out).empty()) {
         // The tensor is output of graph, but it also is the input of other operations
@@ -65,19 +62,18 @@ void OpLayoutInfer::OnOutputs(
 std::shared_ptr<vx::Tensor> OpLayoutInfer::InsertPermute(
     std::shared_ptr<vx::Tensor> input, std::shared_ptr<IPermuteVector> perm,
     bool is_graph_output, std::shared_ptr<vx::Tensor> src_out) {
-  auto out_spec = input->GetSpec();
+  std::shared_ptr<vx::Tensor> out_tensor;
   if (is_graph_output) {
-    auto out_shape = src_out->GetShape();
-    out_spec.SetShape(out_shape);
-    out_spec.SetAttribute(vx::TensorAttribute::OUTPUT);
+    out_tensor = context_->GetMappedGraphOutputTensor(src_out);
   } else {
-    out_spec.SetAttribute(vx::TensorAttribute::TRANSIENT);
-  }
-  if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) {
-    out_spec.quantization_.SetChannelDim(
-        MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim()));
+    auto out_spec = input->GetSpec().AsTransientSpec();
+    if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) {
+      out_spec.quantization_.SetChannelDim(
+          MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim()));
+    }
+    out_tensor = context_->infer_graph_->CreateTensor(out_spec);
   }
-  auto out_tensor = context_->infer_graph_->CreateTensor(out_spec);
+
   auto perm_op = context_->infer_graph_->CreateOperation<vx::ops::Transpose>(
       perm->AsStdVec());
   (*perm_op).BindInput(input).BindOutput(out_tensor);
@@ -88,20 +84,28 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
     std::shared_ptr<IPermuteVector> required_pv) {
   std::vector<std::shared_ptr<vx::Tensor>> outputs_tensor;
 
-  if (op_->impl()->OutputsTensor().size() > 1) {
+  auto op_outputs = op_->impl()->OutputsTensor();
+  if (op_outputs.size() > 1) {
     // todo(sven): potential bug here if node have multi-output and require layout inference
     std::cout << "warning at " << __FUNCTION__ << ", #" << __LINE__
               << std::endl;
   }
 
-  for (const auto& o : op_->impl()->OutputsTensor()) {
+  for (const auto& o : op_outputs) {
     auto in_shape = o->GetShape();
     auto out_spec = o->GetSpec();
-    if (!(required_pv->IsAligned())) {
+    if (!required_pv->IsAligned()) {
       out_spec = out_spec.AsTransientSpec();
     }
-    auto t_infer = context_->infer_graph_->CreateTensor(out_spec);
-    context_->UpdateTensorMap(o, t_infer);
+
+    std::shared_ptr<vx::Tensor> t_infer;
+    if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
+      t_infer = context_->GetMapedTensor(o);
+    } else {
+      t_infer = context_->infer_graph_->CreateTensor(out_spec);
+      context_->UpdateTensorMap(o, t_infer);
+    }
+
     outputs_tensor.push_back(t_infer);
   }
   return outputs_tensor;
@@ -111,19 +115,26 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
     const std::vector<std::shared_ptr<IPermuteVector>>& required_pv) {
   std::vector<std::shared_ptr<vx::Tensor>> outputs_tensor;
 
-  assert(required_pv.size() == (op_->impl()->OutputsTensor().size()));
+  auto op_outputs = op_->impl()->OutputsTensor();
+  assert(required_pv.size() == (op_outputs.size()));
 
-  uint32_t i = 0;
-  for (const auto& o : op_->impl()->OutputsTensor()) {
+  for (size_t i = 0; i < op_outputs.size(); i++) {
+    const auto& o = op_outputs[i];
     auto in_shape = o->GetShape();
     auto out_spec = o->GetSpec();
-    if (!(required_pv[i]->IsAligned())) {
+    if (!required_pv[i]->IsAligned()) {
       out_spec = out_spec.AsTransientSpec();
     }
-    auto t_infer = context_->infer_graph_->CreateTensor(out_spec);
-    context_->UpdateTensorMap(o, t_infer);
+
+    std::shared_ptr<vx::Tensor> t_infer;
+    if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
+      t_infer = context_->GetMapedTensor(o);
+    } else {
+      t_infer = context_->infer_graph_->CreateTensor(out_spec);
+      context_->UpdateTensorMap(o, t_infer);
+    }
+
     outputs_tensor.push_back(t_infer);
-    i++;
   }
   return outputs_tensor;
 }
@@ -198,8 +209,8 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() {
       std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
       i_src->CopyDataFromTensor(dataRef.data());
       context_->UpdateTensorMap(
-          i_src, context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                      (const void*)dataRef.data()));
+          i_src, context_->infer_graph_->CreateTensor(
+                     i_src->GetSpec(), (const void*)dataRef.data()));
       context_->SetPermuteVector(i_src, MakeShared(i_src->GetShape().size()));
     }
   } else {
@@ -247,8 +258,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() {
       if (required_pv->IsAligned()) {
         std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
         i_src->CopyDataFromTensor(dataRef.data());
-        perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                        (const void*)dataRef.data());
+        perm_out = context_->infer_graph_->CreateTensor(
+            i_src->GetSpec(), (const void*)dataRef.data());
       } else if (i_src->GetShape().size() == required_pv->Rank()) {
         perm_out = PermuteConstTensor(i_src, required_pv);
         // need shape expansion
@@ -280,8 +291,8 @@ void OpLayoutInfer::ReverseInputsPermuteVector() {
       if (i_src->IsConstTensor()) {
         std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
         i_src->CopyDataFromTensor(dataRef.data());
-        perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                        (const void*)dataRef.data());
+        perm_out = context_->infer_graph_->CreateTensor(
+            i_src->GetSpec(), (const void*)dataRef.data());
         input_pv = MakeShared(i_src->GetShape().size());
       } else {
         perm_out = context_->GetMapedTensor(i_src);

From 84399b90491178c7eb23e13ea97fd67710316587 Mon Sep 17 00:00:00 2001
From: Xiaoran Weng <Xiaoran.Weng@verisilicon.com>
Date: Wed, 20 Dec 2023 17:36:07 +0800
Subject: [PATCH 10/10] Fix typos in layout inference

Type: Code Improvement
---
 src/tim/transform/layout_infer_context.h      |   2 +-
 src/tim/transform/layout_inference.cc         | 170 +++++++++---------
 .../ops/activation_layout_inference.h         |   8 +-
 src/tim/transform/ops/addn_layout_inference.h |   2 +-
 src/tim/transform/ops/arg_layout_inference.h  |   2 +-
 .../ops/batch2space_layout_inference.h        |   4 +-
 .../ops/batchnorm_layout_inference.h          |  12 +-
 .../ops/bidirectional_rnn_layout_inference.h  |   2 +-
 .../ops/broadcast_layout_inference.h          |   2 +-
 .../transform/ops/concat_layout_inferene.h    |   2 +-
 .../transform/ops/conv2d_layout_inference.h   |  12 +-
 .../transform/ops/conv3d_layout_inference.h   |   8 +-
 .../transform/ops/deconv2d_layout_inference.h |  12 +-
 .../transform/ops/default_layout_inference.h  |   2 +-
 .../ops/depth2space_layout_inference.h        |   4 +-
 .../ops/elementwise_layout_inference.h        |   4 +-
 .../ops/fullyconnected_layout_inference.h     |   2 +-
 .../transform/ops/gather_layout_inference.h   |   2 +-
 .../ops/gather_nd_layout_inference.h          |   2 +-
 .../ops/grouped_conv2d_layout_inference.h     |  12 +-
 .../ops/instance_norm_layout_inference.h      |   6 +-
 .../ops/l2normalization_layout_inference.h    |   2 +-
 .../transform/ops/logical_layout_inference.h  |   2 +-
 src/tim/transform/ops/lrn_layout_inference.h  |   2 +-
 src/tim/transform/ops/op_layout_inference.cc  |  16 +-
 src/tim/transform/ops/pad_layout_inference.h  |   2 +-
 .../transform/ops/pad_v2_layout_inference.h   |   2 +-
 .../transform/ops/pool2d_layout_inference.h   |   4 +-
 .../transform/ops/reduce_layout_inference.h   |   2 +-
 .../transform/ops/resize_layout_inference.h   |   4 +-
 .../transform/ops/reverse_layout_inference.h  |   2 +-
 .../ops/roi_align_layout_inference.h          |   6 +-
 .../transform/ops/roi_pool_layout_inference.h |   6 +-
 .../transform/ops/select_layout_inference.h   |   2 +-
 .../ops/simple_ops_layout_inference.h         |   2 +-
 .../transform/ops/slice_layout_inference.h    |   2 +-
 .../transform/ops/softmax_layout_inference.h  |   2 +-
 .../ops/space2batch_layout_inference.h        |   4 +-
 .../ops/space2depth_layout_inference.h        |   4 +-
 .../transform/ops/split_layout_inference.h    |   2 +-
 .../transform/ops/squeeze_layout_inference.h  |   2 +-
 .../transform/ops/stack_layout_inference.h    |   2 +-
 .../ops/stridedslice_layout_inference.h       |   2 +-
 .../ops/transpose_layout_inference.h          |   2 +-
 .../unidirectional_lstm_layout_inference.h    |   2 +-
 .../ops/unidirectional_rnn_layout_inference.h |   2 +-
 .../transform/ops/yolov4_layout_inference.h   |   2 +-
 47 files changed, 177 insertions(+), 177 deletions(-)

diff --git a/src/tim/transform/layout_infer_context.h b/src/tim/transform/layout_infer_context.h
index 6a490777f..41dd2ebf8 100644
--- a/src/tim/transform/layout_infer_context.h
+++ b/src/tim/transform/layout_infer_context.h
@@ -22,7 +22,7 @@ class LayoutInferContext {
   bool IsReadyForInfer(const std::shared_ptr<vx::Operation>& op) const;
   void UpdateTensorMap(const std::shared_ptr<vx::Tensor>& t_src,
                        const std::shared_ptr<vx::Tensor>& t_layout);
-  std::shared_ptr<vx::Tensor> GetMapedTensor(
+  std::shared_ptr<vx::Tensor> GetMappedTensor(
       const std::shared_ptr<vx::Tensor>& t_src) const;
   std::shared_ptr<vx::Tensor> GetMappedGraphInputTensor(
       const std::shared_ptr<vx::Tensor>& t_src) const;
diff --git a/src/tim/transform/layout_inference.cc b/src/tim/transform/layout_inference.cc
index c517e3cc2..5bb11d5ba 100644
--- a/src/tim/transform/layout_inference.cc
+++ b/src/tim/transform/layout_inference.cc
@@ -145,7 +145,7 @@ void LayoutInferContext::UpdateTensorMap(
   tensor_map_[t_src] = t_layout;
 }
 
-std::shared_ptr<vx::Tensor> LayoutInferContext::GetMapedTensor(
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedTensor(
     const std::shared_ptr<vx::Tensor>& t_src) const {
   auto it = tensor_map_.find(t_src);
   if (it != tensor_map_.end()) {
@@ -190,7 +190,7 @@ void LayoutInferContext::UpdateGraphOutputMap(
   graph_output_map_[o_src] = o_layout;
 }
 
-#define REGIST_LAYOUT_INFERENCE(op_idx, name)                     \
+#define REGISTER_LAYOUT_INFERENCE(op_idx, name)                   \
   case op_idx: {                                                  \
     auto op_infer = std::make_shared<name##LayoutInfer>(op, ctx); \
     op_infer->OnInputs(next_tensors);                             \
@@ -198,17 +198,17 @@ void LayoutInferContext::UpdateGraphOutputMap(
     break;                                                        \
   }
 
-#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx)                      \
+#define REGISTER_REDUCE_LAYOUT_INFERENCE(op_idx)                    \
   case op_idx: {                                                    \
     auto reduce_type = op->impl()->node()->nn_param.reduce.type;    \
     switch (reduce_type) {                                          \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);      \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);      \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);        \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);      \
       default:                                                      \
         VSILOGW("Op %d: Default layout inference pass for reduce.", \
                 reduce_type);                                       \
@@ -217,12 +217,12 @@ void LayoutInferContext::UpdateGraphOutputMap(
     break;                                                          \
   }
 
-#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx)                         \
+#define REGISTER_LOGICAL_LAYOUT_INFERENCE(op_idx)                       \
   case op_idx: {                                                        \
     auto logical_type = op->impl()->node()->nn_param.relational_ops.op; \
     switch (logical_type) {                                             \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);          \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);            \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);        \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);          \
       default:                                                          \
         VSILOGW("Op %d: Default layout inference pass for logical.",    \
                 logical_type);                                          \
@@ -238,80 +238,80 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
   auto op_id = op->impl()->kind_;
   std::vector<std::shared_ptr<vx::Tensor>> next_tensors;
   switch (op_id) {
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV2D, Conv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GROUPED_CONV2D, GroupedConv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU1, Relu1);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU6, Relu6);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ELU, Elu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SIGMOID, Sigmoid);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MISH, Mish);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_HARD_SIGMOID, HardSigmoid);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SOFTRELU, SoftRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SWISH, HardSwish);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_TANH, Tanh);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LEAKY_RELU, LeakyRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONCAT, Concat);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADD, Add);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SUBTRACT, Sub);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MULTIPLY, Multiply);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DIVIDE, Div);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_POW, Pow);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MINIMUM, Minimum);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MAXIMUM, Maximum);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DATACONVERT, DataConvert);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_NEG, Neg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ABS, Abs);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SIN, Sin);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXP, Exp);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LOG, Log);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQRT, Sqrt);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RSQRT, Rsqrt);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQUARE, Square);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_NOT, LogicalNot);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_POOL, Pool2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SOFTMAX, Softmax);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQUEEZE, Squeeze);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_STACK, Stack);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2DEPTH, SpaceToDepth);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DEPTH2SPACE, DepthToSpace);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2BATCH, Space2Batch);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BATCH2SPACE, Batch2Space);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PAD, Pad);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PAD2, PadV2);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_FCL2, FullyConnected);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RESIZE, Resize);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPLIT, Split);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_STRIDED_SLICE, StridedSlice);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER_ND, GatherNd);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_REVERSE, Reverse);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SLICE, Slice);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SELECT, Select);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ARGMAX, Arg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ARGMIN, Arg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DECONVOLUTION, DeConv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BATCH_NORM, BatchNorm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PERMUTE, Transpose);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN,
-                            UnidirectionalRnn);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN,
-                            BidirectionalRnn);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONV2D, Conv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GROUPED_CONV2D, GroupedConv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU1, Relu1);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU6, Relu6);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ELU, Elu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SIGMOID, Sigmoid);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MISH, Mish);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_HARD_SIGMOID, HardSigmoid);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SOFTRELU, SoftRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SWISH, HardSwish);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_TANH, Tanh);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LEAKY_RELU, LeakyRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONCAT, Concat);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ADD, Add);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SUBTRACT, Sub);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MULTIPLY, Multiply);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DIVIDE, Div);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_POW, Pow);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MINIMUM, Minimum);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MAXIMUM, Maximum);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DATACONVERT, DataConvert);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_NEG, Neg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ABS, Abs);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SIN, Sin);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_EXP, Exp);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LOG, Log);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQRT, Sqrt);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RSQRT, Rsqrt);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQUARE, Square);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_NOT, LogicalNot);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_POOL, Pool2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SOFTMAX, Softmax);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQUEEZE, Squeeze);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_STACK, Stack);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2DEPTH, SpaceToDepth);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DEPTH2SPACE, DepthToSpace);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2BATCH, Space2Batch);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BATCH2SPACE, Batch2Space);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PAD, Pad);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PAD2, PadV2);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_FCL2, FullyConnected);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RESIZE, Resize);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPLIT, Split);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_STRIDED_SLICE, StridedSlice);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GATHER_ND, GatherNd);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_REVERSE, Reverse);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SLICE, Slice);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SELECT, Select);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ARGMAX, Arg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ARGMIN, Arg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DECONVOLUTION, DeConv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BATCH_NORM, BatchNorm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PERMUTE, Transpose);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN,
+                              UnidirectionalRnn);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN,
+                              BidirectionalRnn);
 #ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
 #endif
-    REGIST_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
-    REGIST_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
+    REGISTER_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
+    REGISTER_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
     // use default layout inference
     default: {
       VSILOGW("Op %d: default layout inference pass.", op_id);
diff --git a/src/tim/transform/ops/activation_layout_inference.h b/src/tim/transform/ops/activation_layout_inference.h
index 11659bd9b..841822b3a 100644
--- a/src/tim/transform/ops/activation_layout_inference.h
+++ b/src/tim/transform/ops/activation_layout_inference.h
@@ -51,7 +51,7 @@ class ActivationLayoutInfer : public OpLayoutInfer {
     auto activation = op_->Clone(context_->infer_graph_);
     auto out_infer = CreateOutputsTensor(input_pv);
     (*activation)
-        .BindInput(context_->GetMapedTensor(i_src))
+        .BindInput(context_->GetMappedTensor(i_src))
         .BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
@@ -107,7 +107,7 @@ class PReluLayoutInfer : public OpLayoutInfer {
             context_->infer_graph_->CreateOperation<vx::ops::Reshape>(
                 boardcast_shape);
         (*reshape)
-            .BindInput(context_->GetMapedTensor(src_slope))
+            .BindInput(context_->GetMappedTensor(src_slope))
             .BindOutput(reshape_out);
         context_->UpdateTensorMap(src_slope, reshape_out);
       }
@@ -130,8 +130,8 @@ class PReluLayoutInfer : public OpLayoutInfer {
     auto out_infer = CreateOutputsTensor(input_pv);
 
     (*prelu)
-        .BindInput(context_->GetMapedTensor(src_input))
-        .BindInput(context_->GetMapedTensor(src_slope));
+        .BindInput(context_->GetMappedTensor(src_input))
+        .BindInput(context_->GetMappedTensor(src_slope));
     (*prelu).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/addn_layout_inference.h b/src/tim/transform/ops/addn_layout_inference.h
index 45650ef9d..bdeab29ae 100644
--- a/src/tim/transform/ops/addn_layout_inference.h
+++ b/src/tim/transform/ops/addn_layout_inference.h
@@ -44,7 +44,7 @@ class AddNLayoutInfer : public OpLayoutInfer {
     auto addn = op_->Clone(context_->infer_graph_);
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*addn).BindInput(context_->GetMapedTensor(i_src));
+      (*addn).BindInput(context_->GetMappedTensor(i_src));
     }
     auto infer_out = CreateOutputsTensor(required_pv);
     (*addn).BindOutput(infer_out[0]);
diff --git a/src/tim/transform/ops/arg_layout_inference.h b/src/tim/transform/ops/arg_layout_inference.h
index af4696762..b05c70b51 100644
--- a/src/tim/transform/ops/arg_layout_inference.h
+++ b/src/tim/transform/ops/arg_layout_inference.h
@@ -45,7 +45,7 @@ class ArgLayoutInfer : public OpLayoutInfer {
 
     auto arg = op_->Clone(context_->infer_graph_);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*arg).BindInput(context_->GetMapedTensor(src_input));
+    (*arg).BindInput(context_->GetMappedTensor(src_input));
     (*arg).BindOutput(infer_out[0]);
 
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/batch2space_layout_inference.h b/src/tim/transform/ops/batch2space_layout_inference.h
index 065ae80f9..6f6b4b62f 100644
--- a/src/tim/transform/ops/batch2space_layout_inference.h
+++ b/src/tim/transform/ops/batch2space_layout_inference.h
@@ -51,7 +51,7 @@ class Batch2SpaceLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -70,7 +70,7 @@ class Batch2SpaceLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::Batch2Space>(
             block_size, crop, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*batch2space).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*batch2space).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*batch2space).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/batchnorm_layout_inference.h b/src/tim/transform/ops/batchnorm_layout_inference.h
index e01d416f5..7e9ac8fde 100644
--- a/src/tim/transform/ops/batchnorm_layout_inference.h
+++ b/src/tim/transform/ops/batchnorm_layout_inference.h
@@ -57,7 +57,7 @@ class BatchNormLayoutInfer : public OpLayoutInfer {
             perm_out = context_->infer_graph_->CreateTensor(src_in->GetSpec(), (const void*)dataRef.data());
             input_pv = MakeShared(src_in->GetShape().size());
         } else {
-          perm_out = context_->GetMapedTensor(src_in);
+          perm_out = context_->GetMappedTensor(src_in);
           input_pv = context_->GetPermuteVector(src_in);
           context_->SetPermuteVector(src_in, input_pv);
           if (idx == 0) {
@@ -73,11 +73,11 @@ class BatchNormLayoutInfer : public OpLayoutInfer {
 
     auto batchnorm = op_->Clone(context_->infer_graph_);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[0]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[1]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[2]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[3]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[4]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[0]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[1]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[2]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[3]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[4]));
 
     (*batchnorm).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/bidirectional_rnn_layout_inference.h b/src/tim/transform/ops/bidirectional_rnn_layout_inference.h
index 7097b73d7..70d8ca06c 100644
--- a/src/tim/transform/ops/bidirectional_rnn_layout_inference.h
+++ b/src/tim/transform/ops/bidirectional_rnn_layout_inference.h
@@ -72,7 +72,7 @@ class BidirectionalRnnLayoutInfer : public OpLayoutInfer {
 
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     
diff --git a/src/tim/transform/ops/broadcast_layout_inference.h b/src/tim/transform/ops/broadcast_layout_inference.h
index 41c034bb6..6db12adea 100644
--- a/src/tim/transform/ops/broadcast_layout_inference.h
+++ b/src/tim/transform/ops/broadcast_layout_inference.h
@@ -46,7 +46,7 @@ class BroadcastLayoutInfer : public OpLayoutInfer {
     auto cloned_op = op_->Clone(context_->infer_graph_);
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
diff --git a/src/tim/transform/ops/concat_layout_inferene.h b/src/tim/transform/ops/concat_layout_inferene.h
index d08aea17b..db26d7e93 100644
--- a/src/tim/transform/ops/concat_layout_inferene.h
+++ b/src/tim/transform/ops/concat_layout_inferene.h
@@ -47,7 +47,7 @@ class ConcatLayoutInfer : public OpLayoutInfer {
     auto concat = context_->infer_graph_->CreateOperation<vx::ops::Concat>(
         axis, op_->impl()->InputsTensor().size());
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*concat).BindInput(context_->GetMapedTensor(i_src));
+      (*concat).BindInput(context_->GetMappedTensor(i_src));
     }
     auto out_infer = CreateOutputsTensor(required_pv);
     (*concat).BindOutput(out_infer[0]);
diff --git a/src/tim/transform/ops/conv2d_layout_inference.h b/src/tim/transform/ops/conv2d_layout_inference.h
index 96b46abeb..46fc8acc7 100644
--- a/src/tim/transform/ops/conv2d_layout_inference.h
+++ b/src/tim/transform/ops/conv2d_layout_inference.h
@@ -79,10 +79,10 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       infer_input =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -104,10 +104,10 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
       auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
       if (!final_pv->IsAligned()) {
         infer_weight =
-            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+            InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
         context_->SetPermuteVector(input_tensors[1], weight_required_pv);
       } else {
-        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        infer_weight = context_->GetMappedTensor(input_tensors[1]);
         context_->SetPermuteVector(input_tensors[1], weight_pv);
       }
       context_->UpdateTensorMap(input_tensors[1], infer_weight);
@@ -121,7 +121,7 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
         infer_bias = context_->infer_graph_->CreateTensor(
             input_tensors[2]->GetSpec(), (const void*)dataRef.data());
       } else {
-        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+        infer_bias = context_->GetMappedTensor(input_tensors[2]);
       }
       auto bias_pv = MakeShared(1);
       context_->UpdateTensorMap(input_tensors[2], infer_bias);
@@ -131,7 +131,7 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
     auto conv2d = op_->Clone(context_->infer_graph_);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*conv2d).BindInput(context_->GetMapedTensor(i_src));
+      (*conv2d).BindInput(context_->GetMappedTensor(i_src));
     }
     (*conv2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/conv3d_layout_inference.h b/src/tim/transform/ops/conv3d_layout_inference.h
index 806199dbc..5b005563f 100644
--- a/src/tim/transform/ops/conv3d_layout_inference.h
+++ b/src/tim/transform/ops/conv3d_layout_inference.h
@@ -81,7 +81,7 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
       } else {
         // For bias
         if (in->GetShape().size() == 1) {
-          infer_tensor = context_->GetMapedTensor(in);
+          infer_tensor = context_->GetMappedTensor(in);
           trans_pv = MakeShared(1);
         } else {
           // For input/weight
@@ -89,10 +89,10 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
           auto final_pv = pv->Reverse()->Add(required_pv);
           if (!final_pv->IsAligned()) {
             infer_tensor =
-                InsertPermute(context_->GetMapedTensor(in), final_pv);
+                InsertPermute(context_->GetMappedTensor(in), final_pv);
             trans_pv = required_pv;
           } else {
-            infer_tensor = context_->GetMapedTensor(in);
+            infer_tensor = context_->GetMappedTensor(in);
             trans_pv = pv;
           }
         }
@@ -131,7 +131,7 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
         vx::DataLayout::WHDCN, vx::DataLayout::WHDIcOc);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*conv3d).BindInput(context_->GetMapedTensor(i_src));
+      (*conv3d).BindInput(context_->GetMappedTensor(i_src));
     }
     (*conv3d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/deconv2d_layout_inference.h b/src/tim/transform/ops/deconv2d_layout_inference.h
index 8788c1df1..ba2ea9e09 100644
--- a/src/tim/transform/ops/deconv2d_layout_inference.h
+++ b/src/tim/transform/ops/deconv2d_layout_inference.h
@@ -79,10 +79,10 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       infer_input =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -104,10 +104,10 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
       auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
       if (!final_pv->IsAligned()) {
         infer_weight =
-            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+            InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
         context_->SetPermuteVector(input_tensors[1], weight_required_pv);
       } else {
-        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        infer_weight = context_->GetMappedTensor(input_tensors[1]);
         context_->SetPermuteVector(input_tensors[1], weight_pv);
       }
       context_->UpdateTensorMap(input_tensors[1], infer_weight);
@@ -121,7 +121,7 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
         infer_bias = context_->infer_graph_->CreateTensor(
             input_tensors[2]->GetSpec(), (const void*)dataRef.data());
       } else {
-        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+        infer_bias = context_->GetMappedTensor(input_tensors[2]);
       }
       auto bias_pv = MakeShared(1);
       context_->UpdateTensorMap(input_tensors[2], infer_bias);
@@ -131,7 +131,7 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
     auto deconv = op_->Clone(context_->infer_graph_);
     auto infer_out = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*deconv).BindInput(context_->GetMapedTensor(i_src));
+      (*deconv).BindInput(context_->GetMappedTensor(i_src));
     }
     (*deconv).BindOutput(infer_out[0]);
 
diff --git a/src/tim/transform/ops/default_layout_inference.h b/src/tim/transform/ops/default_layout_inference.h
index 2ae29a3f9..391cb5cde 100644
--- a/src/tim/transform/ops/default_layout_inference.h
+++ b/src/tim/transform/ops/default_layout_inference.h
@@ -53,7 +53,7 @@ class DefaultLayoutInfer : public OpLayoutInfer {
     auto cloned_op = op_->Clone(context_->infer_graph_);
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
diff --git a/src/tim/transform/ops/depth2space_layout_inference.h b/src/tim/transform/ops/depth2space_layout_inference.h
index 8d42390d1..a94bf340a 100644
--- a/src/tim/transform/ops/depth2space_layout_inference.h
+++ b/src/tim/transform/ops/depth2space_layout_inference.h
@@ -52,7 +52,7 @@ class DepthToSpaceLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -63,7 +63,7 @@ class DepthToSpaceLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::DepthToSpace>(
             block_size, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2depth).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2depth).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2depth).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/elementwise_layout_inference.h b/src/tim/transform/ops/elementwise_layout_inference.h
index 1248e8639..5941f831d 100644
--- a/src/tim/transform/ops/elementwise_layout_inference.h
+++ b/src/tim/transform/ops/elementwise_layout_inference.h
@@ -71,7 +71,7 @@ class ElementWiseLayoutInfer : public OpLayoutInfer {
     auto required_pv = AlignPermuteVectorForElementWise();
     auto elementwise = context_->infer_graph_->CreateOperation<OpType>();
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*elementwise).BindInput(context_->GetMapedTensor(i_src));
+      (*elementwise).BindInput(context_->GetMappedTensor(i_src));
     }
     auto out_infer = CreateOutputsTensor(required_pv);
     (*elementwise).BindOutput(out_infer[0]);
@@ -120,7 +120,7 @@ class MultiplyLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<tim::vx::ops::Multiply>(
             op_->impl()->node()->nn_param.multiply.scale);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*multiply).BindInput(context_->GetMapedTensor(i_src));
+      (*multiply).BindInput(context_->GetMappedTensor(i_src));
     }
     auto out_infer = CreateOutputsTensor(required_pv);
     (*multiply).BindOutput(out_infer[0]);
diff --git a/src/tim/transform/ops/fullyconnected_layout_inference.h b/src/tim/transform/ops/fullyconnected_layout_inference.h
index 8b838888a..32e87fd09 100644
--- a/src/tim/transform/ops/fullyconnected_layout_inference.h
+++ b/src/tim/transform/ops/fullyconnected_layout_inference.h
@@ -65,7 +65,7 @@ class FullyConnectedLayoutInfer : public OpLayoutInfer {
         MakeShared(op_->impl()->OutputsTensor()[0]->GetShape().size());
     auto out_infer = CreateOutputsTensor(required_pv);
     for (auto in : op_->impl()->InputsTensor()) {
-      (*fcl).BindInput(context_->GetMapedTensor(in));
+      (*fcl).BindInput(context_->GetMappedTensor(in));
     }
     (*fcl).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/gather_layout_inference.h b/src/tim/transform/ops/gather_layout_inference.h
index 7ef544062..3f28c4d39 100644
--- a/src/tim/transform/ops/gather_layout_inference.h
+++ b/src/tim/transform/ops/gather_layout_inference.h
@@ -45,7 +45,7 @@ class GatherLayoutInfer : public OpLayoutInfer {
         op_->impl()->node()->nn_param.gather.batch_dims);
     int32_t output_rank = -1;
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*gather).BindInput(context_->GetMapedTensor(i_src));
+      (*gather).BindInput(context_->GetMappedTensor(i_src));
       output_rank += i_src->GetShape().size();
     }
     auto infer_out = CreateOutputsTensor(
diff --git a/src/tim/transform/ops/gather_nd_layout_inference.h b/src/tim/transform/ops/gather_nd_layout_inference.h
index cebf74ad4..201cc0bdc 100644
--- a/src/tim/transform/ops/gather_nd_layout_inference.h
+++ b/src/tim/transform/ops/gather_nd_layout_inference.h
@@ -46,7 +46,7 @@ class GatherNdLayoutInfer : public OpLayoutInfer {
 
     auto gather = context_->infer_graph_->CreateOperation<vx::ops::GatherNd>();
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*gather).BindInput(context_->GetMapedTensor(i_src));
+      (*gather).BindInput(context_->GetMappedTensor(i_src));
     }
     auto infer_out = CreateOutputsTensor(
         context_->GetPermuteVector(op_->impl()->InputsTensor()[0]));
diff --git a/src/tim/transform/ops/grouped_conv2d_layout_inference.h b/src/tim/transform/ops/grouped_conv2d_layout_inference.h
index b2df948b2..30243481b 100644
--- a/src/tim/transform/ops/grouped_conv2d_layout_inference.h
+++ b/src/tim/transform/ops/grouped_conv2d_layout_inference.h
@@ -79,10 +79,10 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       infer_input =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -104,10 +104,10 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
       auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
       if (!final_pv->IsAligned()) {
         infer_weight =
-            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+            InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
         context_->SetPermuteVector(input_tensors[1], weight_required_pv);
       } else {
-        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        infer_weight = context_->GetMappedTensor(input_tensors[1]);
         context_->SetPermuteVector(input_tensors[1], weight_pv);
       }
       context_->UpdateTensorMap(input_tensors[1], infer_weight);
@@ -121,7 +121,7 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
         infer_bias = context_->infer_graph_->CreateTensor(
             input_tensors[2]->GetSpec(), (const void*)dataRef.data());
       } else {
-        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+        infer_bias = context_->GetMappedTensor(input_tensors[2]);
       }
       auto bias_pv = MakeShared(1);
       context_->UpdateTensorMap(input_tensors[2], infer_bias);
@@ -131,7 +131,7 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
     auto grouped_conv2d = op_->Clone(context_->infer_graph_);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*grouped_conv2d).BindInput(context_->GetMapedTensor(i_src));
+      (*grouped_conv2d).BindInput(context_->GetMappedTensor(i_src));
     }
     (*grouped_conv2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/instance_norm_layout_inference.h b/src/tim/transform/ops/instance_norm_layout_inference.h
index bb8b73d5d..f0d3df75a 100644
--- a/src/tim/transform/ops/instance_norm_layout_inference.h
+++ b/src/tim/transform/ops/instance_norm_layout_inference.h
@@ -63,10 +63,10 @@ class InstanceNormLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -85,7 +85,7 @@ class InstanceNormLayoutInfer : public OpLayoutInfer {
     auto instance_norm = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*instance_norm).BindInput(context_->GetMapedTensor(i_src));
+      (*instance_norm).BindInput(context_->GetMappedTensor(i_src));
     }
     (*instance_norm).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/l2normalization_layout_inference.h b/src/tim/transform/ops/l2normalization_layout_inference.h
index 00148edd2..cfdcacc51 100644
--- a/src/tim/transform/ops/l2normalization_layout_inference.h
+++ b/src/tim/transform/ops/l2normalization_layout_inference.h
@@ -47,7 +47,7 @@ class L2NormalizationLayoutInfer : public OpLayoutInfer {
     auto l2norm =
         context_->infer_graph_->CreateOperation<vx::ops::L2Normalization>(axis);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*l2norm).BindInput(context_->GetMapedTensor(src_input));
+    (*l2norm).BindInput(context_->GetMappedTensor(src_input));
     (*l2norm).BindOutput(infer_out[0]);
 
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/logical_layout_inference.h b/src/tim/transform/ops/logical_layout_inference.h
index ee140a1ca..ef7754d7e 100644
--- a/src/tim/transform/ops/logical_layout_inference.h
+++ b/src/tim/transform/ops/logical_layout_inference.h
@@ -71,7 +71,7 @@ class LogicalOpsLayoutInfer : public OpLayoutInfer {
     auto infer_out = CreateOutputsTensor(required_pv);
     auto logical_op = context_->infer_graph_->CreateOperation<OpTpye>();
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*logical_op).BindInput(context_->GetMapedTensor(i_src));
+      (*logical_op).BindInput(context_->GetMappedTensor(i_src));
     }
     (*logical_op).BindOutput(infer_out[0]);
 
diff --git a/src/tim/transform/ops/lrn_layout_inference.h b/src/tim/transform/ops/lrn_layout_inference.h
index 74b97fbbf..79749cca5 100644
--- a/src/tim/transform/ops/lrn_layout_inference.h
+++ b/src/tim/transform/ops/lrn_layout_inference.h
@@ -53,7 +53,7 @@ class LRNLayoutInfer : public OpLayoutInfer {
                    ->CreateOperation<vx::ops::LocalResponseNormalization>(
                        size, alpha, beta, bias, axis);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*lrn).BindInput(context_->GetMapedTensor(src_input));
+    (*lrn).BindInput(context_->GetMappedTensor(src_input));
     (*lrn).BindOutput(infer_out[0]);
 
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/op_layout_inference.cc b/src/tim/transform/ops/op_layout_inference.cc
index d3df200d0..55ac6e44c 100644
--- a/src/tim/transform/ops/op_layout_inference.cc
+++ b/src/tim/transform/ops/op_layout_inference.cc
@@ -42,7 +42,7 @@ void OpLayoutInfer::OnOutputs(
         std::find(graph_outputs.cbegin(), graph_outputs.cend(), out)) {
       auto pv = context_->GetPermuteVector(out);
       if (!pv->IsAligned()) {
-        auto perm_out = InsertPermute(context_->GetMapedTensor(out),
+        auto perm_out = InsertPermute(context_->GetMappedTensor(out),
                                       pv->Reverse(), true, out);
         context_->UpdateTensorMap(out, perm_out);
       }
@@ -100,7 +100,7 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
 
     std::shared_ptr<vx::Tensor> t_infer;
     if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
-      t_infer = context_->GetMapedTensor(o);
+      t_infer = context_->GetMappedTensor(o);
     } else {
       t_infer = context_->infer_graph_->CreateTensor(out_spec);
       context_->UpdateTensorMap(o, t_infer);
@@ -128,7 +128,7 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
 
     std::shared_ptr<vx::Tensor> t_infer;
     if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
-      t_infer = context_->GetMapedTensor(o);
+      t_infer = context_->GetMappedTensor(o);
     } else {
       t_infer = context_->infer_graph_->CreateTensor(out_spec);
       context_->UpdateTensorMap(o, t_infer);
@@ -226,9 +226,9 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() {
       } else {
         auto final_pv =
             context_->GetPermuteVector(i_src)->Reverse()->Add(required_pv);
-        final_pv->IsAligned() ? perm_out = context_->GetMapedTensor(i_src)
+        final_pv->IsAligned() ? perm_out = context_->GetMappedTensor(i_src)
                               : perm_out = InsertPermute(
-                                    context_->GetMapedTensor(i_src), final_pv);
+                                    context_->GetMappedTensor(i_src), final_pv);
       }
       context_->UpdateTensorMap(i_src, perm_out);
       context_->SetPermuteVector(i_src, required_pv);
@@ -274,8 +274,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() {
       auto final_pv =
           context_->GetPermuteVector(i_src)->Reverse()->Add(required_pv);
       final_pv->IsAligned()
-          ? perm_out = context_->GetMapedTensor(i_src)
-          : perm_out = InsertPermute(context_->GetMapedTensor(i_src), final_pv);
+          ? perm_out = context_->GetMappedTensor(i_src)
+          : perm_out = InsertPermute(context_->GetMappedTensor(i_src), final_pv);
     }
     context_->UpdateTensorMap(i_src, perm_out);
     context_->SetPermuteVector(i_src, required_pv);
@@ -295,7 +295,7 @@ void OpLayoutInfer::ReverseInputsPermuteVector() {
             i_src->GetSpec(), (const void*)dataRef.data());
         input_pv = MakeShared(i_src->GetShape().size());
       } else {
-        perm_out = context_->GetMapedTensor(i_src);
+        perm_out = context_->GetMappedTensor(i_src);
         input_pv = context_->GetPermuteVector(i_src);
         if (!input_pv->IsAligned()) {
           perm_out = InsertPermute(perm_out, input_pv->Reverse());
diff --git a/src/tim/transform/ops/pad_layout_inference.h b/src/tim/transform/ops/pad_layout_inference.h
index 8e041d1b8..d44ed7a41 100644
--- a/src/tim/transform/ops/pad_layout_inference.h
+++ b/src/tim/transform/ops/pad_layout_inference.h
@@ -63,7 +63,7 @@ class PadLayoutInfer : public OpLayoutInfer {
         front_size, back_size, pad_value, pad_mode);
 
     auto out_infer = CreateOutputsTensor(input_pv);
-    (*pad).BindInput(context_->GetMapedTensor(i_src));
+    (*pad).BindInput(context_->GetMappedTensor(i_src));
     (*pad).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/pad_v2_layout_inference.h b/src/tim/transform/ops/pad_v2_layout_inference.h
index d9bad6afc..0faa91882 100644
--- a/src/tim/transform/ops/pad_v2_layout_inference.h
+++ b/src/tim/transform/ops/pad_v2_layout_inference.h
@@ -61,7 +61,7 @@ class PadV2LayoutInfer : public OpLayoutInfer {
     auto pad_v2 = context_->infer_graph_->CreateOperation<vx::ops::PadV2>(
         front_size, back_size, pad_value);
     auto out_infer = CreateOutputsTensor(input_pv);
-    (*pad_v2).BindInput(context_->GetMapedTensor(i_src));
+    (*pad_v2).BindInput(context_->GetMappedTensor(i_src));
     (*pad_v2).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/pool2d_layout_inference.h b/src/tim/transform/ops/pool2d_layout_inference.h
index 406cd3ce6..b2efc9add 100644
--- a/src/tim/transform/ops/pool2d_layout_inference.h
+++ b/src/tim/transform/ops/pool2d_layout_inference.h
@@ -50,7 +50,7 @@ class Pool2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -82,7 +82,7 @@ class Pool2dLayoutInfer : public OpLayoutInfer {
           pool_type, pad_type, ksize, stride, round_type, vx::DataLayout::WHCN);
     }
     auto otensor_infer = CreateOutputsTensor(required_pv);
-    (*pool2d).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*pool2d).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*pool2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/reduce_layout_inference.h b/src/tim/transform/ops/reduce_layout_inference.h
index 6766985dc..e0df3fec6 100644
--- a/src/tim/transform/ops/reduce_layout_inference.h
+++ b/src/tim/transform/ops/reduce_layout_inference.h
@@ -60,7 +60,7 @@ class ReduceLayoutInfer : public OpLayoutInfer {
     }
     auto reduce = context_->infer_graph_->CreateOperation<OpType>(
         new_axis, op_->impl()->node()->nn_param.reduce.keep_dim);
-    (*reduce).BindInput(context_->GetMapedTensor(t_src));
+    (*reduce).BindInput(context_->GetMappedTensor(t_src));
 
     if (op_->impl()->node()->nn_param.reduce.keep_dim) {
       auto otensor_infer = CreateOutputsTensor(pv);
diff --git a/src/tim/transform/ops/resize_layout_inference.h b/src/tim/transform/ops/resize_layout_inference.h
index ada33b7cb..e3b2809eb 100644
--- a/src/tim/transform/ops/resize_layout_inference.h
+++ b/src/tim/transform/ops/resize_layout_inference.h
@@ -51,7 +51,7 @@ class ResizeLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
 
     if (!final_pv->IsAligned()) {
-      auto perm_out = InsertPermute(context_->GetMapedTensor(i_src), final_pv);
+      auto perm_out = InsertPermute(context_->GetMappedTensor(i_src), final_pv);
       context_->UpdateTensorMap(i_src, perm_out);
       context_->SetPermuteVector(i_src, final_pv);
     }
@@ -70,7 +70,7 @@ class ResizeLayoutInfer : public OpLayoutInfer {
         target_width);
 
     auto out_infer = CreateOutputsTensor(required_pv);
-    (*resize).BindInput(context_->GetMapedTensor(i_src));
+    (*resize).BindInput(context_->GetMappedTensor(i_src));
     (*resize).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/reverse_layout_inference.h b/src/tim/transform/ops/reverse_layout_inference.h
index 3ff961d6c..a9bf2882c 100644
--- a/src/tim/transform/ops/reverse_layout_inference.h
+++ b/src/tim/transform/ops/reverse_layout_inference.h
@@ -49,7 +49,7 @@ class ReverseLayoutInfer : public OpLayoutInfer {
 
     auto reverse = context_->infer_graph_->CreateOperation<vx::ops::Reverse>(
         axis);
-    (*reverse).BindInput(context_->GetMapedTensor(src_input));
+    (*reverse).BindInput(context_->GetMappedTensor(src_input));
     auto infer_out = CreateOutputsTensor(input_pv);
     (*reverse).BindOutput(infer_out[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/roi_align_layout_inference.h b/src/tim/transform/ops/roi_align_layout_inference.h
index 8e3d706bb..3f3cd3b96 100644
--- a/src/tim/transform/ops/roi_align_layout_inference.h
+++ b/src/tim/transform/ops/roi_align_layout_inference.h
@@ -62,10 +62,10 @@ class RoiAlignLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -84,7 +84,7 @@ class RoiAlignLayoutInfer : public OpLayoutInfer {
     auto roi_align = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*roi_align).BindInput(context_->GetMapedTensor(i_src));
+      (*roi_align).BindInput(context_->GetMappedTensor(i_src));
     }
     (*roi_align).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/roi_pool_layout_inference.h b/src/tim/transform/ops/roi_pool_layout_inference.h
index ac4d25bc4..56d3028f6 100644
--- a/src/tim/transform/ops/roi_pool_layout_inference.h
+++ b/src/tim/transform/ops/roi_pool_layout_inference.h
@@ -62,10 +62,10 @@ class RoiPoolLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -84,7 +84,7 @@ class RoiPoolLayoutInfer : public OpLayoutInfer {
     auto roi_pool = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*roi_pool).BindInput(context_->GetMapedTensor(i_src));
+      (*roi_pool).BindInput(context_->GetMappedTensor(i_src));
     }
     (*roi_pool).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/select_layout_inference.h b/src/tim/transform/ops/select_layout_inference.h
index ff7a11827..20f045923 100644
--- a/src/tim/transform/ops/select_layout_inference.h
+++ b/src/tim/transform/ops/select_layout_inference.h
@@ -42,7 +42,7 @@ class SelectLayoutInfer : public OpLayoutInfer {
     auto select = context_->infer_graph_->CreateOperation<vx::ops::Select>();
     auto infer_out = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-        (*select).BindInput(context_->GetMapedTensor(i_src));
+        (*select).BindInput(context_->GetMappedTensor(i_src));
     }
     (*select).BindOutput(infer_out[0]);
 
diff --git a/src/tim/transform/ops/simple_ops_layout_inference.h b/src/tim/transform/ops/simple_ops_layout_inference.h
index 5867230bc..28ae75983 100644
--- a/src/tim/transform/ops/simple_ops_layout_inference.h
+++ b/src/tim/transform/ops/simple_ops_layout_inference.h
@@ -49,7 +49,7 @@ class SimpleOpsLayoutInfer : public OpLayoutInfer {
     auto out_infer = CreateOutputsTensor(input_pv);
     auto simple_op = context_->infer_graph_->CreateOperation<OpType>();
     (*simple_op)
-        .BindInput(context_->GetMapedTensor(i_src))
+        .BindInput(context_->GetMappedTensor(i_src))
         .BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/slice_layout_inference.h b/src/tim/transform/ops/slice_layout_inference.h
index 5db415039..aba7f5ada 100644
--- a/src/tim/transform/ops/slice_layout_inference.h
+++ b/src/tim/transform/ops/slice_layout_inference.h
@@ -54,7 +54,7 @@ class SliceLayoutInfer : public OpLayoutInfer {
     auto slice = context_->infer_graph_->CreateOperation<vx::ops::Slice>(
         dims, start, length);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*slice).BindInput(context_->GetMapedTensor(src_input));
+    (*slice).BindInput(context_->GetMappedTensor(src_input));
     (*slice).BindOutput(infer_out[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/softmax_layout_inference.h b/src/tim/transform/ops/softmax_layout_inference.h
index 57a2990ce..967a28815 100644
--- a/src/tim/transform/ops/softmax_layout_inference.h
+++ b/src/tim/transform/ops/softmax_layout_inference.h
@@ -49,7 +49,7 @@ class SoftmaxLayoutInfer : public OpLayoutInfer {
     auto softmax =
         context_->infer_graph_->CreateOperation<vx::ops::Softmax>(beta, axis);
     auto otensor_infer = CreateOutputsTensor(required_pv);
-    (*softmax).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*softmax).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*softmax).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/space2batch_layout_inference.h b/src/tim/transform/ops/space2batch_layout_inference.h
index 4eb601354..ee6e31fed 100644
--- a/src/tim/transform/ops/space2batch_layout_inference.h
+++ b/src/tim/transform/ops/space2batch_layout_inference.h
@@ -51,7 +51,7 @@ class Space2BatchLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -70,7 +70,7 @@ class Space2BatchLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::Space2Batch>(
             block_size, pad, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2batch).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2batch).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2batch).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/space2depth_layout_inference.h b/src/tim/transform/ops/space2depth_layout_inference.h
index e861a238d..ecac32cad 100644
--- a/src/tim/transform/ops/space2depth_layout_inference.h
+++ b/src/tim/transform/ops/space2depth_layout_inference.h
@@ -51,7 +51,7 @@ class SpaceToDepthLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -64,7 +64,7 @@ class SpaceToDepthLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::SpaceToDepth>(
             block_size, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2depth).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2depth).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2depth).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/split_layout_inference.h b/src/tim/transform/ops/split_layout_inference.h
index 0479641b1..d80a4d241 100644
--- a/src/tim/transform/ops/split_layout_inference.h
+++ b/src/tim/transform/ops/split_layout_inference.h
@@ -51,7 +51,7 @@ class SplitLayoutInfer : public OpLayoutInfer {
     auto split =
         context_->infer_graph_->CreateOperation<vx::ops::Split>(axis, slices);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*split).BindInput(context_->GetMapedTensor(input_tensor));
+    (*split).BindInput(context_->GetMappedTensor(input_tensor));
     (*split).BindOutputs(infer_out);
     for (const auto& out : op_->impl()->OutputsTensor()) {
         context_->SetPermuteVector(out, input_pv);
diff --git a/src/tim/transform/ops/squeeze_layout_inference.h b/src/tim/transform/ops/squeeze_layout_inference.h
index 719e352c7..a183530b6 100644
--- a/src/tim/transform/ops/squeeze_layout_inference.h
+++ b/src/tim/transform/ops/squeeze_layout_inference.h
@@ -50,7 +50,7 @@ class SqueezeLayoutInfer : public OpLayoutInfer {
     auto squeeze =
         context_->infer_graph_->CreateOperation<vx::ops::Squeeze>(axis);
     (*squeeze).BindInput(
-        context_->GetMapedTensor(op_->impl()->InputsTensor()[0]));
+        context_->GetMappedTensor(op_->impl()->InputsTensor()[0]));
 
     auto required_pv =
         MakeShared(op_->impl()->OutputsTensor()[0]->GetShape().size());
diff --git a/src/tim/transform/ops/stack_layout_inference.h b/src/tim/transform/ops/stack_layout_inference.h
index f649a2391..7f4645678 100644
--- a/src/tim/transform/ops/stack_layout_inference.h
+++ b/src/tim/transform/ops/stack_layout_inference.h
@@ -53,7 +53,7 @@ class StackLayoutInfer : public OpLayoutInfer {
     auto aligninput_pv = AlignPermuteVectorForMutilInputs();
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*stack).BindInput(context_->GetMapedTensor(i_src));
+      (*stack).BindInput(context_->GetMappedTensor(i_src));
     }
 
     if (axis < 0) {
diff --git a/src/tim/transform/ops/stridedslice_layout_inference.h b/src/tim/transform/ops/stridedslice_layout_inference.h
index 1cfca8362..6f0136e9b 100644
--- a/src/tim/transform/ops/stridedslice_layout_inference.h
+++ b/src/tim/transform/ops/stridedslice_layout_inference.h
@@ -105,7 +105,7 @@ class StridedSliceLayoutInfer : public OpLayoutInfer {
       }
 
       auto infer_out = CreateOutputsTensor(out_pv);
-      (*strided_slice).BindInput(context_->GetMapedTensor(src_input));
+      (*strided_slice).BindInput(context_->GetMappedTensor(src_input));
       (*strided_slice).BindOutput(infer_out[0]);
       context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], out_pv);
       next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/transpose_layout_inference.h b/src/tim/transform/ops/transpose_layout_inference.h
index 88ee23928..6578dbaf6 100644
--- a/src/tim/transform/ops/transpose_layout_inference.h
+++ b/src/tim/transform/ops/transpose_layout_inference.h
@@ -42,7 +42,7 @@ class TransposeLayoutInfer : public OpLayoutInfer {
   void OnInputs(
       std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
     auto src_input = op_->impl()->InputsTensor()[0];
-    auto infer_input = context_->GetMapedTensor(src_input);
+    auto infer_input = context_->GetMappedTensor(src_input);
     auto input_pv = context_->GetPermuteVector(src_input);
 
     std::vector<uint32_t> perm(op_->impl()->node()->nn_param.permute.dim_num);
diff --git a/src/tim/transform/ops/unidirectional_lstm_layout_inference.h b/src/tim/transform/ops/unidirectional_lstm_layout_inference.h
index 47b802bbc..9a099ec84 100644
--- a/src/tim/transform/ops/unidirectional_lstm_layout_inference.h
+++ b/src/tim/transform/ops/unidirectional_lstm_layout_inference.h
@@ -72,7 +72,7 @@ class UnidirectionalLstmLayoutInfer : public OpLayoutInfer {
 
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
diff --git a/src/tim/transform/ops/unidirectional_rnn_layout_inference.h b/src/tim/transform/ops/unidirectional_rnn_layout_inference.h
index 7bbbc09a3..8a8c261a9 100644
--- a/src/tim/transform/ops/unidirectional_rnn_layout_inference.h
+++ b/src/tim/transform/ops/unidirectional_rnn_layout_inference.h
@@ -72,7 +72,7 @@ class UnidirectionalRnnLayoutInfer : public OpLayoutInfer {
 
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     
diff --git a/src/tim/transform/ops/yolov4_layout_inference.h b/src/tim/transform/ops/yolov4_layout_inference.h
index 2e44bbeb2..6698e08d7 100644
--- a/src/tim/transform/ops/yolov4_layout_inference.h
+++ b/src/tim/transform/ops/yolov4_layout_inference.h
@@ -53,7 +53,7 @@ class Yolov4LayoutInfer : public OpLayoutInfer {
         context_->SetPermuteVector(i_src, MakeShared(4));
         context_->UpdateTensorMap(i_src, i_infer);
       } 
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;