Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VerisiliconTimVX] Support Allwinner V853 and OpenWrt, Don't merge!!! #10484

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ lite_option(LITE_WITH_NNADAPTER "Enable NNAdapter in lite mode"
lite_option(LITE_WITH_XPU "Enable XPU in lite mode" OFF)
lite_option(XPU_WITH_XFT "Enable XPU-XFT in lite mode" OFF)
lite_option(LITE_WITH_TRAIN "Enable training operators and kernels in lite" OFF)
lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON)
lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" OFF)
lite_option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
lite_option(LITE_WITH_METAL "Enable Metal support in lite" OFF)
lite_option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
Expand Down
4 changes: 2 additions & 2 deletions cmake/os/armlinux.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ if(ARMLINUX_ARCH_ABI STREQUAL "armv7")
endif()
if(ARMLINUX_ARCH_ABI STREQUAL "armv7hf")
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
set(CMAKE_C_COMPILER "/opt/toolchain-sunxi-musl/bin/arm-openwrt-linux-muslgnueabi-gcc")
set(CMAKE_CXX_COMPILER "/opt/toolchain-sunxi-musl/bin/arm-openwrt-linux-muslgnueabi-g++")
endif()
set(HOST_C_COMPILER $ENV{CC})
set(HOST_CXX_COMPILER $ENV{CXX})
Expand Down
8 changes: 5 additions & 3 deletions lite/api/tools/benchmark/profile/memory_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "memory_info.h"
#include "memory_info.h" // NOLINT

#ifdef __linux__
#include <malloc.h>
Expand Down Expand Up @@ -40,13 +40,15 @@ MemoryUsage GetMemoryUsage() {
if (getrusage(RUSAGE_SELF, &res) == 0) {
result.max_rss_kb = res.ru_maxrss;
}
/*
#if defined(__GLIBC__) && __GLIBC_MINOR__ >= 33
const auto mem = mallinfo2();
#else
const auto mem = mallinfo();
#endif
result.total_allocated_bytes = mem.arena;
result.in_use_allocated_bytes = mem.uordblks;
*/
#endif
return result;
}
Expand All @@ -59,6 +61,6 @@ void MemoryUsage::AllStatsToStream(std::ostream* stream) const {
<< in_use_allocated_bytes / 1024.0 / 1024.0 << " MB";
}

} // namespace paddle
} // namespace lite_api
} // namespace profile
} // namespace lite_api
} // namespace paddle
1 change: 1 addition & 0 deletions lite/backends/host/math/topk.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#pragma once
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ namespace verisilicon_timvx {
#undef REGISTER_CONVERTER

int Converter::Apply(core::Model* model) {
// Create the input and output tensors in advance so that the input and output
// remain in their original order when saving the NBG model on some machines.
auto input_count = model->input_operands.size();
for (size_t i = 0; i < input_count; i++) {
ConvertOperand(model->input_operands[i]);
}
auto output_count = model->output_operands.size();
for (size_t i = 0; i < output_count; i++) {
ConvertOperand(model->output_operands[i]);
}
// Convert the NNAdapter operations to the tim-vx operations
std::vector<core::Operation*> operations =
SortOperationsInTopologicalOrder(model);
Expand Down Expand Up @@ -89,8 +99,11 @@ std::shared_ptr<tim::vx::Tensor> Converter::AddTensor(

// Converts an NNAdapter operand to a tim-vx tensor and records the mapping.
// A new tensor is created unless the operand is a model output that already
// has a mapped tensor, in which case the existing tensor is reused (so model
// outputs created up-front keep their original order in the saved NBG model).
// NOTE: the scraped diff had left both the old and the new body in place,
// declaring `tensor` twice; only the new behavior is kept here.
std::shared_ptr<tim::vx::Tensor> Converter::ConvertOperand(
    core::Operand* operand, std::vector<int32_t> dimensions) {
  std::shared_ptr<tim::vx::Tensor> tensor = GetMappedTensor(operand);
  if (!tensor || !IsModelOutputOperand(operand)) {
    tensor = AddTensor(&operand->type, operand->buffer, dimensions);
    UpdateTensorMap(operand, tensor);
  }
  return tensor;
}
} // namespace verisilicon_timvx
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int ConvertExpand(Converter* converter, core::Operation* operation) {
auto shape_count = output_operand->type.dimensions.count;
auto shape_data = output_operand->type.dimensions.data;

std::vector<int32_t> expand_shape;
std::vector<uint32_t> expand_shape;
for (int i = shape_count - 1; i >= 0; i--) {
expand_shape.push_back(shape_data[i]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ int ConvertFill(Converter* converter, core::Operation* operation) {

auto shape_count = output_operand->type.dimensions.count;
auto shape_data = output_operand->type.dimensions.data;
std::vector<int32_t> shape;
std::vector<uint32_t> shape;
for (int i = shape_count - 1; i >= 0; i--) {
shape.push_back(shape_data[i]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ endif()
message(STATUS "NNADAPTER_VERISILICON_TIMVX_VIV_SDK_ROOT=${NNADAPTER_VERISILICON_TIMVX_VIV_SDK_ROOT}")

# Patch TIM-VX: remove the -Werror flags to avoid compilation errors, lower the minimum CMake version to 3.10, and replace the non-standard `uint` with `uint32_t` in custom_base.h
set(VERISILICON_TIMVX_PATCH_COMMAND sed -e "s/-Werror//g" -i CMakeLists.txt && sed -e "s/3.14/3.10/g" -i CMakeLists.txt)
set(VERISILICON_TIMVX_PATCH_COMMAND sed -e "s/-Werror//g" -i CMakeLists.txt && sed -e "s/3.14/3.10/g" -i CMakeLists.txt && sed -e "s/uint /uint32_t /g" -i include/tim/vx/ops/custom_base.h)
if(CMAKE_SYSTEM_NAME MATCHES "Android")
# Hack the TIM-VX and change the name of lib 'libArchModelSw.so' to 'libarchmodelSw.so' for Android
set(VERISILICON_TIMVX_PATCH_COMMAND ${VERISILICON_TIMVX_PATCH_COMMAND} && sed -e "s/libArchModelSw/libarchmodelSw/g" -i cmake/local_sdk.cmake)
Expand All @@ -78,7 +78,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DEXTERNAL_VIV_SDK=${NNADAPTER_VERISILICON_TIMVX_VIV_SDK_ROOT}
-DCMAKE_INSTALL_PREFIX=${VERISILICON_TIMVX_INSTALL_DIR}
-DTIM_VX_ENABLE_TENSOR_CACHE=OFF
-DTIM_VX_ENABLE_TENSOR_CACHE=OFF
${CROSS_COMPILE_CMAKE_ARGS}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "driver/verisilicon_timvx/converter/converter.h"
#include "driver/verisilicon_timvx/optimizer/convert_fill_like_into_mul_add.h"
#include "driver/verisilicon_timvx/optimizer/convert_meshgrid_into_reshape_expand.h"
#include "driver/verisilicon_timvx/optimizer/fix_ops.h"
#include "driver/verisilicon_timvx/optimizer/remove_relu.h"
#include "driver/verisilicon_timvx/optimizer/unpack_op_fusion.h"
#include "optimizer/constant_fold_operations.h"
#include "optimizer/convert_adaptive_pool2d_into_pool2d.h"
Expand Down Expand Up @@ -95,6 +97,10 @@ int Program::Build(core::Model* model, core::Cache* cache) {
for (size_t i = 0; i < input_count; i++) {
const auto& type = cache->input_types[i];
input_tensors_[i] = CreateTimVXTensor(graph_.get(), &type);
NNADAPTER_VLOG(3) << "Model input[" << i
<< "] id=" << input_tensors_[i]->GetId()
<< nnadapter::OperandTypeToString(
&cache->input_types[i]);
NNADAPTER_CHECK(input_tensors_[i]);
}
}
Expand All @@ -106,6 +112,10 @@ int Program::Build(core::Model* model, core::Cache* cache) {
for (size_t i = 0; i < output_count; i++) {
const auto& type = cache->output_types[i];
output_tensors_[i] = CreateTimVXTensor(graph_.get(), &type);
NNADAPTER_VLOG(3) << "Model output[" << i
<< "] id=" << output_tensors_[i]->GetId()
<< nnadapter::OperandTypeToString(
&cache->output_types[i]);
NNADAPTER_CHECK(output_tensors_[i]);
}
auto nbg_op = graph_->CreateOperation<tim::vx::ops::NBG>(
Expand Down Expand Up @@ -133,7 +143,9 @@ int Program::Build(core::Model* model, core::Cache* cache) {
FuseSigmoidMulIntoSwish(model);
ConvertAdaptivePool2dIntoPool2d(model);
UnpackOpFusion(model);
// FixOps(model);
ConvertQuantizationSymmToAsymm(model);
// RemoveRelu(model);
NNADAPTER_VLOG(5) << "Optimized model:" << std::endl << Visualize(model);
// Convert a NNAdapter model to a tim-vx graph
Converter converter(graph_.get(), &tensors_);
Expand All @@ -150,6 +162,9 @@ int Program::Build(core::Model* model, core::Cache* cache) {
NNADAPTER_CHECK(tensors_.find(operand) != tensors_.end());
input_tensors_[i] = tensors_[operand].front();
NNADAPTER_CHECK(input_tensors_[i]);
NNADAPTER_VLOG(3) << "Model input[" << i
<< "] id=" << input_tensors_[i]->GetId()
<< nnadapter::OperandTypeToString(&operand->type);
input_types_[i] = type;
}
}
Expand All @@ -164,6 +179,9 @@ int Program::Build(core::Model* model, core::Cache* cache) {
NNADAPTER_CHECK(tensors_.find(operand) != tensors_.end());
output_tensors_[i] = tensors_[operand].back();
NNADAPTER_CHECK(output_tensors_[i]);
NNADAPTER_VLOG(3) << "Model output[" << i
<< "] id=" << output_tensors_[i]->GetId()
<< nnadapter::OperandTypeToString(&operand->type);
output_types_[i] = type;
}
// Compile tim-vx graph and serialize to NBG(Network Binary Graph)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "driver/verisilicon_timvx/optimizer/fix_ops.h"
#include <cmath>
#include <vector>
#include "utility/debug.h"
#include "utility/logging.h"
#include "utility/modeling.h"
#include "utility/utility.h"

namespace nnadapter {
namespace verisilicon_timvx {

// Works around wrong results from RESIZE_LINEAR/RESIZE_NEAREST by appending a
// dummy RELU after the resize output, so the resize result is routed through
// an extra operation before reaching its consumers / the model outputs.
// NOTE(review): this assumes the resize output is always non-negative (e.g.
// uint8 asymm quantized data), making the extra RELU a no-op -- confirm
// before enabling the pass (its call site is currently commented out).
// Fixed: removed the unused local `input_operands`.
static void FixResizeLinearNearest(core::Model* model,
                                   core::Operation* operation) {
  auto& output_operands = operation->output_operands;
  auto output_operand = output_operands[0];
  // Capture the consumers before appending, then redirect them (and the
  // model outputs, if the resize output is a model output) to the new
  // dummy operand.
  auto output_operations = GetOperandConsumers(model, output_operand);
  auto dummy_operand =
      AppendUnaryOperation(model, output_operand, NNADAPTER_RELU);
  UpdateOperationInputOperands(
      output_operations, output_operand, dummy_operand);
  UpdateModelOutputOperands(model, output_operand, dummy_operand);
}

// Applies operation-specific workarounds to the NNAdapter model before it is
// converted to a tim-vx graph. Currently only RESIZE_LINEAR/RESIZE_NEAREST
// are patched (see FixResizeLinearNearest above).
void FixOps(core::Model* model) {
  std::vector<core::Operation*> operations =
      SortOperationsInTopologicalOrder(model);
  for (auto operation : operations) {
    // Fixed: the log previously said "Converting", but this pass only fixes
    // operations -- the actual conversion happens later in Converter::Apply.
    NNADAPTER_VLOG(5) << "Fixing " << OperationTypeToString(operation->type)
                      << " ...";
    switch (operation->type) {
      case NNADAPTER_RESIZE_LINEAR:
      case NNADAPTER_RESIZE_NEAREST:
        FixResizeLinearNearest(model, operation);
        break;
      default:
        break;
    }
  }
}

} // namespace verisilicon_timvx
} // namespace nnadapter
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "core/types.h"

namespace nnadapter {
namespace verisilicon_timvx {

// Applies driver-specific workarounds to the NNAdapter model (currently only
// RESIZE_LINEAR/RESIZE_NEAREST are patched) before conversion to tim-vx.
void FixOps(core::Model* model);

}  // namespace verisilicon_timvx
}  // namespace nnadapter
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "driver/verisilicon_timvx/optimizer/remove_relu.h"
#include <algorithm>
#include <map>
#include <vector>
#include "utility/debug.h"
#include "utility/logging.h"
#include "utility/micros.h"
#include "utility/modeling.h"
#include "utility/utility.h"

namespace nnadapter {
namespace verisilicon_timvx {

// Convert input(scale,zero_point=128)->relu->output to
// input(scale,zero_point=0)
// Removes a quantized RELU by folding its effect into the quantization
// params of its input operand: the producer then writes directly in the
// RELU output's range (scale copied from the RELU output, zero_point forced
// to 0 so negative values saturate away at quantization time).
// NOTE(review): the RELU output's own zero_point is assumed to be 0 --
// confirm before enabling this pass (its call site is commented out).
NNADAPTER_EXPORT void RemoveRelu(core::Model* model) {
  std::vector<core::Operation*> operations =
      SortOperationsInTopologicalOrder(model);
  for (auto operation : operations) {
    // Only handle RELU ops whose input is an internal operand consumed
    // solely by this RELU, with uint8 asymm per-layer quantization on both
    // the input and the output.
    if (operation->type != NNADAPTER_RELU) continue;
    auto relu_input_operand = operation->input_operands[0];
    auto relu_output_operand = operation->output_operands[0];
    if (IsModelInputOperand(relu_input_operand)) continue;
    auto relu_input_consumers = GetOperandConsumers(model, relu_input_operand);
    if (relu_input_consumers.size() != 1) continue;
    if (!IsUInt8AsymmPerLayerQuantType(relu_input_operand->type.precision) ||
        !IsUInt8AsymmPerLayerQuantType(relu_output_operand->type.precision))
      continue;
    // Rewrite the input operand's quantization so it already represents the
    // post-RELU value range.
    relu_input_operand->type.asymm_per_layer_params.scale =
        relu_output_operand->type.asymm_per_layer_params.scale;
    relu_input_operand->type.asymm_per_layer_params.zero_point = 0;
    // Bypass the RELU: its consumers (and the model outputs, if the RELU
    // output was one) now read the input operand directly, then the dead
    // operand and operation are dropped from the model.
    auto relu_output_consumers =
        GetOperandConsumers(model, relu_output_operand);
    UpdateOperationInputOperands(
        relu_output_consumers, relu_output_operand, relu_input_operand);
    if (IsModelOutputOperand(relu_output_operand)) {
      UpdateModelOutputOperands(model, relu_output_operand, relu_input_operand);
    }
    RemoveOperand(model, relu_output_operand);
    RemoveOperation(model, operation);
  }
}

} // namespace verisilicon_timvx
} // namespace nnadapter
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "core/types.h"

namespace nnadapter {
namespace verisilicon_timvx {

// Removes quantized input->RELU->output patterns by folding the RELU into the
// quantization params of its input operand (see remove_relu.cc for details).
// Fixed: pointer declarator placement ("Model *model" -> "Model* model") for
// consistency with the sibling headers in this driver (e.g. fix_ops.h).
void RemoveRelu(core::Model* model);

}  // namespace verisilicon_timvx
}  // namespace nnadapter
6 changes: 1 addition & 5 deletions lite/core/device_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -562,12 +562,8 @@ int set_sched_affinity(const std::vector<int>& cpu_ids) {

#define PD_CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))

// set affinity for thread
#ifdef __GLIBC__
// set affinity for thread
pid_t pid = syscall(SYS_gettid);
#else
pid_t pid = gettid();
#endif
cpu_set_t mask;
PD_CPU_ZERO(&mask);
unsigned int Runmask = 0;
Expand Down
Loading