From 71bb092a98e791ba7b9ca315b3a34fd07b30a6e5 Mon Sep 17 00:00:00 2001
From: chxin66 <57057788+chxin66@users.noreply.github.com>
Date: Wed, 3 Jan 2024 23:40:57 +0800
Subject: [PATCH] 20240103 release (#2)

Signed-off-by: Chen
Co-authored-by: Chen
---
 LICENSE                                       |    2 +-
 README.md                                     |   76 +-
 cmake/module/TimVxConfig.cmake                |    2 +-
 nnapi_status.md                               |   33 -
 .../Android_12/0001-Build-shell-service.patch |   64 +
 .../0002-Validate-model-in-shim-driver.patch  |   60 +
 .../0001-Build-shell-service.patch            |    0
 .../0002-Validate-model-in-shim-driver.patch  |    0
 src/Compilation.cpp                           |  207 ++
 src/Compilation.h                             |  104 +-
 src/Device.cpp                                |   90 +
 src/Device.h                                  |   82 +
 src/DeviceManager.cpp                         |   38 +
 src/DeviceManager.h                           |   57 +-
 src/Event.cpp                                 |  145 +
 src/Event.h                                   |   62 +-
 src/Execution.cpp                             |  987 +++--
 src/Execution.h                               |  146 +-
 src/MapOperation.cpp                          |   76 +-
 src/MapOperation.h                            |    2 +-
 src/Memory.cpp                                |  291 ++
 src/Memory.h                                  |  279 +-
 src/MemoryDesc.cpp                            |  173 +
 src/MemoryDesc.h                              |   63 +
 src/Model.cpp                                 |  678 ++--
 src/Model.h                                   |   96 +-
 src/OpCreator.h                               | 3204 +++++++++-------
 src/Types.h                                   |  132 +-
 src/Utils.cpp                                 |  305 +-
 src/Utils.h                                   |   63 +-
 src/VsiDevice.h                               |   95 -
 src/VsiNeuralNetworksSupportLibraryimpl.cpp   | 1235 +++---
 vts_status.md                                 |  111 -
 33 files changed, 5024 insertions(+), 3934 deletions(-)
 create mode 100644 patches/Android_12/0001-Build-shell-service.patch
 create mode 100644 patches/Android_12/0002-Validate-model-in-shim-driver.patch
 rename patches/{ => Android_14}/0001-Build-shell-service.patch (100%)
 rename patches/{ => Android_14}/0002-Validate-model-in-shim-driver.patch (100%)
 create mode 100644 src/Compilation.cpp
 create mode 100644 src/Device.cpp
 create mode 100644 src/Device.h
 create mode 100644 src/DeviceManager.cpp
 create mode 100644 src/Event.cpp
 create mode 100644 src/Memory.cpp
 create mode 100644 src/MemoryDesc.cpp
 create mode 100644 src/MemoryDesc.h
 delete mode 100644 src/VsiDevice.h
 delete mode 100644 vts_status.md

diff --git a/LICENSE b/LICENSE
index a019ca2..06f62f3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 /****************************************************************************
 *
-* Copyright (c) 2023 Vivante Corporation
+* Copyright (c) 2024 Vivante Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
diff --git a/README.md b/README.md
index 7b24466..1b3e8eb 100644
--- a/README.md
+++ b/README.md
@@ -1,59 +1,30 @@
 # VSI NPU Android Support Library
 
-**NOTE**: For customer, please ignore any section with (VSI internal)
-
-## How to build from distributed customer source package
+## 1 How to build
 
 ```sh
 cmake -B <build_dir> -S <source_dir> -DCMAKE_TOOLCHAIN_FILE=<ndk_root>/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-34
 
 cd <build_dir>
 make tim-vx VsiSupportLibrary
-# tim-vx MUST make before VsiSupportLibrary
-```
-
-## How to build from internal repo (VSI internal)
-
-```sh
-#Verified with android ndk r23c for verisilicon in house development
-cmake -B <build_dir> -S <source_dir> -DCMAKE_TOOLCHAIN_FILE=<ndk_root>/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-34 -DSLANG_TARGET_PID=<pid> -DSL_DIST_BUILD=OFF
-
-cd <build_dir>
-make tim-vx Slang VsiSupportLibrary
 ```
 
 Reference for cmake variable in android toolchain:
 
-## Common problems
-
-ld: error: undefined symbol: __android_log_print.
-
-Append -DCMAKE_CXX_FLAGS="-llog" after cmake options may help.
-
-## Switch git url for TIM-VX (VSI internal)
-
-For customer, they can get latest tim-vx from github and this is the default behavior.
For internal development, we can switch it back to internal gitlab by
-
- -DPUBLIC_TIM_VX=OFF
-
-## Switch git url for Slang (internal build)
-
-When build in internal, we can switch Slang resuorce to internal url by
-
- -DINTERNAL_BUILD=ON
-
-## Integrate with Android
+## 2 Integrate with Android
 
 verified with i.MX 8M Plus and Android 14
 
-### Precondition
+### 2.1 Precondition
 
 Since Android Support Library is a standalone library which implemented NNAPI spec, on android, we need to wrap it as a service for applications(android cts). In this document, we take "shell" approach to wrap support library as a service.
 
 Download android aosp or get it from SoC vendor.
 
-### Apply patch when build shell service
+### 2.2 Apply patches when building the shell service
 
-Apply patches in our SL `patches/`, if Android 12, use `patches_a12`:
+Apply the patches under `patches/` in our SL, selecting the directory that matches your Android version:
 
 ```sh
 cd ${AOSP_ROOT}/packages/modules/NeuralNetworks/
@@ -66,7 +37,7 @@ Why these patches are needed:
 1. Build the shell service executable that can load our support library.
 2. Use NNAPI validation utils to check whether a HAL model is conformed to NNAPI standard before converting the HAL model to SL model. Also check whether the HAL model contains OPs not supported by SL, if so, skip related VTS test cases.
 
-### build shell service for VTS and CTS
+### 2.3 Build the shell service for VTS and CTS
 
 ```sh
 cd ${AOSP_ROOT}/packages/modules/NeuralNetworks/driver/sample_shim
@@ -75,14 +46,14 @@ mm -j8
 
 The built shell service executable is located at `${AOSP_ROOT}/out/target/product/evk_8mp/symbols/vendor/bin/hw/android.hardware.neuralnetworks-shell-service-sample`.
 
-### Run test
+### 2.4 Run test
 
-push libtim-vx.so libVsiSupportLibrary.so libneuralnetworks.so VtsHalNeuralnetworksTargetTest CtsNNAPITestCases64 android.hardware.neuralnetworks-shell-service-sample to board
+push libtim-vx.so libVsiSupportLibrary.so VtsHalNeuralnetworksTargetTest CtsNNAPITestCases64 android.hardware.neuralnetworks-shell-service-sample to board
 
 You can get android test suite in
 
-#### 1. Delete old service and add shell service to vintf manifest
+#### 2.4.1 Delete the old service (optional) and add the shell service to the vintf manifest
 
-delete old service:
+If an old service is present, delete it:
 
 cd /vendor/etc/vintf/manifest
 rm -f android.hardware.neuralnetworks@1.3-service-vsi-npu-server.xml
@@ -98,7 +69,7 @@ add following content to `/vendor/etc/vintf/manifest.xml`
 
 Finally, reboot.
 
-#### 2. Add system lib in default link space
+#### 2.4.2 Add system lib in default link space
 
 to solve link fail in namespace(default):
 
 dlopen failed: library "libandroidfw.so" not found: needed by /vendor/lib64/libandroid.so in namespace (default)
 
 In `linkerconfig/ld.config.txt`
 
@@ -107,35 +78,35 @@
 ```sh
 [vendor]
 namespace.default.search.paths = /odm/${LIB}
-# Add these two lines
+# Add these three lines
 namespace.default.search.paths += /system/${LIB}
 namespace.default.search.paths += /apex/com.android.i18n/${LIB}
+namespace.default.search.paths += /apex/com.android.os.statsd/${LIB}
 ```
 
-### 3. Start service by run android.hardware.neuralnetworks-shell-service-sample on Android board
+### 2.5 Start the service by running android.hardware.neuralnetworks-shell-service-sample on the Android board
 
-### 4. run test with VTS
+### 2.6 Run test with VTS
 
 ```sh
 ./VtsHalNeuralnetworksTargetTest --gtest_filter=TestGenerated/GeneratedTest.Test/android_hardware_neuralnetworks_IDevice_nnapi_sample_sl_updatable_reshape
 ```
 
-### 5. 
run test with CTS +### 2.7 run test with CTS ```sh ./CtsNNAPITestCases64 --gtest_filter=TestGenerated/QuantizationCouplingTest* ``` -## Integrate with TfLite +## 3 Integrate with TfLite -### Get the source code of TensorFlow +### 3.1 Get the source code of TensorFlow ```sh git clone https://github.com/tensorflow/tensorflow.git ``` -### Build benchmark_model +### 3.2 Build benchmark_model ```sh cd tensorflow/tensorflow/lite @@ -147,15 +118,8 @@ make benchmark_model -j8 push benchmark_model to board -### Run benchmark_model with support library +### 3.3 Run benchmark_model with support library ```sh ./benchmark_model --graph=mobilenet_v1_1.0_224_quant.tflite --use_nnapi=true --nnapi_support_library_path=/vendor/lib64/libVsiSupportLibrary.so --nnapi_accelerator_name=vsi-device-0 -``` - -## How to pack source for release (VSI internal) - -cd build directory -run `make tim-vx Slang VsiSupportLibrary && make package_source`, you will get source code in archived files. - -Note: don't create build folder in your source code folder, else the package will include the build directory +``` \ No newline at end of file diff --git a/cmake/module/TimVxConfig.cmake b/cmake/module/TimVxConfig.cmake index cdb8cb7..2ac92f3 100644 --- a/cmake/module/TimVxConfig.cmake +++ b/cmake/module/TimVxConfig.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Vivante Corporation +# Copyright (c) 2024 Vivante Corporation # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), diff --git a/nnapi_status.md b/nnapi_status.md index 139ac1d..5311956 100644 --- a/nnapi_status.md +++ b/nnapi_status.md @@ -75,38 +75,5 @@ | ANeuralNetworks_getDeviceCount | Yes | 5 | | ANeuralNetworks_getMaximumLoopTimeout | Yes | 5 | | ANeuralNetworks_getRuntimeFeatureLevel | Yes | 5 | -| - | - | - | - | -| SL_ANeuralNetworksCompilation_setCachingFromFds | Yes | 5 | -| SL_ANeuralNetworksDevice_getNumberOfCacheFilesNeeded | Yes | 5 | -| SL_ANeuralNetworksDevice_getPerformanceInfo | Yes | 5 | -| SL_ANeuralNetworksDevice_forEachOperandPerformanceInfo | Yes | 5 | -| SL_ANeuralNetworksDevice_getVendorExtensionCount | won't suppoprt | 5 | -| SL_ANeuralNetworksDevice_getVendorExtensionName | won't suppoprt | 5 | -| SL_ANeuralNetworksDevice_forEachVendorExtensionOperandTypeInformation | won't suppoprt | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getSessionId | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getNnApiVersion | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getModelArchHash | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getDeviceIds | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getErrorCode | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getInputDataClass | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getOutputDataClass | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_getCompilationTimeNanos | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_isCachingEnabled | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_isControlFlowUsed | No | 5 | -| SL_ANeuralNetworksDiagnosticCompilationInfo_areDynamicTensorsUsed | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getSessionId | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getNnApiVersion | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getModelArchHash | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getDeviceIds | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getExecutionMode | No | 5 | 
-| SL_ANeuralNetworksDiagnosticExecutionInfo_getInputDataClass | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getOutputDataClass | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getErrorCode | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getRuntimeExecutionTimeNanos | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_getHardwareExecutionTimeNanos | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_isCachingEnabled | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_isControlFlowUsed | No | 5 | -| SL_ANeuralNetworksDiagnosticExecutionInfo_areDynamicTensorsUsed | No | 5 | -| SL_ANeuralNetworksDiagnostic_registerCallbacks | No | 5 | **NOTE**: test result with imx8mp android 14 \ No newline at end of file diff --git a/patches/Android_12/0001-Build-shell-service.patch b/patches/Android_12/0001-Build-shell-service.patch new file mode 100644 index 0000000..4eaa5c4 --- /dev/null +++ b/patches/Android_12/0001-Build-shell-service.patch @@ -0,0 +1,64 @@ +From 802ec938216e1609df54cd5cf612470c29608a9d Mon Sep 17 00:00:00 2001 +From: Xiaoran Weng +Date: Fri, 29 Dec 2023 10:24:03 +0800 +Subject: [PATCH 1/2] Build shell service + +--- + driver/sample_shim/Android.bp | 8 ++++++++ + driver/sample_shim/ShellServiceSample.cpp | 8 +++----- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/driver/sample_shim/Android.bp b/driver/sample_shim/Android.bp +index a4e4d76..e9127b5 100644 +--- a/driver/sample_shim/Android.bp ++++ b/driver/sample_shim/Android.bp +@@ -97,3 +97,11 @@ cc_binary { + init_rc: ["config/android.hardware.neuralnetworks-shim-service-sample.rc"], + vintf_fragments: ["config/android.hardware.neuralnetworks-shim-service-sample.xml"], + } ++ ++cc_binary { ++ name: "android.hardware.neuralnetworks-shell-service-sample", ++ srcs: ["ShellServiceSample.cpp"], ++ defaults: ["NeuralNetworksShimDriverAidl_server_defaults"], ++ init_rc: ["config/android.hardware.neuralnetworks-shell-service-sample.rc"], ++ vintf_fragments: ["config/android.hardware.neuralnetworks-shell-service-sample.xml"], ++} +diff --git a/driver/sample_shim/ShellServiceSample.cpp b/driver/sample_shim/ShellServiceSample.cpp +index 6c3eda5..1ce61ed 100644 +--- a/driver/sample_shim/ShellServiceSample.cpp ++++ b/driver/sample_shim/ShellServiceSample.cpp +@@ -34,8 +34,6 @@ + #include + #include + +-typedef struct NnApiSLDriverImpl NnApiSLDriverImpl; +- + namespace aidl::android::hardware::neuralnetworks { + namespace { + +@@ -95,7 +93,7 @@ int registerDevices(const std::string& driverPath, const std::vector& dev + + // The default is 15, use more only if there's more devices exposed. 
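++    // Unlike the AOSP sample, register eagerly (asLazy=false below) so the
++    // shell service is already running when VTS/CTS enumerates devices.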
+ ANeuralNetworksShimRegistrationParams_setNumberOfListenerThreads(params, 15); +- ANeuralNetworksShimRegistrationParams_registerAsLazyService(params, /*asLazy=*/true); ++ ANeuralNetworksShimRegistrationParams_registerAsLazyService(params, /*asLazy=*/false); + ANeuralNetworksShimRegistrationParams_fallbackToMinimumSupportDevice(params, /*fallback=*/true); + + for (const auto& device : devices) { +@@ -123,10 +121,10 @@ using aidl::android::hardware::neuralnetworks::Names; + using aidl::android::hardware::neuralnetworks::registerDevices; + + int main() { +- const std::string driverPath = "/vendor/lib64/neuralnetworks_sample_sl_driver_prebuilt.so"; ++ const std::string driverPath = "/vendor/lib64/libVsiSupportLibrary.so"; + + const std::vector devicesToRegister = { +- {.driverName = "nnapi-sample_sl", .serviceName = "nnapi-sample_sl_updatable"}, ++ {.driverName = "vsi-device-0", .serviceName = "nnapi-sample_sl_updatable"}, + }; + + return registerDevices(driverPath, devicesToRegister); +-- +2.34.1 + diff --git a/patches/Android_12/0002-Validate-model-in-shim-driver.patch b/patches/Android_12/0002-Validate-model-in-shim-driver.patch new file mode 100644 index 0000000..5b63867 --- /dev/null +++ b/patches/Android_12/0002-Validate-model-in-shim-driver.patch @@ -0,0 +1,60 @@ +From af587b3554695cb875c560399bc504816b80d086 Mon Sep 17 00:00:00 2001 +From: Xiaoran Weng +Date: Tue, 2 Jan 2024 09:52:40 +0800 +Subject: [PATCH 2/2] Validate model in shim driver + +--- + shim_and_sl/ShimDevice.cpp | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/shim_and_sl/ShimDevice.cpp b/shim_and_sl/ShimDevice.cpp +index eadbbef..cdadea1 100644 +--- a/shim_and_sl/ShimDevice.cpp ++++ b/shim_and_sl/ShimDevice.cpp +@@ -475,6 +475,12 @@ ndk::ScopedAStatus ShimDevice::getSupportedExtensions(std::vector* ex + + ndk::ScopedAStatus ShimDevice::getSupportedOperations(const Model& model, + std::vector* supportedOperations) { ++ const auto canonicalModel = ::android::nn::convert(model); ++ if (!canonicalModel.has_value()) { ++ LOG(ERROR) << "HAL model is invalid: " << canonicalModel.error().message; ++ return toAStatus(ErrorStatus::INVALID_ARGUMENT, canonicalModel.error().message); ++ } ++ + const auto numOperations = model.main.operations.size(); + supportedOperations->resize(numOperations); + +@@ -546,6 +552,13 @@ ndk::ScopedAStatus ShimDevice::prepareModel( + return toAStatus(ErrorStatus::INVALID_ARGUMENT); + } + ++ const auto canonicalModel = ::android::nn::convert(model); ++ if (!canonicalModel.has_value()) { ++ LOG(ERROR) << "HAL model is invalid: " << canonicalModel.error().message; ++ callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); ++ return toAStatus(ErrorStatus::INVALID_ARGUMENT, canonicalModel.error().message); ++ } ++ + ErrorStatus convertErrorStatus = ErrorStatus::NONE; + std::vector copiedOperandValues; + auto modelAndMemory = +@@ -556,6 +569,16 @@ ndk::ScopedAStatus ShimDevice::prepareModel( + return toAStatus(convertErrorStatus); + } + ++ std::vector supportedOps; ++ getSupportedOperations(model, &supportedOps); ++ bool allOpsSupported = std::all_of(supportedOps.cbegin(), supportedOps.cend(), ++ [](bool supported) { return supported; }); ++ ++ if (!allOpsSupported) { ++ callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); ++ return ndk::ScopedAStatus::ok(); ++ } ++ + // b/185976051, past this point we pretend that compilation is asynchronous, and in + /// case of error we return OK status, but communicate the error through the callback. 
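++    // With the supported-ops check above, only valid models whose operations
++    // are all supported by the SL reach the compilation step below.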
+    auto compilation = ::android::nn::sl_wrapper::Compilation::createForDevice(
+-- 
+2.34.1
+
diff --git a/patches/0001-Build-shell-service.patch b/patches/Android_14/0001-Build-shell-service.patch
similarity index 100%
rename from patches/0001-Build-shell-service.patch
rename to patches/Android_14/0001-Build-shell-service.patch
diff --git a/patches/0002-Validate-model-in-shim-driver.patch b/patches/Android_14/0002-Validate-model-in-shim-driver.patch
similarity index 100%
rename from patches/0002-Validate-model-in-shim-driver.patch
rename to patches/Android_14/0002-Validate-model-in-shim-driver.patch
diff --git a/src/Compilation.cpp b/src/Compilation.cpp
new file mode 100644
index 0000000..db599d2
--- /dev/null
+++ b/src/Compilation.cpp
@@ -0,0 +1,207 @@
+/****************************************************************************
+ *
+ * Copyright (c) 2024 Vivante Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ *****************************************************************************/
+
+#include "Compilation.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <cerrno>
+
+#include <algorithm>
+#include <cstring>
+#include <string>
+
+#include "Types.h"
+#include "Utils.h"
+
+namespace vsi::android::sl {
+
+Compilation::~Compilation() {
+    close(cacheFd_);
+}
+
+int Compilation::setPreference(PreferenceCode preference) {
+    if (finished_) {
+        LOGE("Compilation::setPreference called after compilation finished");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    preference_ = preference;
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int Compilation::setPriority(PriorityCode priority) {
+    if (finished_) {
+        LOGE("Compilation::setPriority called after compilation finished");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    priority_ = priority;
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int Compilation::setTimeout(Duration duration) {
+    if (finished_) {
+        LOGE("Compilation::setTimeout called after compilation finished");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    timeoutDuration_ = duration;
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int Compilation::setCaching(const fs::path& cacheDir, const uint8_t* token) {
+    if (finished_) {
+        LOGE("Compilation::setCaching called after compilation finished");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+
+    // The filename consists of ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN * 2 characters.
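+    // Each byte of the token is split into two nibbles, and each nibble is
+    // mapped to a letter in 'A'..'P', which yields a filesystem-safe name.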
+ std::string filename(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN * 2UL, '0'); + for (size_t i = 0; i < ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN; i++) { + filename[i * 2] = 'A' + (token[i] & 0x0F); // NOLINT(*-magic-numbers) + filename[i * 2 + 1] = 'A' + (token[i] >> 4); + } + + fs::path cacheFile = cacheDir / filename; + int fd = open(cacheFile.c_str(), O_CREAT | O_EXCL | O_RDWR, 0); + if (fd == -1) { + if (errno == EEXIST) { + // The file exists, delete it and try again. + if (unlink(cacheFile.c_str()) == -1) { + // No point in retrying if the unlink failed. + LOGE("Compilation::setCaching error unlinking cache file %s: %s (%d)", + cacheFile.c_str(), strerror(errno), errno); + return ANEURALNETWORKS_BAD_DATA; + } + // Retry now that we've unlinked the file. + fd = open(cacheFile.c_str(), O_CREAT | O_EXCL | O_RDWR, 0); + } + if (fd == -1) { + LOGE("Compilation::setCaching error creating cache file %s: %s (%d)", cacheFile.c_str(), + strerror(errno), errno); + return ANEURALNETWORKS_BAD_DATA; + } + } + + return Compilation::setCaching(fd, token); +} + +int Compilation::setCaching(int fd, const uint8_t* token) { + if (finished_) { + LOGE("Compilation::setCaching called after compilation finished"); + return ANEURALNETWORKS_BAD_STATE; + } + + struct stat cacheStat; + if (fstat(fd, &cacheStat) < 0) { + LOGE("Compilation::setCaching failed to stat cache file: %s (%d)", strerror(errno), errno); + return ANEURALNETWORKS_BAD_DATA; + } + + if ((cacheStat.st_mode & (S_IRUSR | S_IWUSR)) == 0) { + LOGE("Compilation::setCaching cache file not in RW mode"); + return ANEURALNETWORKS_BAD_DATA; + } + + int cacheFd = dup(fd); + if (cacheFd == -1) { + LOGE("Compilation::setCaching failed to dup cache fd: %s (%d)", strerror(errno), errno); + return ANEURALNETWORKS_BAD_DATA; + } + cacheFd_ = cacheFd; + + std::copy_n(token, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, cacheToken_.data()); + + return ANEURALNETWORKS_NO_ERROR; +} + +int Compilation::finish() { + if (cacheFd_ != -1) { + struct stat cacheStat; + if (fstat(cacheFd_, &cacheStat) < 0) { + LOGE("Compilation::finish failed to stat cache file: %s (%d)", strerror(errno), errno); + return ANEURALNETWORKS_BAD_DATA; + } + + off_t size = cacheStat.st_size; + if (size > kNBGMagic.size()) { + LOGD("Compilation::finish read cache file of %zd KB", size / 1024); + cacheBuffer_.resize(size); + + ssize_t readSize = 0; + size_t bufferOffset = 0; + do { + readSize = read(cacheFd_, cacheBuffer_.data() + bufferOffset, size); + bufferOffset += readSize; + } while (readSize > 0); + + if (readSize < 0) { + LOGE("Compilation::finish failed to read cache file: %s (%d)", strerror(errno), + errno); + return ANEURALNETWORKS_BAD_DATA; + } + + // Check if cache file is NBG format. + for (size_t i = 0; i < kNBGMagic.size(); i++) { + char symbol = static_cast(cacheBuffer_[i]); + if (symbol != kNBGMagic[i]) { + cacheBuffer_.clear(); + } + } + } + + cacheState_ = cacheBuffer_.empty() ? 
CacheState::EMPTY : CacheState::LOADED; + } + + finished_ = true; + return ANEURALNETWORKS_NO_ERROR; +} + +int Compilation::writeToCache(const uint8_t* data, size_t size) { + if (cacheState_ == CacheState::DISABLED) { + LOGE("Compilation::writeToCache cache is disabled"); + return ANEURALNETWORKS_BAD_STATE; + } + + cacheBuffer_.resize(size); + std::copy_n(data, size, cacheBuffer_.data()); + + lseek(cacheFd_, 0, SEEK_SET); + ssize_t writeSize = 0; + size_t bufferOffset = 0; + do { + writeSize = write(cacheFd_, cacheBuffer_.data() + bufferOffset, size - bufferOffset); + bufferOffset += writeSize; + } while (writeSize > 0); + + if (writeSize < 0) { + LOGE("Compilation::writeToCache failed to write cache file: %s (%d)", strerror(errno), + errno); + return ANEURALNETWORKS_BAD_DATA; + } + + cacheState_ = CacheState::LOADED; + return ANEURALNETWORKS_NO_ERROR; +} + +} // namespace vsi::android::sl \ No newline at end of file diff --git a/src/Compilation.h b/src/Compilation.h index aef432d..44945f7 100644 --- a/src/Compilation.h +++ b/src/Compilation.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,56 +21,94 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_COMPILATION_H #define VSI_ANDROID_SL_COMPILATION_H + +#include +#include #include -#include +#include +#include "Device.h" #include "Model.h" -#include "VsiDevice.h" -#include "slang/type_system.h" -#include "tim/vx/ops.h" +#include "Types.h" +#include "tim/vx/context.h" +#include "tim/vx/graph.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl { + +namespace fs = std::filesystem; class Compilation { public: - Compilation(Model* model) : model_(model) {} - Compilation(Model* model, const std::vector devices) - : model_(model), devices_(devices) {} - int Finish() { - finished_ = true; - return ANEURALNETWORKS_NO_ERROR; - } - Model* GetModel() { return model_; } - Model* GetModel() const { return model_; } - const std::vector& Devices() { return devices_; } - int SetPreference(PreferenceCode preference) { - preference_ = preference; - return ANEURALNETWORKS_NO_ERROR; + enum class CacheState { + DISABLED, + EMPTY, + LOADED, + }; + + static constexpr uint32_t kNumModelCacheFiles = 1; + static constexpr uint32_t kNumDataCacheFiles = 0; + + explicit Compilation(Model* model) + : model_(model), + cacheState_(CacheState::DISABLED), + vxContext_(tim::vx::Context::Create()) {} + explicit Compilation(Model* model, const std::vector>& devices) + : model_(model), + devices_(devices), + cacheState_(CacheState::DISABLED), + vxContext_(tim::vx::Context::Create()) {} + + ~Compilation(); + int finish(); + [[nodiscard]] bool isFinished() const { return finished_; } + + [[nodiscard]] Model* getModel() { return model_; } + [[nodiscard]] const Model* getModel() const { return model_; } + [[nodiscard]] const std::vector>& getDevices() const { + return devices_; } - int SetPriority(PriorityCode priority) { - priority_ = priority; - return ANEURALNETWORKS_NO_ERROR; + + [[nodiscard]] CacheState getCacheState() const { return cacheState_; } + [[nodiscard]] const uint8_t* getCacheData() const { + return cacheBuffer_.empty() ? 
nullptr : cacheBuffer_.data(); } - int SetTimeout(DurationCode duration) { - duration_ = duration; - return ANEURALNETWORKS_NO_ERROR; + [[nodiscard]] size_t getCacheSize() const { return cacheBuffer_.size(); } + int writeToCache(const uint8_t* data, size_t size); + + int setPreference(PreferenceCode preference); + int setPriority(PriorityCode priority); + int setTimeout(Duration duration); + int setCaching(int fd, const uint8_t* token); + int setCaching(const fs::path& cacheDir, const uint8_t* token); + + void setCompiledGraph(const std::shared_ptr& compiledGraph) { + vxGraph_ = compiledGraph; } + [[nodiscard]] std::shared_ptr getContext() { return vxContext_; } + [[nodiscard]] std::shared_ptr getCompiledGraph() { return vxGraph_; } private: + static constexpr std::array kNBGMagic = {'V', 'P', 'M', 'N'}; + Model* model_; PreferenceCode preference_; PriorityCode priority_; - DurationCode duration_; - const std::vector devices_; - bool finished_{false}; + Duration timeoutDuration_; + std::vector> devices_; + std::shared_ptr vxContext_; + std::shared_ptr vxGraph_; + + CacheState cacheState_; + std::vector cacheBuffer_; + std::array cacheToken_; + int cacheFd_ = -1; + + bool finished_ = false; }; -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/Device.cpp b/src/Device.cpp new file mode 100644 index 0000000..95e7099 --- /dev/null +++ b/src/Device.cpp @@ -0,0 +1,90 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ + +#include "Device.h" + +#include + +#include "NeuralNetworksSupportLibraryImpl.h" +#include "Utils.h" + +namespace vsi::android::sl { + +Device::Device(std::shared_ptr device) { + name_ = std::string(kNamePrefix) + '-' + std::to_string(device->Id()); + device_ = std::move(device); +} + +Device::PerformanceInfo Device::queryPerformanceInfo(int32_t kind) const { + switch (kind) { + case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_RELAXED_SCALAR: + case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_RELAXED_TENSOR: + return { + 0.5F, + 0.5F, + }; + break; + case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_IF: + case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_WHILE: + return { + 10.0F, + 10.0F, + }; + break; + default: + LOGW("Device::queryPerformanceInfo passed an invalid performance info code: %d", kind); + return {}; + } +} + +Device::PerformanceInfo Device::queryOperandPerformanceInfo(OperandType operandType) const { + switch (operandType) { + case OperandType::TENSOR_FLOAT32: + case OperandType::TENSOR_FLOAT16: + case OperandType::TENSOR_INT32: + case OperandType::TENSOR_BOOL8: + case OperandType::TENSOR_QUANT8_ASYMM: + case OperandType::TENSOR_QUANT8_SYMM: + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + case OperandType::TENSOR_QUANT16_ASYMM: + case OperandType::TENSOR_QUANT16_SYMM: + case OperandType::FLOAT32: + case OperandType::FLOAT16: + case OperandType::INT32: + case OperandType::UINT32: + case OperandType::BOOL: + return { + 0.5F, + 0.5F, + }; + break; + default: + LOGW("Device::queryOperandPerformanceInfo passed an unsupported op type: %d", + static_cast(operandType)); + return {}; + } +} + +} // namespace vsi::android::sl \ No newline at end of file diff --git a/src/Device.h b/src/Device.h new file mode 100644 index 0000000..05073b6 --- /dev/null +++ b/src/Device.h @@ -0,0 +1,82 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ + +#ifndef VSI_ANDROID_SL_VSI_DEVICE_H +#define VSI_ANDROID_SL_VSI_DEVICE_H + +#include + +#include +#include + +#include "Types.h" +#include "tim/vx/platform/platform.h" + +namespace vsi::android::sl { + +class Device { + struct PerformanceInfo { + float execTimeRatio; + float powerUsageRatio; + }; + + public: + static constexpr std::array kSupportedOperandTypes = { + OperandType::FLOAT32, + OperandType::INT32, + OperandType::UINT32, + OperandType::TENSOR_FLOAT32, + OperandType::TENSOR_INT32, + OperandType::TENSOR_QUANT8_ASYMM, + OperandType::BOOL, + OperandType::TENSOR_QUANT16_SYMM, + OperandType::TENSOR_FLOAT16, + OperandType::TENSOR_BOOL8, + OperandType::FLOAT16, + OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL, + OperandType::TENSOR_QUANT16_ASYMM, + OperandType::TENSOR_QUANT8_SYMM, + OperandType::TENSOR_QUANT8_ASYMM_SIGNED, + }; + + explicit Device(std::shared_ptr device); + + [[nodiscard]] std::string_view getName() const { return name_; } + [[nodiscard]] std::string_view getVersion() const { return kVersion; } + [[nodiscard]] int64_t getFeatureLevel() const { return kFeatureLevel; } + [[nodiscard]] PerformanceInfo queryPerformanceInfo(int32_t kind) const; + [[nodiscard]] PerformanceInfo queryOperandPerformanceInfo(OperandType operandType) const; + + private: + std::string name_; + std::shared_ptr device_; + + static constexpr std::string_view kNamePrefix = "vsi-device"; + static constexpr std::string_view kVersion = "0.0.1"; + static constexpr int64_t kFeatureLevel = ANEURALNETWORKS_FEATURE_LEVEL_7; +}; + +} // namespace vsi::android::sl + +#endif \ No newline at end of file diff --git a/src/DeviceManager.cpp b/src/DeviceManager.cpp new file mode 100644 index 0000000..bcfb312 --- /dev/null +++ b/src/DeviceManager.cpp @@ -0,0 +1,38 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ + +#include "DeviceManager.h" + +#include "tim/vx/platform/native.h" + +namespace vsi::android::sl { + +DeviceManager::DeviceManager() { + auto vxDevices = tim::vx::platform::NativeDevice::Enumerate(); + for (auto vxDevice : vxDevices) { + devices_.push_back(std::make_shared(vxDevice)); + } +} + +} // namespace vsi::android::sl \ No newline at end of file diff --git a/src/DeviceManager.h b/src/DeviceManager.h index 3db6738..23c2baf 100644 --- a/src/DeviceManager.h +++ b/src/DeviceManager.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,58 +21,33 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_DEVICE_MANAGER_H #define VSI_ANDROID_SL_DEVICE_MANAGER_H -#include #include -#include -#include "VsiDevice.h" -#include "tim/vx/platform/native.h" -#ifdef USE_GRPC -#include "tim/vx/platform/grpc/grpc_remote.h" -#endif -namespace vsi { -namespace android { -namespace sl { +#include "Device.h" +#include "tim/vx/platform/platform.h" + +namespace vsi::android::sl { + class DeviceManager { public: - static DeviceManager* Instance() { - if (instance_ == nullptr) { - instance_ = new DeviceManager(); -#ifdef USE_GRPC - char env[32] = {0}; - __system_property_get("vendor.VSI_ASL_PORT", env); - std::string port(env); - auto devices = tim::vx::platform::GRPCRemoteDevice::Enumerate(port); - -#else - auto devices = tim::vx::platform::NativeDevice::Enumerate(); -#endif - for (int i = 0; i < devices.size(); ++i) { - std::string name("vsi-device-" + std::to_string(i)); - std::shared_ptr device = std::make_shared(devices[i], name); - instance_->GetDevices().push_back(device); - } - } - return instance_; + static DeviceManager* get() { + static DeviceManager manager; + return &manager; } - std::vector>& GetDevices() { return devices_; } + + [[nodiscard]] size_t getNumDevices() { return devices_.size(); } + [[nodiscard]] const std::vector>& getDevices() { return devices_; } private: - DeviceManager(){}; - DeviceManager(const DeviceManager&){}; - DeviceManager& operator=(const DeviceManager&) = delete; + DeviceManager(); // NOLINT(modernize-use-equals-delete) - static DeviceManager* instance_; - std::vector> devices_; + std::vector> devices_; }; -DeviceManager* DeviceManager::instance_ = nullptr; - -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/Event.cpp b/src/Event.cpp new file mode 100644 index 0000000..ca4e2cb --- /dev/null +++ b/src/Event.cpp @@ -0,0 +1,145 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ *****************************************************************************/
+
+#include "Event.h"
+
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+
+#include <mutex>
+#include <thread>
+#include <utility>
+
+#include "Utils.h"
+
+namespace vsi::android::sl {
+
+CallbackEvent::CallbackEvent(TimePoint deadline) {
+    deadline_ = deadline;
+    isNotified_ = false;
+}
+
+int CallbackEvent::bindThread(std::thread thread) {
+    std::lock_guard lock(mutex_);
+
+    if (thread_.joinable()) {
+        LOGE("CallbackEvent::bindThread a thread is already bound");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+
+    if (!thread.joinable()) {
+        LOGE("CallbackEvent::bindThread passed an invalid thread");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+
+    thread_ = std::move(thread);
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int CallbackEvent::wait() const {
+    std::unique_lock lock(mutex_);
+
+    if (!cv_.wait_until(lock, deadline_, [this] { return isNotified_; })) {
+        return ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT;
+    }
+
+    if (thread_.joinable()) {
+        thread_.join();
+    }
+
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+void CallbackEvent::notify() {
+    {
+        std::lock_guard lock(mutex_);
+        if (isNotified_) {
+            return;
+        }
+        isNotified_ = true;
+    }
+    cv_.notify_all();
+}
+
+SyncFenceEvent::SyncFenceEvent(int syncFenceFd) {
+    if (syncFenceFd > 0) {
+        syncFenceFd_ = dup(syncFenceFd);
+    }
+}
+
+SyncFenceEvent::~SyncFenceEvent() {
+    close(syncFenceFd_);
+}
+
+int SyncFenceEvent::getSyncFenceFd(bool shouldDup) const {
+    int syncFenceFd = shouldDup ? dup(syncFenceFd_) : syncFenceFd_;
+    return syncFenceFd;
+}
+
+int SyncFenceEvent::wait() const {
+    if (syncFenceFd_ == -1) {
+        // The SL does not support creating sync fences.
+        return ANEURALNETWORKS_NO_ERROR;
+    }
+
+    {
+        std::lock_guard lock(mutex_);
+        struct pollfd fds;
+        fds.fd = syncFenceFd_;
+        fds.events = POLLIN;
+        int timeout = -1;
+
+        int ret;
+        do {
+            ret = poll(&fds, 1, timeout);
+            if (ret > 0) {
+                if ((fds.revents & POLLNVAL) != 0) {
+                    errno = EINVAL;
+                    return ANEURALNETWORKS_BAD_STATE;
+                }
+                if ((fds.revents & POLLERR) != 0) {
+                    errno = EINVAL;
+                    return ANEURALNETWORKS_BAD_STATE;
+                }
+                // Signaled.
+                return ANEURALNETWORKS_NO_ERROR;
+            }
+            if (ret == 0) {
+                // Timed out.
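+                // Like libsync's sync_wait(), report a timed-out fence wait
+                // with errno set to ETIME.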
+ errno = ETIME; + return ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT; + } + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + } + + return ANEURALNETWORKS_BAD_STATE; +} + +} // namespace vsi::android::sl \ No newline at end of file diff --git a/src/Event.h b/src/Event.h index db9d5c3..aa4ce7f 100644 --- a/src/Event.h +++ b/src/Event.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,32 +21,56 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_EVENT_H #define VSI_ANDROID_SL_EVENT_H -#include "NeuralNetworksTypes.h" +#include +#include +#include + #include "Types.h" -#include "Execution.h" -namespace vsi { -namespace android { -namespace sl { -class Event { +namespace vsi::android::sl { + +class IEvent { + public: + virtual ~IEvent() = default; + virtual int wait() const = 0; // NOLINT(modernize-use-nodiscard) + [[nodiscard]] virtual int getSyncFenceFd(bool shouldDup) const = 0; +}; + +class CallbackEvent : public IEvent { public: - Event() {} - Event(int sync_fence) : sync_fence_(sync_fence) {} - Event(Execution exec, Event eve) : execution_(exec) {} + explicit CallbackEvent(TimePoint deadline); + + [[nodiscard]] int getSyncFenceFd(bool /*shouldDup*/) const override { return -1; } + + int bindThread(std::thread thread); + int wait() const override; + void notify(); + + private: + mutable std::thread thread_; + mutable std::mutex mutex_; + mutable std::condition_variable cv_; + bool isNotified_; + TimePoint deadline_; +}; + +class SyncFenceEvent : public IEvent { + public: + explicit SyncFenceEvent(int syncFenceFd); + ~SyncFenceEvent() override; + + [[nodiscard]] int getSyncFenceFd(bool shouldDup) const override; + + int wait() const override; private: - int sync_fence_{0}; - Execution execution_{nullptr}; - Event depend_{nullptr}; - bool finished_{false}; - void* data_{nullptr}; - size_t length_{0}; + int syncFenceFd_ = -1; + mutable std::mutex mutex_; }; -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/Execution.cpp b/src/Execution.cpp index 588b93a..f3b6014 100644 --- a/src/Execution.cpp +++ b/src/Execution.cpp @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,99 +21,543 @@ * DEALINGS IN THE SOFTWARE. 
* *****************************************************************************/ + #include "Execution.h" #include +#include +#include #include "MapOperation.h" #include "Memory.h" #include "Utils.h" #include "tim/transform/layout_inference.h" -#include "tim/vx/ops.h" -#include "tim/vx/platform/native.h" #include "tim/vx/platform/platform.h" -#ifdef USE_GRPC -#include "tim/vx/platform/grpc/grpc_remote.h" -#endif #include "tim/vx/tensor.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl { -std::shared_ptr Execution::CreateTvxIOTensor( - const slang::type::tensor_storage& tensor, tim::vx::TensorAttribute attr) { - tim::vx::DataType data_type = ToTvxDataType(tensor.dtype); - tim::vx::ShapeType shape = tensor.shape; - std::reverse(shape.begin(), shape.end()); - tim::vx::Quantization quantization; - tim::vx::QuantType quant_type = ToTvxQuantType(tensor.qtype); - if (quant_type == tim::vx::QuantType::ASYMMETRIC) { - quantization = tim::vx::Quantization(quant_type, tensor.scale, tensor.zero_point); - } else if (quant_type == tim::vx::QuantType::SYMMETRIC_PER_CHANNEL) { - quantization = - tim::vx::Quantization(quant_type, tensor.channel_dim, tensor.per_channel_scales, - tensor.per_channel_zero_points); - } - tim::vx::TensorSpec spec(data_type, shape, attr, quantization); - return vx_graph_->CreateIOTensor(spec); +Execution::Execution(Compilation* compilation) + : compilation_(compilation), reusable_(false), measure_(false), state_(State::PREPARATION) { + if (auto graph = compilation->getCompiledGraph(); graph != nullptr) { + inputVxTensors_ = graph->InputsTensor(); + outputVxTensors_ = graph->OutputsTensor(); + runtimeGraph_ = std::move(graph); + } + + timeoutDuration_ = Duration::min(); + loopTimeoutDuration_ = Duration::min(); +} + +int Execution::setReusable(bool reusable) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setReusable the execution may only be modified in the preparation state"); + return ANEURALNETWORKS_BAD_STATE; + } + + reusable_ = reusable; + return ANEURALNETWORKS_NO_ERROR; +} + +int Execution::setTimeout(Duration duration) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setTimeout the execution may only be modified in the preparation state"); + return ANEURALNETWORKS_BAD_STATE; + } + + timeoutDuration_ = duration; + return ANEURALNETWORKS_NO_ERROR; +} + +int Execution::setLoopTimeout(Duration duration) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setLoopTimeout the execution may only be modified in the preparation " + "state"); + return ANEURALNETWORKS_BAD_STATE; + } + + loopTimeoutDuration_ = duration; + return ANEURALNETWORKS_NO_ERROR; } -int Execution::SetInput(int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, +int Execution::setMeasureTiming(bool measure) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setMeasureTiming the execution may only be modified in the preparation " + "state"); + return ANEURALNETWORKS_BAD_STATE; + } + + measure_ = measure; + return ANEURALNETWORKS_NO_ERROR; +} + +int Execution::setInput(int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, size_t length) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setInput the execution may only be modified in the preparation state"); + return ANEURALNETWORKS_BAD_STATE; + } + if (type != nullptr) { - Model* model = compilation_->GetModel(); - int32_t input = model->Inputs()[index]; - auto& tensors = model->Tensors(); - auto& input_tensor = tensors[input]; - if 
(input_tensor.dtype != MapDataType(type->type) || input_tensor.scale != type->scale || - input_tensor.zero_point != type->zeroPoint) { - std::cout << "Get invalid ANeuralNetworksOperandType when setting input." << std::endl; + auto* model = compilation_->getModel(); + uint32_t input = model->getInputs()[index]; + auto& tensorMap = model->getTensorMap(); + auto& inputTensor = tensorMap[input]; + if (inputTensor.dtype != MapDataType(type->type) || + std::fabs(inputTensor.scale - type->scale) > std::numeric_limits::epsilon() || + inputTensor.zero_point != type->zeroPoint) { + LOGE("Execution::setInput get invalid ANeuralNetworksOperandType"); return ANEURALNETWORKS_BAD_DATA; } - inputs_dimension_[index] = + + auto shape = std::vector(type->dimensions, type->dimensions + type->dimensionCount); - input_tensor.shape = inputs_dimension_[index]; + inputTensor.shape = shape; } - Memory* mem = new Memory(); //this mem not hold data in memory, only hold data pointer - mem->SetData(const_cast(buffer)); - mem->SetLength(length); - inputs_memory_[index] = IOMemory(mem, 0, length); - free(mem); + + IOBufferInfo inputBufferInfo = { + .offset = 0, + .length = length, + .buffer = const_cast(buffer), + }; + inputBufferInfos_.push_back(inputBufferInfo); + return ANEURALNETWORKS_NO_ERROR; } -int Execution::SetOutput(int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, - size_t length) { +int Execution::setOutput(int32_t index, const ANeuralNetworksOperandType* type, void* buffer, + size_t length) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setOutput the execution may only be modified in the preparation state"); + return ANEURALNETWORKS_BAD_STATE; + } + if (type != nullptr) { - Model* model = compilation_->GetModel(); - int32_t output = model->Outputs()[index]; - auto& tensors = model->Tensors(); - auto& output_tensor = tensors[output]; - if (output_tensor.dtype != MapDataType(type->type) || output_tensor.scale != type->scale || - output_tensor.zero_point != type->zeroPoint) { - std::cout << "Get invalid ANeuralNetworksOperandType when setting output." 
<< std::endl; + auto* model = compilation_->getModel(); + uint32_t output = model->getOutputs()[index]; + auto& tensorMap = model->getTensorMap(); + auto& outputTensor = tensorMap[output]; + if (outputTensor.dtype != MapDataType(type->type) || + std::fabs(outputTensor.scale - type->scale) > std::numeric_limits::epsilon() || + outputTensor.zero_point != type->zeroPoint) { + LOGE("Execution::setOutput get invalid ANeuralNetworksOperandType"); return ANEURALNETWORKS_BAD_DATA; } - outputs_dimension_[index] = + + auto shape = std::vector(type->dimensions, type->dimensions + type->dimensionCount); - output_tensor.shape = outputs_dimension_[index]; + outputTensor.shape = shape; + } + + IOBufferInfo outputBufferInfo = { + .offset = 0, + .length = length, + .buffer = buffer, + }; + outputBufferInfos_.push_back(outputBufferInfo); + + return ANEURALNETWORKS_NO_ERROR; +} + +int Execution::setInputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, + const IMemory* memory, size_t offset, size_t length) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setInputFromMemory the execution may only be modified in the preparation " + "state"); + return ANEURALNETWORKS_BAD_STATE; + } + + int status = memory->validate(compilation_, IOType::INPUT, index, type, offset, length); + if (status != ANEURALNETWORKS_NO_ERROR) { + LOGE("Execution::setInputFromMemory failed to validate memory"); + return status; + } + + if (type != nullptr && type->dimensionCount != 0) { // implies tensor + auto* model = compilation_->getModel(); + uint32_t input = model->getInputs()[index]; + auto& tensorMap = model->getTensorMap(); + auto& inputTensor = tensorMap[input]; + + auto shape = + std::vector(type->dimensions, type->dimensions + type->dimensionCount); + inputTensor.shape = shape; + } + + IOBufferInfo inputBufferInfo = { + .offset = offset, + .length = length, + .memory = memory, + }; + inputBufferInfos_.push_back(inputBufferInfo); + + return ANEURALNETWORKS_NO_ERROR; +} + +int Execution::setOutputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, + const IMemory* memory, size_t offset, size_t length) { + if (state_ != State::PREPARATION) { + LOGE("Execution::setInputFromMemory the execution may only be modified in the preparation " + "state"); + return ANEURALNETWORKS_BAD_STATE; + } + + int status = memory->validate(compilation_, IOType::OUTPUT, index, type, offset, length); + if (status != ANEURALNETWORKS_NO_ERROR) { + LOGE("Execution::setInputFromMemory failed to validate memory"); + return status; + } + + if (type != nullptr) { + auto* model = compilation_->getModel(); + uint32_t output = model->getOutputs()[index]; + auto& tensorMap = model->getTensorMap(); + auto& outputTensor = tensorMap[output]; + + auto shape = + std::vector(type->dimensions, type->dimensions + type->dimensionCount); + outputTensor.shape = shape; + } + + IOBufferInfo outputBufferInfo = { + .offset = offset, + .length = length, + .memory = memory, + }; + outputBufferInfos_.push_back(outputBufferInfo); + + return ANEURALNETWORKS_NO_ERROR; +} + +int Execution::compute() { + if (state_ == State::COMPLETED && !reusable_) { + LOGE("Execution::compute try to schedule multiple computations for an execution which is " + "not reusable"); + return ANEURALNETWORKS_BAD_STATE; + } + state_ = State::COMPUTATION; + + // This function will be called multiple times, need to judge whether it is the first call. 
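+    // On the first computation, compile() lowers the model into a tim-vx graph
+    // and populates runtimeGraph_; a reusable execution skips this on later calls.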
+ if (runtimeGraph_ == nullptr) { + int result = compile(); + if (result != ANEURALNETWORKS_NO_ERROR) { + LOGE("Execution::compute failed to compile graph for the 1st time"); + + state_ = State::COMPLETED; + return result; + } + } + + if (inputVxTensors_.size() != inputBufferInfos_.size()) { + LOGE("Execution::compute not all inputs have set buffer or memory"); + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + + if (outputVxTensors_.size() != outputBufferInfos_.size()) { + LOGE("Execution::compute not all outputs have set buffer or memory"); + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + + for (size_t i = 0; i < inputVxTensors_.size(); i++) { + auto inputVxTensor = inputVxTensors_[i]; + auto inputBufferInfo = inputBufferInfos_[i]; + + if (const auto* memory = inputBufferInfo.memory; memory != nullptr) { + if (!memory->isInitialized()) { + LOGE("Execution::compute input memory is uninitialized"); + return ANEURALNETWORKS_OP_FAILED; + } + auto mapping = memory->map(); + void* data = reinterpret_cast(mapping.getData()) + inputBufferInfo.offset; + size_t length = + (inputBufferInfo.length == 0) ? mapping.getSize() : inputBufferInfo.length; + if (!inputVxTensor->CopyDataToTensor(data, length)) { + LOGE("Execution::compute failed to copy input data from memory"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + } else if (const void* buffer = inputBufferInfo.buffer; buffer != nullptr) { + if (!inputVxTensor->CopyDataToTensor(buffer, inputBufferInfo.length)) { + LOGE("Execution::compute failed to copy input data from user buffer"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + } else { + LOGW("Execution::compute input:%zu has null buffer or memory", i); + continue; + } + } + + if (!runtimeGraph_->Run()) { + LOGE("Execution::compute failed to run tim-vx graph"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + + for (size_t i = 0; i < outputVxTensors_.size(); i++) { + auto outputVxTensor = outputVxTensors_[i]; + auto outputBufferInfo = outputBufferInfos_[i]; + + if (const auto* memory = outputBufferInfo.memory; memory != nullptr) { + auto mapping = memory->map(); + void* data = reinterpret_cast(mapping.getData()) + outputBufferInfo.offset; + if (!outputVxTensor->CopyDataFromTensor(data)) { + LOGE("Execution::compute failed to copy output data to memory"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + + const_cast(memory)->setInitialized(true); + } else if (void* buffer = outputBufferInfo.buffer; buffer != nullptr) { + if (!outputVxTensor->CopyDataFromTensor(buffer)) { + LOGE("Execution::compute failed to copy output data to user buffer"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + } else { + LOGE("Execution::compute output:%zu has null buffer or memory", i); + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + } + + state_ = State::COMPLETED; + return ANEURALNETWORKS_NO_ERROR; +} + +CallbackEvent* Execution::createSyncEvent() { + auto deadline = timeoutDuration_ != Duration::min() ? 
Clock::now() + timeoutDuration_ + : TimePoint::max(); + auto* event = new CallbackEvent(deadline); + syncEvent_ = event; + return event; +} + +int Execution::startCompute() { + if (state_ == State::COMPLETED && !reusable_) { + LOGE("Execution::startCompute try to schedule multiple computations for an execution which " + "is " + "not reusable"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; } - Memory* mem = new Memory(); - mem->SetData(const_cast(buffer)); - mem->SetLength(length); - outputs_memory_[index] = IOMemory(mem, 0, length); - free(mem); + state_ = State::COMPUTATION; + + // This function will be called multiple times, need to judge whether it is the first call. + if (runtimeGraph_ == nullptr) { + int result = compile(); + if (result != ANEURALNETWORKS_NO_ERROR) { + LOGE("Execution::startCompute failed to compile graph for the 1st time"); + + state_ = State::COMPLETED; + return result; + } + } + + if (inputVxTensors_.size() != inputBufferInfos_.size()) { + LOGE("Execution::startCompute not all inputs have set buffer or memory"); + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + + if (outputVxTensors_.size() != outputBufferInfos_.size()) { + LOGE("Execution::startCompute not all outputs have set buffer or memory"); + state_ = State::COMPLETED; + return ANEURALNETWORKS_BAD_STATE; + } + + auto asyncThread = std::thread([this]() { + for (size_t i = 0; i < inputVxTensors_.size(); i++) { + auto inputVxTensor = inputVxTensors_[i]; + auto inputBufferInfo = inputBufferInfos_[i]; + + if (const auto* memory = inputBufferInfo.memory; memory != nullptr) { + if (!memory->isInitialized()) { + LOGE("Execution::startCompute input memory is uninitialized"); + return ANEURALNETWORKS_OP_FAILED; + } + + auto mapping = memory->map(); + void* data = reinterpret_cast(mapping.getData()) + inputBufferInfo.offset; + size_t length = + (inputBufferInfo.length == 0) ? 
mapping.getSize() : inputBufferInfo.length; + if (!inputVxTensor->CopyDataToTensor(data, length)) { + LOGE("Execution::startCompute failed to copy input data from memory"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_OP_FAILED; + } + } else if (const void* buffer = inputBufferInfo.buffer; buffer != nullptr) { + if (!inputVxTensor->CopyDataToTensor(buffer, inputBufferInfo.length)) { + LOGE("Execution::startCompute failed to copy input data from user buffer"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_OP_FAILED; + } + } else { + LOGW("Execution::startCompute input:%zu has null buffer or memory", i); + continue; + } + } + + if (!runtimeGraph_->Run()) { + LOGE("Execution::startCompute failed to run tim-vx graph"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_OP_FAILED; + } + + for (size_t i = 0; i < outputVxTensors_.size(); i++) { + auto outputVxTensor = outputVxTensors_[i]; + auto outputBufferInfo = outputBufferInfos_[i]; + + if (const auto* memory = outputBufferInfo.memory; memory != nullptr) { + auto mapping = memory->map(); + void* data = + reinterpret_cast(mapping.getData()) + outputBufferInfo.offset; + if (!outputVxTensor->CopyDataFromTensor(data)) { + LOGE("Execution::startCompute failed to copy output data to memory"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_OP_FAILED; + } + + const_cast(memory)->setInitialized(true); + } else if (void* buffer = outputBufferInfo.buffer; buffer != nullptr) { + if (!outputVxTensor->CopyDataFromTensor(buffer)) { + LOGE("Execution::startCompute failed to copy output data to user buffer"); + + state_ = State::COMPLETED; + return ANEURALNETWORKS_OP_FAILED; + } + } else { + LOGE("Execution::startCompute output:%zu has null buffer or memory", i); + state_ = State::COMPLETED; + return ANEURALNETWORKS_OP_FAILED; + } + } + + syncEvent_->notify(); + state_ = State::COMPLETED; + return ANEURALNETWORKS_NO_ERROR; + }); + + return syncEvent_->bindThread(std::move(asyncThread)); +} + +int Execution::getDuration(DurationCode durationCode, uint64_t* duration) const { + if (state_ != State::COMPLETED) { + LOGE("Execution::getDuration called when the execution is not in the completed state"); + return ANEURALNETWORKS_BAD_STATE; + } + + switch (durationCode) { + case ANEURALNETWORKS_DURATION_ON_HARDWARE: + case ANEURALNETWORKS_DURATION_IN_DRIVER: + case ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE: + case ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER: + break; + default: + LOGE("Execution::getDuration passed an invalid duration code"); + return ANEURALNETWORKS_BAD_DATA; + } + + // The driver does not support timing measurement for now. 
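+    // Per the NNAPI contract, UINT64_MAX means the duration is not available.
+    // A caller should treat it accordingly, e.g. (sketch):
+    //   uint64_t us = 0;
+    //   ANeuralNetworksExecution_getDuration(
+    //           exec, ANEURALNETWORKS_DURATION_ON_HARDWARE, &us);
+    //   if (us == UINT64_MAX) { /* timing was not measured */ }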
+    *duration = std::numeric_limits<uint64_t>::max();
+    return ANEURALNETWORKS_NO_ERROR;
 }
 
-int Execution::MapOperations(const std::vector<std::shared_ptr<OpCreator>>& op_creators,
-                             const TensorMap& tensor_map, const ScalarMap& scalar_map) {
-    for (const auto op_creator : op_creators) {
-        const std::vector<uint32_t>& inputs = op_creator->Inputs();
-        const std::vector<uint32_t>& outputs = op_creator->Outputs();
+int Execution::getOutputOperandRank(int32_t index, uint32_t* rank) const {
+    if (state_ != State::COMPLETED) {
+        LOGE("Execution::getOutputOperandRank called when the execution is not in the completed "
+             "state");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+
+    auto* model = compilation_->getModel();
+    uint32_t output = model->getOutputs()[index];
+    auto& tensorMap = model->getTensorMap();
+    auto outputTensor = tensorMap[output];
+    *rank = outputTensor.shape.size();
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int Execution::getOutputOperandDimensions(int32_t index, uint32_t* dimensions) const {
+    if (state_ != State::COMPLETED) {
+        LOGE("Execution::getOutputOperandDimensions called when the execution is not in the "
+             "completed state");
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+
+    auto* model = compilation_->getModel();
+    uint32_t output = model->getOutputs()[index];
+    auto& tensorMap = model->getTensorMap();
+    auto outputTensor = tensorMap[output];
+
+    const auto& shape = outputTensor.shape;
+    for (size_t i = 0; i < shape.size(); ++i) {
+        dimensions[i] = outputTensor.shape[i];
+    }
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+Execution::VxTensor Execution::createVxConstantTensor(const slang::type::tensor_storage& tensor,
+                                                      Model::OperandValueInfo valueInfo) {
+    tim::vx::DataType dtype = ToTvxDataType(tensor.dtype);
+    tim::vx::ShapeType shape = tensor.shape;
+    std::reverse(shape.begin(), shape.end());
+    tim::vx::Quantization quantization;
+    tim::vx::QuantType qtype = ToTvxQuantType(tensor.qtype);
+    if (qtype == tim::vx::QuantType::ASYMMETRIC) {
+        quantization = tim::vx::Quantization(qtype, tensor.scale, tensor.zero_point);
+    } else if (qtype == tim::vx::QuantType::SYMMETRIC_PER_CHANNEL) {
+        quantization = tim::vx::Quantization(qtype, tensor.channel_dim, tensor.per_channel_scales,
+                                             tensor.per_channel_zero_points);
+    }
+    tim::vx::TensorSpec spec(dtype, shape, tim::vx::TensorAttribute::CONSTANT, quantization);
+
+    if (const auto* memory = valueInfo.memory; memory != nullptr) {
+        auto mapping = memory->map();
+        const void* data = reinterpret_cast<const uint8_t*>(mapping.getData()) + valueInfo.offset;
+        return vxGraph_->CreateTensor(spec, data);
+    }
+
+    return vxGraph_->CreateTensor(spec, valueInfo.buffer);
+}
+
+Execution::VxTensor Execution::createVxIOTensor(const slang::type::tensor_storage& tensor,
+                                                tim::vx::TensorAttribute attr) {
+    tim::vx::DataType dtype = ToTvxDataType(tensor.dtype);
+    tim::vx::ShapeType shape = tensor.shape;
+    std::reverse(shape.begin(), shape.end());
+    tim::vx::Quantization quantization;
+    tim::vx::QuantType qtype = ToTvxQuantType(tensor.qtype);
+    if (qtype == tim::vx::QuantType::ASYMMETRIC) {
+        quantization = tim::vx::Quantization(qtype, tensor.scale, tensor.zero_point);
+    } else if (qtype == tim::vx::QuantType::SYMMETRIC_PER_CHANNEL) {
+        quantization = tim::vx::Quantization(qtype, tensor.channel_dim, tensor.per_channel_scales,
+                                             tensor.per_channel_zero_points);
+    }
+    tim::vx::TensorSpec spec(dtype, shape, attr, quantization);
+    return vxGraph_->CreateIOTensor(spec);
+}
+
+int Execution::mapOperations(const std::vector<std::shared_ptr<OpCreator>>& opCreators,
+                             const TensorMap& tensorMap, const ScalarMap& scalarMap) {
+    for (const auto& opCreator : opCreators) {
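+        // Dispatch on the NNAPI operation code: each case lowers the op to the
+        // corresponding tim-vx operation via a Map* helper in MapOperation.cpp.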
const auto& inputs = opCreator->getInputs(); + const auto& outputs = opCreator->getOutputs(); int result = ANEURALNETWORKS_NO_ERROR; - switch (op_creator->Type()) { + switch (opCreator->getType()) { case ANEURALNETWORKS_ABS: case ANEURALNETWORKS_ARGMAX: case ANEURALNETWORKS_ARGMIN: @@ -164,386 +608,283 @@ int Execution::MapOperations(const std::vector>& op_c case ANEURALNETWORKS_TANH: case ANEURALNETWORKS_TILE: case ANEURALNETWORKS_TRANSPOSE: - result = MapOneInputOneOutput(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapOneInputOneOutput(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_ADD: - result = MapEltwise(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapEltwise(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_AVERAGE_POOL_2D: - result = MapPool2D(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapPool2D(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_BATCH_MATMUL: - result = MapBatchMatmul(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapBatchMatmul(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_CONCATENATION: - result = MapConcatenation(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapConcatenation(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_CONV_2D: - result = MapConv2D(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapConv2D(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_DEPTHWISE_CONV_2D: - result = MapDepthwiseConv2D(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapDepthwiseConv2D(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_DIV: - result = MapEltwise(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapEltwise(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_EMBEDDING_LOOKUP: - result = MapEmbeddingLookup(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapEmbeddingLookup(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_EQUAL: - result = MapRelationalOp(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapRelationalOp(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_FULLY_CONNECTED: - result = MapFullyConnected(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapFullyConnected(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_GATHER: - result = MapGather(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapGather(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_GREATER: - result = MapRelationalOp(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapRelationalOp(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; 
case ANEURALNETWORKS_GREATER_EQUAL: - result = MapRelationalOp(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapRelationalOp(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_GROUPED_CONV_2D: - result = MapGroupedConv2d(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapGroupedConv2d(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_HASHTABLE_LOOKUP: - result = MapHashtableLookup(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapHashtableLookup(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_INSTANCE_NORMALIZATION: - result = MapInstanceNormalization(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapInstanceNormalization(vxGraph_, opCreator, vxTensors_, tensorMap, + scalarMap, inputs, outputs); break; case ANEURALNETWORKS_LESS: - result = MapRelationalOp(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapRelationalOp(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_LESS_EQUAL: - result = MapRelationalOp(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapRelationalOp(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_LOGICAL_AND: - result = MapLogicalAndOr(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapLogicalAndOr(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_LOGICAL_OR: - result = MapLogicalAndOr(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapLogicalAndOr(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_L2_POOL_2D: - result = MapPool2D(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapPool2D(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_MAX_POOL_2D: - result = MapPool2D(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapPool2D(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_MAXIMUM: - result = MapEltwiseWithNoAct(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapEltwiseWithNoAct(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_MINIMUM: - result = MapEltwiseWithNoAct(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapEltwiseWithNoAct(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_MUL: - result = MapEltwise(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapEltwise(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_NOT_EQUAL: - result = MapRelationalOp(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + result = MapRelationalOp(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_PACK: - result = MapPack(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, inputs, + result = MapPack(vxGraph_, opCreator, vxTensors_, tensorMap, 
scalarMap, inputs, outputs); break; case ANEURALNETWORKS_POW: - result = MapEltwiseWithNoAct(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapEltwiseWithNoAct(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; case ANEURALNETWORKS_PRELU: - result = MapPrelu(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapPrelu(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_ROI_ALIGN: - result = MapRoi(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, inputs, + result = MapRoi(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; // case ANEURALNETWORKS_ROI_POOLING: // not support roi_pooling at present - // result = MapRoi(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, inputs, + // result = MapRoi(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, + // inputs, // outputs); // break; case ANEURALNETWORKS_SELECT: - result = MapSelect(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapSelect(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_SPLIT: - result = MapSplit(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapSplit(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_SUB: - result = MapEltwise(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, - inputs, outputs); + result = MapEltwise(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, + outputs); break; case ANEURALNETWORKS_SVDF: - result = MapSvdf(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, inputs, + result = MapSvdf(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_TOPK_V2: - result = MapTopK(vx_graph_, op_creator, vx_tensors_, tensor_map, scalar_map, inputs, + result = MapTopK(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, inputs, outputs); break; case ANEURALNETWORKS_TRANSPOSE_CONV_2D: - result = MapTransposeConv2d(vx_graph_, op_creator, vx_tensors_, tensor_map, - scalar_map, inputs, outputs); + result = MapTransposeConv2d(vxGraph_, opCreator, vxTensors_, tensorMap, scalarMap, + inputs, outputs); break; default: - std::cout << "Op type: " << op_creator->Type() << " is not supported" << std::endl; + LOGE("Execution::mapOperation op type: %d not supported", opCreator->getType()); result = ANEURALNETWORKS_BAD_STATE; } - if (result != ANEURALNETWORKS_NO_ERROR) return result; + if (result != ANEURALNETWORKS_NO_ERROR) { + return result; + } } return ANEURALNETWORKS_NO_ERROR; } -int Execution::SetInputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, - const Memory* memory, size_t offset, size_t length) { - if (type != nullptr) { - Model* model = compilation_->GetModel(); - int32_t input = model->Inputs()[index]; - auto& tensors = model->Tensors(); - auto& input_tensor = tensors[input]; - if (input_tensor.dtype != MapDataType(type->type) || input_tensor.scale != type->scale || - input_tensor.zero_point != type->zeroPoint) { - std::cout << "Get invalid ANeuralNetworksOperandType when setting input." 
<< std::endl; - return ANEURALNETWORKS_BAD_DATA; +int Execution::compile() { + auto context = compilation_->getContext(); + const auto* model = compilation_->getModel(); + const auto& inputs = model->getInputs(); + const auto& outputs = model->getOutputs(); + const auto& tensorMap = model->getTensorMap(); + const auto& scalarMap = model->getScalarMap(); + const auto& operandValuesInfoMap = model->getOperandValueInfos(); + const auto& operations = model->getOpCreators(); + + // Check for output tensor with dynamic axis. + for (size_t i = 0; i < outputs.size(); i++) { + uint32_t output = outputs[i]; + auto outputTensor = tensorMap.at(output); + + bool hasDynamicAxis = std::any_of(outputTensor.shape.begin(), outputTensor.shape.end(), + [](uint32_t s) { return s == 0; }); + if (hasDynamicAxis) { + LOGE("Execution::compile output:%zu has dynamic axis which is not supported", i); + return ANEURALNETWORKS_OP_FAILED; } - inputs_dimension_[index] = - std::vector(type->dimensions, type->dimensions + type->dimensionCount); - input_tensor.shape = inputs_dimension_[index]; - } - auto mem = const_cast(memory); - if (mem->IsCreateFromAHWB()) { - mem->PraseAHWB(mem->AHWB()); } - if (mem->IsCreateFromDesc()) { - length = memory->Length(); - } - inputs_memory_[index] = IOMemory(memory, offset, length); - return ANEURALNETWORKS_NO_ERROR; -} + vxGraph_ = context->CreateGraph(); -int Execution::SetOutputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, - const Memory* memory, size_t offset, size_t length) { - if (type != nullptr) { - Model* model = compilation_->GetModel(); - int32_t output = model->Outputs()[index]; - auto& tensors = model->Tensors(); - auto& output_tensor = tensors[output]; - if (output_tensor.dtype != MapDataType(type->type) || output_tensor.scale != type->scale || - output_tensor.zero_point != type->zeroPoint) { - std::cout << "Get invalid ANeuralNetworksOperandType when setting output." << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - outputs_dimension_[index] = - std::vector(type->dimensions, type->dimensions + type->dimensionCount); - output_tensor.shape = outputs_dimension_[index]; + // Create I/O vx tensors. 
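+    // Note: tim-vx stores tensor dimensions in reverse order relative to NNAPI,
+    // which is why createVxIOTensor/createVxConstantTensor std::reverse the
+    // shape before building the tim::vx::TensorSpec.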
+ for (uint32_t input : inputs) { + auto inputTensor = tensorMap.at(input); + vxTensors_[input] = createVxIOTensor(inputTensor, tim::vx::TensorAttribute::INPUT); } - if (memory->IsCreateFromDesc()) { - length = memory->Length(); + for (uint32_t output : outputs) { + auto outputTensor = tensorMap.at(output); + vxTensors_[output] = createVxIOTensor(outputTensor, tim::vx::TensorAttribute::OUTPUT); } - outputs_memory_[index] = IOMemory(memory, offset, length); - return ANEURALNETWORKS_NO_ERROR; -} -int Execution::Compute() { - Model* model = compilation_->GetModel(); - // This function will be called multiple times, and need to judge whether it is the first call - if (vx_graph_ == nullptr) { - output_order_.clear(); - out_memory_order_.clear(); // Reset this two vector for new graphs - vx_graph_ = vx_context_->CreateGraph(); - auto tensor_map = model->Tensors(); - auto scalar_map = model->Scalars(); - auto operations = model->Operations(); - for (uint32_t in : model->Inputs()) { - vx_tensors_[in] = CreateTvxIOTensor(tensor_map[in], tim::vx::TensorAttribute::INPUT); + if (compilation_->getCacheState() == Compilation::CacheState::LOADED) { + auto nbg = vxGraph_->CreateOperation( + reinterpret_cast(compilation_->getCacheData()), inputs.size(), + outputs.size()); + for (uint32_t input : inputs) { + auto inputTensor = vxTensors_[input]; + nbg->BindInput(inputTensor); + } + for (uint32_t output : outputs) { + auto outputTensor = vxTensors_[output]; + nbg->BindOutput(outputTensor); } - for (uint32_t out : model->Outputs()) { - vx_tensors_[out] = CreateTvxIOTensor(tensor_map[out], tim::vx::TensorAttribute::OUTPUT); + } else { + // Create constant vx tensors. + for (const auto& [operandIndex, tensor] : tensorMap) { + if (auto it = operandValuesInfoMap.find(operandIndex); + it != operandValuesInfoMap.end()) { + auto [_, valueInfo] = *it; + if (valueInfo.buffer == nullptr && valueInfo.memory == nullptr) { + valueInfo.buffer = model->getConstantCopyData(valueInfo.offset); + } + vxTensors_[operandIndex] = createVxConstantTensor(tensor, valueInfo); + } } - int result = MapOperations(operations, tensor_map, scalar_map); + + int result = mapOperations(operations, tensorMap, scalarMap); if (result != ANEURALNETWORKS_NO_ERROR) { - std::cout << "map operation fail" << std::endl; + LOGE("Execution::compile failed to map operations"); return result; } - layout_infered_ = tim::transform::LayoutInference(vx_graph_, vx_context_); - auto infer_outputs = layout_infered_.first->OutputsTensor(); - auto src_outputs = vx_graph_->OutputsTensor(); - auto graph_io_map = layout_infered_.second; - // Confirm output order between infer_graph and src_graph - for (int i = 0; i < src_outputs.size(); ++i) { - auto infer_out = graph_io_map[src_outputs[i]]; - for (int j = 0; j < infer_outputs.size(); ++j) { - if (infer_out == infer_outputs[j]) { - output_order_.push_back(model->Outputs()[j]); - out_memory_order_.push_back(j); - } - } - } -#ifdef RUN_NBG - // compile graph to executable, just use the first device - auto device = compilation_->Devices()[0]->Device(); -#ifdef USE_GRPC - executor_ = std::make_shared(device); -#else - executor_ = std::make_shared(device); -#endif - executable_ = executor_->Compile(layout_infered_.first); - input_handles_.clear(); - output_handles_.clear(); - for (uint32_t i : model->Inputs()) { - auto input_handle = executable_->AllocateTensor(vx_tensors_[i]->GetSpec()); - executable_->SetInput(input_handle); - input_handles_.push_back(input_handle); + auto [vxGraph, _] = 
tim::transform::LayoutInference(vxGraph_, context); + auto inputTensors = vxGraph->InputsTensor(); + auto outputTensors = vxGraph->OutputsTensor(); + + for (size_t i = 0; i < inputs.size(); i++) { + uint32_t input = inputs[i]; + vxTensors_[input] = inputTensors[i]; } - for (uint32_t o : output_order_) { - auto output_handle = executable_->AllocateTensor(vx_tensors_[o]->GetSpec()); - executable_->SetOutput(output_handle); - output_handles_.push_back(output_handle); + for (size_t i = 0; i < outputs.size(); i++) { + uint32_t output = outputs[i]; + vxTensors_[output] = outputTensors[i]; } -#endif - - auto inputs = model->Inputs(); - for (int i = 0; i < inputs.size(); ++i) { -#ifdef RUN_NBG - auto input_handle = input_handles_[i]; - auto io_memory = inputs_memory_[i]; -#else - uint32_t index = inputs[i]; - auto src_input_tensor = vx_tensors_[index]; - auto io_memory = inputs_memory_[i]; -#endif - auto memory = io_memory.memory; - size_t offset = io_memory.offset; - size_t length = io_memory.length; - if (memory != nullptr) { - if (offset + length > memory->Length()) { - std::cout << "input memory is out of range." << std::endl; - return ANEURALNETWORKS_OUT_OF_MEMORY; - } - uint8_t* data = reinterpret_cast(memory->Data()); -#ifdef RUN_NBG - if (!input_handle->CopyDataToTensor(reinterpret_cast(data + offset), - length)) { - std::cout << "copy data to tensor fail." << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } -#else - auto infered_input_tensor = layout_infered_.second[src_input_tensor]; - if (infered_input_tensor) { - if (!infered_input_tensor->CopyDataToTensor( - reinterpret_cast(data + offset), length)) { - std::cout << "copy data to tensor fail." << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } - } else { - std::cout << "tensor in source graph removed before do layout " - "inference - if zero sized tensor involved" - << std::endl; - } -#endif + + if (compilation_->getCacheState() == Compilation::CacheState::EMPTY) { + size_t nbgSize; + if (!vxGraph->CompileToBinary(nullptr, &nbgSize)) { + LOGE("Execution::compile failed to compile tim-vx graph"); + return ANEURALNETWORKS_OP_FAILED; } - } -#ifdef RUN_NBG - executable_->Submit(executable_); -#endif - } else if (reusable_ == false) { - std::cout << "try to schedule multiple computations for a Execution which is not reusable" - << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } -#ifdef RUN_NBG - executor_->Trigger(); -#else - // Run graph - if (!layout_infered_.first->Run()) { - std::cout << "failed to run graph." << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } -#endif - // copy output to memory - for (int i = 0; i < output_order_.size(); ++i) { -#ifdef RUN_NBG - auto output_handle = output_handles_[i]; -#else - uint32_t index = output_order_[i]; - auto src_output_tensor = vx_tensors_[index]; -#endif - auto io_memory = outputs_memory_[out_memory_order_[i]]; - auto memory = io_memory.memory; - size_t offset = io_memory.offset; - size_t length = io_memory.length; - if (offset + length > memory->Length()) { - std::cout << "output memory is out of range." << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - uint8_t* data = reinterpret_cast(memory->Data()); -#ifdef RUN_NBG - if (!output_handle->CopyDataFromTensor(reinterpret_cast(data + offset))) { - std::cout << "copy data from tensor fail." 
<< std::endl; - return ANEURALNETWORKS_BAD_STATE; - } -#else - auto infered_output_tesnor = layout_infered_.second[src_output_tensor]; - if (infered_output_tesnor) { - if (!infered_output_tesnor->CopyDataFromTensor( - reinterpret_cast(data + offset))) { - std::cout << "copy data from tensor fail." << std::endl; - return ANEURALNETWORKS_BAD_STATE; + std::vector nbgBuffer(nbgSize); + if (!vxGraph->CompileToBinary(nbgBuffer.data(), &nbgSize)) { + LOGE("Execution::compile failed to compile tim-vx graph"); + return ANEURALNETWORKS_OP_FAILED; } - } else { - std::cout << "Output tensor missing: report issue to VSI" << std::endl; + + compilation_->writeToCache(nbgBuffer.data(), nbgSize); } -#endif + + vxGraph_ = vxGraph; } - return ANEURALNETWORKS_NO_ERROR; -} -int Execution::GetOutputOperandRank(int32_t index, uint32_t* rank) { - *rank = outputs_dimension_[index].size(); - return ANEURALNETWORKS_NO_ERROR; -} + if (!vxGraph_->Compile()) { + LOGE("Execution::compile failed to compile tim-vx graph"); + return ANEURALNETWORKS_OP_FAILED; + } -int Execution::GetOutputOperandDimensions(int32_t index, uint32_t* dimensions) { - auto dim = outputs_dimension_[index]; - for (int i = 0; i < dim.size(); ++i) { - dimensions[i] = dim[i]; + compilation_->setCompiledGraph(vxGraph_); + + for (uint32_t input : inputs) { + inputVxTensors_.push_back(vxTensors_[input]); + } + for (uint32_t output : outputs) { + outputVxTensors_.push_back(vxTensors_[output]); } + runtimeGraph_ = vxGraph_; + return ANEURALNETWORKS_NO_ERROR; } -} // namespace sl -} // namespace android -} // namespace vsi \ No newline at end of file +} // namespace vsi::android::sl \ No newline at end of file diff --git a/src/Execution.h b/src/Execution.h index 341050e..f711b0c 100644 --- a/src/Execution.h +++ b/src/Execution.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,91 +21,105 @@ * DEALINGS IN THE SOFTWARE. 
* *****************************************************************************/ + #ifndef VSI_ANDROID_SL_EXECUTION_H #define VSI_ANDROID_SL_EXECUTION_H -#include + +#include +#include +#include #include "Compilation.h" +#include "Event.h" #include "Memory.h" +#include "Types.h" #include "tim/vx/context.h" #include "tim/vx/graph.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl { class Execution { public: - Execution() {} - Execution(Compilation* compilation) : compilation_(compilation) { - auto model = compilation_->GetModel(); - inputs_memory_.resize(model->Inputs().size()); - outputs_memory_.resize(model->Outputs().size()); - inputs_dimension_.resize(model->Inputs().size()); - outputs_dimension_.resize(model->Outputs().size()); - vx_context_ = tim::vx::Context::Create(); - reusable_ = false; - } - int SetInput(int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, - size_t length); - int SetOutput(int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, + explicit Execution(Compilation* compilation); + + [[nodiscard]] const Compilation* getCompilation() const { return compilation_; } + + int setReusable(bool reusable); + int setTimeout(Duration duration); + int setLoopTimeout(Duration duration); + int setMeasureTiming(bool measure); + + int setInput(int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, size_t length); - int SetInputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, - const Memory* memory, size_t offset, size_t length); - int SetOutputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, - const Memory* memory, size_t offset, size_t length); - int Compute(); - int GetOutputOperandRank(int32_t index, uint32_t* rank); - int GetOutputOperandDimensions(int32_t index, uint32_t* dimensions); - int SetReusable(bool reusable) { - reusable_ = reusable; - return ANEURALNETWORKS_NO_ERROR; - } - int SetLoopTimeout(uint64_t duration) { - duration_ = duration; - return ANEURALNETWORKS_NO_ERROR; - } + int setOutput(int32_t index, const ANeuralNetworksOperandType* type, void* buffer, + size_t length); + int setInputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, + const IMemory* memory, size_t offset, size_t length); + int setOutputFromMemory(int32_t index, const ANeuralNetworksOperandType* type, + const IMemory* memory, size_t offset, size_t length); + + int compute(); + + CallbackEvent* createSyncEvent(); + int startCompute(); + + int getDuration(DurationCode durationCode, uint64_t* duration) const; + int getOutputOperandRank(int32_t index, uint32_t* rank) const; + int getOutputOperandDimensions(int32_t index, uint32_t* dimensions) const; + private: - std::shared_ptr CreateTvxIOTensor(const slang::type::tensor_storage& tensor, - tim::vx::TensorAttribute attr); - int MapOperations(const std::vector>& op_creators, - const TensorMap& tensor_map, const ScalarMap& scalar_map); - struct IOMemory { - IOMemory() {} - IOMemory(const Memory* memory, size_t offset, size_t length) - : memory(memory), offset(offset), length(length) {} - const Memory* memory; + // See execution state definitions in + // https://developer.android.com/ndk/reference/group/neural-networks#aneuralnetworksexecution + enum class State { + PREPARATION, + COMPUTATION, + COMPLETED, + }; + + struct IOBufferInfo { size_t offset; size_t length; + void* buffer; + const IMemory* memory; }; - std::vector inputs_memory_; - std::vector outputs_memory_; - std::vector> inputs_dimension_; - 
std::vector> outputs_dimension_; + using VxContext = std::shared_ptr; + using VxGraph = std::shared_ptr; + using VxTensor = std::shared_ptr; + using VxOp = std::shared_ptr; + using VxTensorMap = std::unordered_map; + + VxTensor createVxConstantTensor(const slang::type::tensor_storage& tensor, + Model::OperandValueInfo valueInfo); + VxTensor createVxIOTensor(const slang::type::tensor_storage& tensor, + tim::vx::TensorAttribute attr); + int mapOperations(const std::vector>& opCreators, + const TensorMap& tensorMap, const ScalarMap& scalarMap); + int compile(); + + // Indexed by execution I/O index, not model tensor index. + std::vector inputBufferInfos_; + std::vector outputBufferInfos_; + std::vector inputVxTensors_; + std::vector outputVxTensors_; + Compilation* compilation_; - std::shared_ptr vx_context_; - std::shared_ptr vx_graph_; - std::unordered_map> vx_tensors_; - std::pair, - std::map, std::shared_ptr>> - layout_infered_; + // Compile time graph. + VxGraph vxGraph_; + // Runtime graph. + VxGraph runtimeGraph_; + // Indexed by model tensor index. + VxTensorMap vxTensors_; + + CallbackEvent* syncEvent_; + Duration timeoutDuration_; + Duration loopTimeoutDuration_; + bool reusable_; - uint64_t duration_; - // src_graph output order may be different from infer_graph - std::vector output_order_; - std::vector out_memory_order_; - -#ifdef RUN_NBG - std::shared_ptr executor_; - std::shared_ptr executable_; - std::vector> input_handles_; - std::vector> output_handles_; -#endif + bool measure_; + State state_; }; -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/MapOperation.cpp b/src/MapOperation.cpp index f14e82b..54680e7 100644 --- a/src/MapOperation.cpp +++ b/src/MapOperation.cpp @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -37,9 +37,7 @@ std::shared_ptr CreateTvxTensor(std::shared_ptr tim::vx::DataType data_type = ToTvxDataType(tensor.dtype); tim::vx::ShapeType shape = tensor.shape; std::reverse(shape.begin(), shape.end()); - const void* data = tensor.data; - tim::vx::TensorAttribute attr = - data ? 
tim::vx::TensorAttribute::CONSTANT : tim::vx::TensorAttribute::TRANSIENT;
+
     tim::vx::Quantization quantization;
     tim::vx::QuantType quant_type = ToTvxQuantType(tensor.qtype);
     if (quant_type == tim::vx::QuantType::ASYMMETRIC) {
@@ -49,8 +47,8 @@ std::shared_ptr<tim::vx::Tensor> CreateTvxTensor(std::shared_ptr<tim::vx::Graph>
                 tim::vx::Quantization(quant_type, tensor.channel_dim, tensor.per_channel_scales,
                                       tensor.per_channel_zero_points);
     }
-    tim::vx::TensorSpec spec(data_type, shape, attr, quantization);
-    return graph->CreateTensor(spec, data);
+    tim::vx::TensorSpec spec(data_type, shape, tim::vx::TensorAttribute::TRANSIENT, quantization);
+    return graph->CreateTensor(spec);
 }
 
 std::shared_ptr<tim::vx::Tensor> FuseActivation(std::shared_ptr<tim::vx::Graph> graph,
@@ -70,7 +68,7 @@ std::shared_ptr<tim::vx::Tensor> FuseActivation(std::shared_ptr<tim::vx::Graph>
             op = graph->CreateOperation();
             break;
         default:
-            std::cout << "Unkown fuse code" << std::endl;
+            LOGE("Unknown fuse code");
             return nullptr;
     }
     auto input = graph->CreateTensor(output->GetSpec().AsTransientSpec());
@@ -78,6 +76,18 @@
     op->BindOutput(output);
     return input;
 }
+
+std::vector<uint32_t> ExpandedShape(const std::vector<uint32_t>& long_shape,
+                                    const std::vector<uint32_t>& short_shape) {
+    std::vector<uint32_t> expanded_shape(short_shape);
+    int32_t ref_rank = long_shape.size();
+    int32_t origin_rank = short_shape.size();
+    // Pad with trailing 1s up to the reference rank; tim-vx shapes are stored
+    // reversed, so this matches NNAPI-style broadcasting on leading dims.
+    for (int32_t i = origin_rank; i < ref_rank; ++i) {
+        expanded_shape.push_back(1);
+    }
+    return expanded_shape;
+}
 }  // namespace
 
 int MapOneInputOneOutput(std::shared_ptr<tim::vx::Graph> graph,
@@ -204,8 +214,8 @@ int MapConv2D(std::shared_ptr<tim::vx::Graph> graph, std::shared_ptr<OpCreator>
     auto bias = vx_tensors[idx_bias];
     auto output = vx_tensors[idx_out];
 
-    const uint8_t* p_act_code = scalar_map.at(idx_act).data.data();
-    int32_t fuse_code = *(int32_t*)p_act_code;
+    auto activationCodeScalar = scalar_map.at(idx_act);
+    int32_t fuse_code = *reinterpret_cast<int32_t*>(activationCodeScalar.data.data());
 
     output = FuseActivation(graph, fuse_code, output);
 
@@ -301,6 +311,15 @@ int MapEltwise(std::shared_ptr<tim::vx::Graph> graph, std::shared_ptr<OpCreator>
     int32_t fuse_code = *reinterpret_cast<const int32_t*>(fuse_code_data);
 
     output = FuseActivation(graph, fuse_code, output);
+    if (output == nullptr) return ANEURALNETWORKS_BAD_DATA;
+
+    auto in_shape = input->GetShape();
+    auto in_shape1 = input1->GetShape();
+    auto out_shape = output->GetShape();
+    // Compare ranks (not lexicographic vector order) to decide whether an input
+    // needs broadcasting up to the output rank.
+    if (in_shape.size() < out_shape.size() &&
+        input->GetSpec().GetTensorAttribute() != tim::vx::CONSTANT)
+        input->GetSpec().SetShape(ExpandedShape(out_shape, in_shape));
+    if (in_shape1.size() < out_shape.size() &&
+        input1->GetSpec().GetTensorAttribute() != tim::vx::CONSTANT)
+        input1->GetSpec().SetShape(ExpandedShape(out_shape, in_shape1));
 
     auto eltwise = op_creator->Lowering(graph);
     eltwise->BindInput(input);
@@ -310,10 +329,11 @@ int MapEltwise(std::shared_ptr<tim::vx::Graph> graph, std::shared_ptr<OpCreator>
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int MapEltwiseWithNoAct(std::shared_ptr<tim::vx::Graph> graph, std::shared_ptr<OpCreator> op_creator,
-                        std::unordered_map<uint32_t, std::shared_ptr<tim::vx::Tensor>>& vx_tensors,
-                        const TensorMap& tensor_map, const ScalarMap& scalar_map,
-                        const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs) {
+int MapEltwiseWithNoAct(std::shared_ptr<tim::vx::Graph> graph,
+                        std::shared_ptr<OpCreator> op_creator,
+                        std::unordered_map<uint32_t, std::shared_ptr<tim::vx::Tensor>>& vx_tensors,
+                        const TensorMap& tensor_map, const ScalarMap& scalar_map,
+                        const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs) {
     uint32_t idx_in = inputs[0];
     uint32_t idx_in1 = inputs[1];
     uint32_t idx_out = outputs[0];
@@ -547,11 +567,10 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int MapHashtableLookup(
-        std::shared_ptr<tim::vx::Graph> graph, std::shared_ptr<OpCreator> op_creator,
-        std::unordered_map<uint32_t, std::shared_ptr<tim::vx::Tensor>>& vx_tensors,
-        const
TensorMap& tensor_map, const ScalarMap& scalar_map, - const std::vector& inputs, const std::vector& outputs) { +int MapHashtableLookup(std::shared_ptr graph, std::shared_ptr op_creator, + std::unordered_map>& vx_tensors, + const TensorMap& tensor_map, const ScalarMap& scalar_map, + const std::vector& inputs, const std::vector& outputs) { uint32_t idx_lookups = inputs[0]; uint32_t idx_keys = inputs[1]; uint32_t idx_values = inputs[2]; @@ -625,7 +644,7 @@ int MapPack(std::shared_ptr graph, std::shared_ptr op int32_t inputs_num = inputs.size(); uint32_t idx_out = outputs[0]; std::vector> inputs_tensors; - for(int i = 1; i graph, std::shared_ptr o auto input = vx_tensors[idx_in]; auto alpha = vx_tensors[idx_alpha]; + auto alpha_shape = alpha->GetShape(); + bool dims_all_1 = std::all_of(alpha_shape.begin(), alpha_shape.end(), + [](uint32_t dims) { return dims == 1; }); + if (dims_all_1) alpha->GetSpec().SetShape(std::vector{1}); auto output = vx_tensors[idx_out]; auto prelu = op_creator->Lowering(graph); @@ -734,9 +757,9 @@ int MapRelationalOp(std::shared_ptr graph, std::shared_ptr graph, std::shared_ptr op_creator, - std::unordered_map>& vx_tensors, - const TensorMap& tensor_map, const ScalarMap& scalar_map, - const std::vector& inputs, const std::vector& outputs) { + std::unordered_map>& vx_tensors, + const TensorMap& tensor_map, const ScalarMap& scalar_map, + const std::vector& inputs, const std::vector& outputs) { uint32_t idx_in = inputs[0]; uint32_t idx_regions = inputs[1]; uint32_t idx_batch_index = inputs[2]; @@ -749,7 +772,8 @@ int MapRoi(std::shared_ptr graph, std::shared_ptr op_ vx_tensors.insert({idx_regions, CreateTvxTensor(graph, tensor_map.at(idx_regions))}); } if (!vx_tensors.count(idx_batch_index)) { - vx_tensors.insert({idx_batch_index, CreateTvxTensor(graph, tensor_map.at(idx_batch_index))}); + vx_tensors.insert( + {idx_batch_index, CreateTvxTensor(graph, tensor_map.at(idx_batch_index))}); } if (!vx_tensors.count(idx_out)) { vx_tensors.insert({idx_out, CreateTvxTensor(graph, tensor_map.at(idx_out))}); @@ -870,10 +894,12 @@ int MapSvdf(std::shared_ptr graph, std::shared_ptr op vx_tensors.insert({idx_in, CreateTvxTensor(graph, tensor_map.at(idx_in))}); } if (!vx_tensors.count(idx_weights_feature)) { - vx_tensors.insert({idx_weights_feature, CreateTvxTensor(graph, tensor_map.at(idx_weights_feature))}); + vx_tensors.insert( + {idx_weights_feature, CreateTvxTensor(graph, tensor_map.at(idx_weights_feature))}); } if (!vx_tensors.count(idx_weights_time)) { - vx_tensors.insert({idx_weights_time, CreateTvxTensor(graph, tensor_map.at(idx_weights_time))}); + vx_tensors.insert( + {idx_weights_time, CreateTvxTensor(graph, tensor_map.at(idx_weights_time))}); } if (!vx_tensors.count(idx_state_in)) { vx_tensors.insert({idx_state_in, CreateTvxTensor(graph, tensor_map.at(idx_state_in))}); diff --git a/src/MapOperation.h b/src/MapOperation.h index 90f4078..72ce5f9 100644 --- a/src/MapOperation.h +++ b/src/MapOperation.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/src/Memory.cpp b/src/Memory.cpp new file mode 100644 index 0000000..5031f29 --- /dev/null +++ b/src/Memory.cpp @@ -0,0 +1,291 @@ +#include "Memory.h" + +#include +#include +#include + +#include +#include + +#include "Compilation.h" 
+#include "Utils.h"
+
+namespace vsi::android::sl {
+
+MemoryMapping::~MemoryMapping() {
+    if (status_ != ANEURALNETWORKS_NO_ERROR) {
+        return;
+    }
+
+    if (std::holds_alternative<int>(context_)) {
+        int fd = std::get<int>(context_);
+        if (fd > 0) {
+            munmap(data_, size_);
+        }
+    } else if (std::holds_alternative<const AHardwareBuffer*>(context_)) {
+        const auto* ahwb = std::get<const AHardwareBuffer*>(context_);
+        // DeviceMemory mappings carry a null context; only unlock real buffers.
+        if (ahwb != nullptr) {
+            AHardwareBuffer_unlock(const_cast<AHardwareBuffer*>(ahwb), nullptr);
+        }
+    }
+}
+
+int IMemory::copy(const IMemory* src, const IMemory* dst) {
+    if (src == dst) {
+        return ANEURALNETWORKS_NO_ERROR;
+    }
+
+    if (!src->isInitialized()) {
+        LOGE("IMemory::copy src memory is uninitialized");
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    auto srcMapping = src->map();
+    auto dstMapping = dst->map();
+
+    if (srcMapping.getStatus() != ANEURALNETWORKS_NO_ERROR) {
+        LOGE("IMemory::copy failed to map src memory");
+        return srcMapping.getStatus();
+    }
+
+    if (dstMapping.getStatus() != ANEURALNETWORKS_NO_ERROR) {
+        LOGE("IMemory::copy failed to map dst memory");
+        return dstMapping.getStatus();
+    }
+
+    size_t srcSize = srcMapping.getSize();
+    size_t dstSize = dstMapping.getSize();
+    if (srcSize != dstSize) {
+        LOGE("IMemory::copy src size (%zu) and dst size (%zu) do not match", srcSize, dstSize);
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    const void* srcData = srcMapping.getData();
+    void* dstData = dstMapping.getData();
+    std::memcpy(dstData, srcData, srcSize);
+
+    const_cast<IMemory*>(dst)->setInitialized(true);
+
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+FdMemory* FdMemory::create(size_t size, int prot, int fd, size_t offset) {
+    if (size == 0) {
+        LOGE("FdMemory::create size is 0");
+        return nullptr;
+    }
+
+    int memFd = dup(fd);
+    if (memFd == -1) {
+        LOGE("FdMemory::create failed to dup memory fd: %s (%d)", strerror(errno), errno);
+        return nullptr;
+    }
+
+    auto* memory = new FdMemory(size, prot, memFd, offset);
+    return memory;
+}
+
+FdMemory::~FdMemory() {
+    close(fd_);
+}
+
+int FdMemory::validate(const Compilation* compilation, IOType ioType, uint32_t index,
+                       const ANeuralNetworksOperandType* type, size_t offset, size_t length) const {
+    if (offset + length > size_) {
+        LOGE("FdMemory::validate requested size is larger than memory size");
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+MemoryMapping FdMemory::map() const {
+    void* data = mmap(nullptr, size_, prot_, MAP_SHARED, fd_, static_cast<off_t>(offset_));
+    if (data == MAP_FAILED) {
+        LOGE("FdMemory::map failed to mmap fd: %s (%d)", strerror(errno), errno);
+        return MemoryMapping(ANEURALNETWORKS_BAD_DATA);
+    }
+
+    return {data, size_, fd_};
+}
+
+AHardwareBufferMemory* AHardwareBufferMemory::create(const AHardwareBuffer* ahwb) {
+    auto* memory = new AHardwareBufferMemory(ahwb);
+    return memory;
+}
+
+size_t AHardwareBufferMemory::getSize() const {
+    AHardwareBuffer_Desc desc;
+    AHardwareBuffer_describe(ahwb_, &desc);
+    return (desc.format == AHARDWAREBUFFER_FORMAT_BLOB) ? desc.width : 0;
+}
+
+int AHardwareBufferMemory::validate(const Compilation* compilation, IOType ioType, uint32_t index,
+                                    const ANeuralNetworksOperandType* type, size_t offset,
+                                    size_t length) const {
+    AHardwareBuffer_Desc desc;
+    AHardwareBuffer_describe(ahwb_, &desc);
+
+    if (compilation == nullptr) {
+        // The memory is used for constant tensors.
+
+        if (desc.format != AHARDWAREBUFFER_FORMAT_BLOB) {
+            LOGE("AHardwareBufferMemory::validate cannot set constant operand with non-blob "
+                 "AHardwareBuffer memory");
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+    } else {
+        // The memory is used for runtime I/O tensors.
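+        // NNAPI requires a non-blob AHardwareBuffer to be consumed whole: the
+        // client passes offset == 0 and length == 0, e.g. (sketch):
+        //   ANeuralNetworksMemory_createFromAHardwareBuffer(ahwb, &mem);
+        //   ANeuralNetworksExecution_setInputFromMemory(exec, 0, nullptr, mem, 0, 0);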
+        if (desc.format != AHARDWAREBUFFER_FORMAT_BLOB && (length != 0 || offset != 0)) {
+            LOGE("AHardwareBufferMemory::validate both offset and length must be 0");
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+
+        if (offset + length > desc.width) {
+            LOGE("AHardwareBufferMemory::validate requested size is larger than memory size");
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+    }
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+MemoryMapping AHardwareBufferMemory::map() const {
+    AHardwareBuffer_Desc desc;
+    AHardwareBuffer_describe(ahwb_, &desc);
+
+    if (desc.format != AHARDWAREBUFFER_FORMAT_BLOB) {
+        LOGE("AHardwareBufferMemory::map unable to map non-blob AHardwareBuffer memory");
+        return MemoryMapping(ANEURALNETWORKS_BAD_DATA);
+    }
+    uint32_t size = desc.width;
+
+    constexpr uint64_t kCpuUsageMask =
+            AHARDWAREBUFFER_USAGE_CPU_READ_MASK | AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK;
+    void* data = nullptr;
+    int status = AHardwareBuffer_lock(const_cast<AHardwareBuffer*>(ahwb_),
+                                      desc.usage & kCpuUsageMask, -1, nullptr, &data);
+    if (status != 0) {
+        LOGE("AHardwareBufferMemory::map cannot lock the AHardwareBuffer, error: %d", status);
+        return MemoryMapping(ANEURALNETWORKS_BAD_DATA);
+    }
+
+    return {data, size, ahwb_};
+}
+
+DeviceMemory* DeviceMemory::create(const MemoryDesc* desc) {
+    if (!desc->finished()) {
+        LOGE("DeviceMemory::create cannot create device memory from an unfinished desc");
+        return nullptr;
+    }
+
+    size_t size = desc->getSize();
+    if (size == 0) {
+        LOGE("DeviceMemory::create cannot create device memory from a zero-sized desc");
+        return nullptr;
+    }
+
+    size_t alignedSize = alignSize(size, kAlignment);
+    void* data = aligned_alloc(kAlignment, alignedSize);
+    if (data == nullptr) {
+        LOGE("DeviceMemory::create failed to allocate heap buffer");
+        return nullptr;
+    }
+
+    auto* memory = new DeviceMemory(desc, data, size);
+    return memory;
+}
+
+DeviceMemory::~DeviceMemory() {
+    free(data_);
+}
+
+int DeviceMemory::validate(const Compilation* compilation, IOType ioType, uint32_t index,
+                           const ANeuralNetworksOperandType* type, size_t offset,
+                           size_t length) const {
+    if (compilation == nullptr) {
+        // The memory is used for constant tensors.
+        LOGE("DeviceMemory::validate cannot set constant operand values with device memory");
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    // The memory is used for runtime I/O tensors.
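+    // Device memory is created from an ANeuralNetworksMemoryDesc and, per NNAPI,
+    // must be used whole (offset == 0, length == 0) and only in the roles
+    // registered on the desc; both rules are enforced below.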
+    if (length != 0 || offset != 0) {
+        LOGE("DeviceMemory::validate both offset and length must be 0");
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    if (roles_.count({compilation, ioType, index}) == 0) {
+        LOGE("DeviceMemory::validate role not specified");
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    const auto* model = compilation->getModel();
+    const auto& tensorMap = model->getTensorMap();
+    slang::type::tensor_storage tensorOperand;
+
+    if (ioType == IOType::INPUT) {
+        const auto& inputs = model->getInputs();
+        if (index >= inputs.size()) {
+            LOGE("DeviceMemory::validate input index (%u) out of range", index);
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+
+        uint32_t input = inputs[index];
+        if (tensorMap.count(input) == 0) {
+            LOGE("DeviceMemory::validate cannot find corresponding tensor for input index (%u)",
+                 index);
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+
+        tensorOperand = tensorMap.at(input);
+    } else if (ioType == IOType::OUTPUT) {
+        const auto& outputs = model->getOutputs();
+        if (index >= outputs.size()) {
+            LOGE("DeviceMemory::validate output index (%u) out of range", index);
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+
+        uint32_t output = outputs[index];
+        if (tensorMap.count(output) == 0) {
+            LOGE("DeviceMemory::validate cannot find corresponding tensor for output index (%u)",
+                 index);
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+
+        tensorOperand = tensorMap.at(output);
+    }
+
+    if (type != nullptr) {
+        uint32_t rank = type->dimensionCount;
+        if (shape_.size() != rank) {
+            LOGE("DeviceMemory::validate incompatible tensor rank");
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+
+        for (size_t i = 0; i < rank; i++) {
+            if (type->dimensions[i] == 0) {
+                LOGE("DeviceMemory::validate dynamic (0-sized) axis is not supported");
+                return ANEURALNETWORKS_OP_FAILED;
+            }
+
+            if (shape_[i] != type->dimensions[i]) {
+                LOGE("DeviceMemory::validate incompatible dim length at axis %zu:"
+                     " device memory (%u) vs. requested (%u)",
+                     i, shape_[i], type->dimensions[i]);
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+        }
+
+        if (tensorOperand.dtype != MapDataType(type->type) ||
+            std::fabs(tensorOperand.scale - type->scale) > std::numeric_limits<float>::epsilon() ||
+            tensorOperand.zero_point != type->zeroPoint) {
+            LOGE("DeviceMemory::validate incompatible tensor metadata");
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+    }
+
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+MemoryMapping DeviceMemory::map() const {
+    return {data_, size_, nullptr};
+}
+
+}  // namespace vsi::android::sl
\ No newline at end of file
diff --git a/src/Memory.h b/src/Memory.h
index 8d9750d..436a3d3 100644
--- a/src/Memory.h
+++ b/src/Memory.h
@@ -1,6 +1,6 @@
 /****************************************************************************
 *
- * Copyright (c) 2022 Vivante Corporation
+ * Copyright (c) 2024 Vivante Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -21,183 +21,132 @@
 * DEALINGS IN THE SOFTWARE.
* *****************************************************************************/ -#ifndef VSI_ANDROID_SL_MEMORY_TYPE_H -#define VSI_ANDROID_SL_MEMORY_TYPE_H -#include -#include -#include +#ifndef VSI_ANDROID_SL_MEMORY_H +#define VSI_ANDROID_SL_MEMORY_H #include +#include + +#include +#include + +#include "MemoryDesc.h" #include "Types.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl { -class MemoryDesc { +struct MemoryMapping { public: - void UpdateDataSize() { - auto dims = 1; - for (int i = 0; i < shape_.size(); ++i) dims *= shape_.at(i); - switch (t_storage_.dtype) { - case slang::type::data_type::kINT64: - length_ = dims * 8; - break; - case slang::type::data_type::kFP32: - case slang::type::data_type::kTF32: - case slang::type::data_type::kINT32: - case slang::type::data_type::kUINT32: - length_ = dims * 4; - break; - case slang::type::data_type::kFP16: - case slang::type::data_type::kBF16: - case slang::type::data_type::kINT16: - case slang::type::data_type::kUINT16: - length_ = dims * 2; - break; - case slang::type::data_type::kUINT8: - case slang::type::data_type::kINT8: - case slang::type::data_type::kBOOL8: - length_ = dims; - break; - case slang::type::data_type::kUINT4: - case slang::type::data_type::kINT4: - length_ = dims / 2; - break; - default: - std::cout << "Invalid data type corresponding to the role" << std::endl; - break; - } - if (length_ <= 0) std::cout << "Invalid shape corresponding to the desc" << std::endl; - } - int SetDimensions(const std::vector& shape) { - shape_ = shape; - return ANEURALNETWORKS_NO_ERROR; - } - int AddRole(TensorMap& tensor_map, IOType io_type, uint32_t operand_id, float freq) { - // To do: check index validity - t_storage_ = tensor_map.at(operand_id); - io_type_ = io_type; - UpdateDataSize(); - return ANEURALNETWORKS_NO_ERROR; - } - int Finish() { - finished_ = true; - return ANEURALNETWORKS_NO_ERROR; - } - const size_t Length() const { return length_; } - size_t Length() { return length_; } - std::vector Shape() const { return shape_; } - std::vector& Shape() { return shape_; } - bool IsFinished() const { return finished_; } + using Context = std::variant; + + explicit MemoryMapping(int status) : status_(status), data_(nullptr), size_(0) {} + MemoryMapping(void* data, size_t size, Context context) + : status_(ANEURALNETWORKS_NO_ERROR), data_(data), size_(size), context_(context) {} + + MemoryMapping(const MemoryMapping&) = delete; + MemoryMapping& operator=(const MemoryMapping&) = delete; + + ~MemoryMapping(); + + [[nodiscard]] int getStatus() const { return status_; } + [[nodiscard]] void* getData() const { return data_; } + [[nodiscard]] size_t getSize() const { return size_; } private: - IOType io_type_; - std::vector shape_; - slang::type::tensor_storage t_storage_; - bool finished_{false}; - size_t length_{0}; + int status_; + void* data_; + size_t size_; + Context context_; }; -class Memory { + +class IMemory { + public: + virtual ~IMemory() = default; + + [[nodiscard]] virtual size_t getSize() const = 0; + [[nodiscard]] virtual int validate(const Compilation* compilation, IOType ioType, + uint32_t index, const ANeuralNetworksOperandType* type, + size_t offset, size_t length) const = 0; + [[nodiscard]] virtual MemoryMapping map() const = 0; + + [[nodiscard]] virtual bool isInitialized() const = 0; + virtual void setInitialized(bool initialized) = 0; + + static int copy(const IMemory* src, const IMemory* dst); +}; + +class FdMemory final : public IMemory { public: - ~Memory() { - if 
(create_from_fd_) munmap(data_, length_); - if (create_from_ahwb_ || create_from_desc_) free(data_); - } - int CreateFromFd(size_t size, int prot, int fd, size_t offset) { - if (size <= 0) { - std::cout << "Invalid size" << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - data_ = mmap(nullptr, size, prot, MAP_SHARED, fd, offset); - if (data_ == MAP_FAILED) { - std::cout << "Can't mmap with the fd." << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } - length_ = size; - create_from_fd_ = true; - - return ANEURALNETWORKS_NO_ERROR; - } - int CreateFromAHWB(const AHardwareBuffer* ahwb) { - if (ahwb == nullptr) { - std::cout << "Invalid AHardwareBuffer pointer" << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - ahwb_ = ahwb; - create_from_ahwb_ = true; - - return ANEURALNETWORKS_NO_ERROR; - } - int PraseAHWB(const AHardwareBuffer* ahwb) { - AHardwareBuffer_Desc desc = {0}; - AHardwareBuffer_describe(ahwb, &desc); - if (desc.format != AHARDWAREBUFFER_FORMAT_BLOB) { - std::cout << "Unable to map non-blob AHardwareBuffer memory" << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - const uint32_t size = desc.width; - void* buffer = (void*)malloc(size); - if (buffer == nullptr) { - std::cout << "Malloc buffer fail" << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } - const uint64_t kCpuUsageMask = - AHARDWAREBUFFER_USAGE_CPU_READ_MASK | AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK; - void* data = nullptr; - auto status = AHardwareBuffer_lock(const_cast(ahwb), - desc.usage & kCpuUsageMask, -1, nullptr, &data); - if (status != /*NO_ERROR*/ 0) { - std::cout << "HardwareBuffer lock memory fail" << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - memcpy((void*)buffer, (void*)data, size); - data_ = buffer; - length_ = size; - status = AHardwareBuffer_unlock(const_cast(ahwb), nullptr); - if (status != /*NO_ERROR*/ 0) { - std::cout << "HardwareBuffer unlock memory fail" << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - return ANEURALNETWORKS_NO_ERROR; - } - int CreateFromDesc(const MemoryDesc* mdesc) { - create_from_desc_ = true; - mdesc_ = mdesc; - size_t length = mdesc->Length(); - void* buffer = (void*)malloc(length); - if (buffer == nullptr) { - std::cout << "Malloc buffer fail" << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } - data_ = buffer; - length_ = length; - return ANEURALNETWORKS_NO_ERROR; - } - const MemoryDesc* GetDesc() { return mdesc_; } - void* Data() const { return data_; } - void* Data() { return data_; } - void SetData(void* buffer) { data_ = buffer; } - const size_t Length() const { return length_; } - size_t Length() { return length_; } - void SetLength(size_t length) { length_ = length; } - bool IsCreateFromFd() const { return create_from_fd_; } - bool IsCreateFromAHWB() const { return create_from_ahwb_; } - bool IsCreateFromDesc() const { return create_from_desc_; } - const AHardwareBuffer* AHWB() const { return ahwb_; } + explicit FdMemory(size_t size, int prot, int fd, size_t offset) + : size_(size), prot_(prot), fd_(fd), offset_(offset) {} + ~FdMemory() override; + static FdMemory* create(size_t size, int prot, int fd, size_t offset); + + [[nodiscard]] size_t getSize() const override { return size_; } + [[nodiscard]] int validate(const Compilation* compilation, IOType ioType, uint32_t index, + const ANeuralNetworksOperandType* type, size_t offset, + size_t length) const override; + [[nodiscard]] MemoryMapping map() const override; + [[nodiscard]] bool isInitialized() const override { return fd_ != -1; } + void setInitialized(bool initialized) override {} 
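+    // Lifetime note: create() dup()s the caller's fd and ~FdMemory() closes the
+    // duplicate; map() mmaps on each call and the returned MemoryMapping
+    // munmaps itself on destruction (see MemoryMapping::~MemoryMapping).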
 private: - bool create_from_fd_{false}; - bool create_from_ahwb_{false}; - bool create_from_desc_{false}; - const AHardwareBuffer* ahwb_{nullptr}; - const MemoryDesc* mdesc_{nullptr}; - void* data_{nullptr}; - size_t length_{0}; + int fd_ = -1; + int prot_ = 0; + size_t size_ = 0; + size_t offset_ = 0; }; -} // namespace sl -} // namespace android -} // namespace vsi
+class AHardwareBufferMemory final : public IMemory { + public: + explicit AHardwareBufferMemory(const AHardwareBuffer* ahwb) : ahwb_(ahwb) {} + ~AHardwareBufferMemory() override = default; + static AHardwareBufferMemory* create(const AHardwareBuffer* ahwb); + + [[nodiscard]] size_t getSize() const override; + [[nodiscard]] int validate(const Compilation* compilation, IOType ioType, uint32_t index, + const ANeuralNetworksOperandType* type, size_t offset, + size_t length) const override; + [[nodiscard]] MemoryMapping map() const override; + [[nodiscard]] bool isInitialized() const override { return ahwb_ != nullptr; } + void setInitialized(bool initialized) override {} + + private: + const AHardwareBuffer* ahwb_; +};
+ +class DeviceMemory final : public IMemory { + static constexpr size_t kAlignment = 64; + + public: + explicit DeviceMemory(const MemoryDesc* desc, void* data, size_t size) + : roles_(desc->getRoles()), + tensorOperand_(desc->getOperand()), + shape_(desc->getShape()), + data_(data), + size_(size) {} + ~DeviceMemory() override; + static DeviceMemory* create(const MemoryDesc* desc); + + [[nodiscard]] size_t getSize() const override { return size_; } + [[nodiscard]] int validate(const Compilation* compilation, IOType ioType, uint32_t index, + const ANeuralNetworksOperandType* type, size_t offset, + size_t length) const override; + [[nodiscard]] MemoryMapping map() const override; + [[nodiscard]] bool isInitialized() const override { return initialized_; } + void setInitialized(bool initialized) override { initialized_ = initialized; } + + private: + std::set<CompilationRole> roles_; + slang::type::tensor_storage tensorOperand_; + std::vector<uint32_t> shape_; + + void* data_; + size_t size_; + bool initialized_ = false; +}; + +} // namespace vsi::android::sl + #endif \ No newline at end of file
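For reference, the new `IMemory` interface hands out RAII mappings instead of exposing raw pointers the way the old `Memory` class did. The snippet below is illustrative only (not part of the patch); `readInt32` is a hypothetical helper showing how a caller would consume `map()`, relying only on the accessors declared above.

```cpp
// Illustrative only -- sketch of consuming the IMemory/MemoryMapping API.
#include <cstdint>
#include <cstring>
#include <optional>

std::optional<int32_t> readInt32(const vsi::android::sl::IMemory* memory, size_t offset) {
    // map() returns an RAII mapping; its destructor releases the mmap/lock.
    auto mapping = memory->map();
    if (mapping.getStatus() != ANEURALNETWORKS_NO_ERROR) {
        return std::nullopt;  // Mapping failed; the status carries the NNAPI error.
    }
    if (offset + sizeof(int32_t) > mapping.getSize()) {
        return std::nullopt;  // Out-of-range read.
    }
    int32_t value;
    std::memcpy(&value, static_cast<const uint8_t*>(mapping.getData()) + offset, sizeof(value));
    return value;
}
```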
diff --git a/src/MemoryDesc.cpp b/src/MemoryDesc.cpp new file mode 100644 index 0000000..50da4c7 --- /dev/null +++ b/src/MemoryDesc.cpp @@ -0,0 +1,173 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#include "MemoryDesc.h" + +#include <numeric> + +#include "Compilation.h" +#include "Model.h" +#include "Utils.h" + +namespace vsi::android::sl {
+int MemoryDesc::addRole(const Compilation* compilation, IOType ioType, uint32_t index, + float frequency) { + if (finished_) { + LOGE("MemoryDesc::addRole called after the memory desc is finished"); + return ANEURALNETWORKS_BAD_STATE; + } + + if (!compilation->isFinished()) { + LOGE("MemoryDesc::addRole passed an unfinished compilation"); + return ANEURALNETWORKS_BAD_STATE; + } + + if (frequency <= 0.0F || frequency > 1.0F) { + LOGE("MemoryDesc::addRole passed an invalid frequency"); + return ANEURALNETWORKS_BAD_DATA; + } + + if (roles_.count({compilation, ioType, index}) > 0) { + LOGE("MemoryDesc::addRole the same role is specified twice"); + return ANEURALNETWORKS_BAD_DATA; + } + + const auto* model = compilation->getModel(); + const auto& tensorMap = model->getTensorMap(); + slang::type::tensor_storage tensorOperand; + + if (ioType == IOType::INPUT) { + const auto& inputs = model->getInputs(); + if (index >= inputs.size()) { + LOGE("MemoryDesc::addRole input index (%u) out of range", index); + return ANEURALNETWORKS_BAD_DATA; + } + + uint32_t input = inputs[index]; + if (tensorMap.count(input) == 0) { + LOGE("MemoryDesc::addRole cannot find corresponding tensor for input index (%u)", + index); + return ANEURALNETWORKS_BAD_DATA; + } + + tensorOperand = tensorMap.at(input); + } else if (ioType == IOType::OUTPUT) { + const auto& outputs = model->getOutputs(); + if (index >= outputs.size()) { + LOGE("MemoryDesc::addRole output index (%u) out of range", index); + return ANEURALNETWORKS_BAD_DATA; + } + + uint32_t output = outputs[index]; + if (tensorMap.count(output) == 0) { + LOGE("MemoryDesc::addRole cannot find corresponding tensor for output index (%u)", + index); + return ANEURALNETWORKS_BAD_DATA; + } + + tensorOperand = tensorMap.at(output); + } else { + LOGE("MemoryDesc::addRole passed an invalid IO type"); + return ANEURALNETWORKS_BAD_DATA; + } + + if (tensorOperand_.has_value()) { + if (tensorOperand.attr != tensorOperand_->attr || + tensorOperand.dtype != tensorOperand_->dtype || + tensorOperand.scale != tensorOperand_->scale || + tensorOperand.zero_point != tensorOperand_->zero_point || + tensorOperand.per_channel_scales != tensorOperand_->per_channel_scales) { + LOGE("MemoryDesc::addRole incompatible tensor metadata"); + return ANEURALNETWORKS_BAD_DATA; + } + } else { + tensorOperand_ = tensorOperand; + } + + auto shape = combineShape(shape_, tensorOperand.shape); + if (shape.empty()) { + LOGE("MemoryDesc::addRole incompatible tensor shapes"); + return ANEURALNETWORKS_BAD_DATA; + } + shape_ = shape; + + roles_.insert({compilation, ioType, index}); + return ANEURALNETWORKS_NO_ERROR; +}
+int MemoryDesc::setShape(const std::vector<uint32_t>& dimensions) { + if (finished_) { + LOGE("MemoryDesc::setShape called after the memory desc is finished"); + return ANEURALNETWORKS_BAD_STATE; + } + + if (tensorOperand_.has_value() && dimensions.empty()) { + LOGE("MemoryDesc::setShape incompatible shapes for scalars"); + return ANEURALNETWORKS_BAD_DATA; + } + + auto shape = combineShape(shape_, dimensions); + if (shape.empty() && !dimensions.empty()) { + LOGE("MemoryDesc::setShape incompatible shapes"); + return ANEURALNETWORKS_BAD_DATA; + } + + shape_ = shape; + return ANEURALNETWORKS_NO_ERROR; +}
+int MemoryDesc::finish() { + if (finished_) { + LOGE("MemoryDesc::finish called after the memory desc is finished"); + return ANEURALNETWORKS_BAD_STATE; + } + + if (roles_.empty()) { + LOGE("MemoryDesc::finish the memory desc has no role"); + return ANEURALNETWORKS_BAD_STATE; + } + + for (auto [c0, t0, i0] : roles_) { + for (auto [c1, t1, i1] : roles_) { + if (c0 == c1 && t0 != t1) { + LOGE("MemoryDesc::finish the same device memory cannot be used for both input and " + "output of the same compilation"); + return ANEURALNETWORKS_BAD_STATE; + } + } + } + + finished_ = true; + return ANEURALNETWORKS_NO_ERROR; +}
+size_t MemoryDesc::getSize() const { + if (!finished_) { + return 0; + } + + size_t numElements = std::reduce(shape_.cbegin(), shape_.cend(), size_t{1}, std::multiplies<size_t>()); + size_t elementSize = getDtypeSize(tensorOperand_->dtype); + return elementSize * numElements; +} + +} // namespace vsi::android::sl \ No newline at end of file
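`combineShape` is referenced by `addRole` and `setShape` but is not part of this patch (it lives in Utils). Based purely on its call sites, where an empty result signals incompatibility, a plausible implementation might look like the sketch below, assuming the NNAPI convention that a dimension of 0 means "unknown".

```cpp
// Illustrative only -- combineShape is defined elsewhere in the tree. A
// plausible shape-merge consistent with how this file calls it.
#include <cstddef>
#include <vector>

std::vector<uint32_t> combineShape(const std::vector<uint32_t>& lhs,
                                   const std::vector<uint32_t>& rhs) {
    if (lhs.empty()) return rhs;              // Nothing recorded yet; adopt the other shape.
    if (rhs.empty()) return lhs;
    if (lhs.size() != rhs.size()) return {};  // Rank mismatch -> incompatible.
    std::vector<uint32_t> combined(lhs.size());
    for (size_t i = 0; i < lhs.size(); i++) {
        if (lhs[i] != 0 && rhs[i] != 0 && lhs[i] != rhs[i]) {
            return {};  // Both extents known but different -> incompatible.
        }
        combined[i] = lhs[i] != 0 ? lhs[i] : rhs[i];  // Prefer the known extent.
    }
    return combined;
}
```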
diff --git a/src/MemoryDesc.h b/src/MemoryDesc.h new file mode 100644 index 0000000..7913c3d --- /dev/null +++ b/src/MemoryDesc.h @@ -0,0 +1,63 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ + +#ifndef VSI_ANDROID_SL_MEMORY_DESC_H +#define VSI_ANDROID_SL_MEMORY_DESC_H + +#include + +#include +#include +#include + +#include "Types.h" +#include "slang/type_system.h" + +namespace vsi::android::sl { + +class Compilation; +using CompilationRole = std::tuple<const Compilation*, IOType, uint32_t>;
+class MemoryDesc { + public: + int addRole(const Compilation* compilation, IOType ioType, uint32_t index, float frequency); + int setShape(const std::vector<uint32_t>& dimensions); + int finish(); + + [[nodiscard]] bool finished() const { return finished_; } + [[nodiscard]] size_t getSize() const; + [[nodiscard]] std::set<CompilationRole> getRoles() const { return roles_; } + [[nodiscard]] slang::type::tensor_storage getOperand() const { return *tensorOperand_; } + [[nodiscard]] std::vector<uint32_t> getShape() const { return shape_; } + + private: + std::set<CompilationRole> roles_; + std::optional<slang::type::tensor_storage> tensorOperand_; + std::vector<uint32_t> shape_; + bool finished_ = false; +}; + +} // namespace vsi::android::sl + +#endif \ No newline at end of file
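As a usage note (illustrative only, not part of the patch), the intended `MemoryDesc` lifecycle mirrors the NNAPI `ANeuralNetworksMemoryDesc_*` flow: add roles, optionally constrain the shape, then seal the descriptor. The `compilation` pointer and the concrete shape below are assumptions for the example.

```cpp
// Illustrative only: the MemoryDesc build sequence.
size_t describeInput0(const vsi::android::sl::Compilation* compilation) {
    vsi::android::sl::MemoryDesc desc;
    // This memory will feed input 0; frequency is a usage hint in (0.0, 1.0].
    if (desc.addRole(compilation, vsi::android::sl::IOType::INPUT, 0, 1.0F) !=
        ANEURALNETWORKS_NO_ERROR) {
        return 0;
    }
    desc.setShape({1, 224, 224, 3});  // Optional; dimensions of 0 stay unknown.
    if (desc.finish() != ANEURALNETWORKS_NO_ERROR) {  // Seals roles and shape.
        return 0;
    }
    return desc.getSize();  // Byte size derived from dtype and combined shape.
}
```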
diff --git a/src/Model.cpp b/src/Model.cpp index bf04711..42e4a61 100644 --- a/src/Model.cpp +++ b/src/Model.cpp @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,639 +21,575 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ -#include "Model.h" -#include -#include +#include "Model.h" #include "Utils.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl {
-int Model::AddOperand(const ANeuralNetworksOperandType& type) { +int Model::addOperand(const ANeuralNetworksOperandType& type) { if (finished_) { - std::cout << "Error: can not modify a finished model." << std::endl; + LOGE("Model::addOperand cannot modify a finished model"); return ANEURALNETWORKS_BAD_STATE; } - if (type.dimensionCount) { // implies tensor - if (type.dimensions == nullptr) { - std::cout << "Error: get an invalid operand" << std::endl; + auto operandType = static_cast<OperandType>(type.type); + if (operandType == OperandType::TENSOR_FLOAT32 || operandType == OperandType::TENSOR_FLOAT16 || + operandType == OperandType::TENSOR_INT32 || operandType == OperandType::TENSOR_BOOL8 || + operandType == OperandType::TENSOR_QUANT8_ASYMM || + operandType == OperandType::TENSOR_QUANT8_SYMM || + operandType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED || + operandType == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL || + operandType == OperandType::TENSOR_QUANT16_ASYMM || + operandType == OperandType::TENSOR_QUANT16_SYMM) { + // Implies tensor. + if (type.dimensionCount == 0) { + LOGE("Model::addOperand passed a tensor operand with zero rank"); return ANEURALNETWORKS_BAD_DATA; } + auto shape = std::vector<uint32_t>(type.dimensions, type.dimensions + type.dimensionCount); slang::type::tensor_storage tensor = { .dtype = MapDataType(type.type), .qtype = MapQuantType(type.type), - .shape = std::vector<uint32_t>(type.dimensions, - type.dimensions + type.dimensionCount), + .shape = shape, .scale = type.scale, - .zero_point = type.zeroPoint}; - tensors_.insert({operand_id_++, tensor}); - } else { // implies scalar - if (type.dimensions != nullptr) { - std::cout << "Error: get an invalid operand" << std::endl; + .zero_point = type.zeroPoint, + }; + tensors_.insert({numOperands_, tensor}); + } else if (operandType == OperandType::FLOAT32 || operandType == OperandType::FLOAT16 || + operandType == OperandType::INT32 || operandType == OperandType::UINT32 || + operandType == OperandType::BOOL) { + // Implies scalar. + if (type.dimensionCount != 0) { + LOGE("Model::addOperand passed a scalar operand with non-zero rank"); return ANEURALNETWORKS_BAD_DATA; } slang::type::scalar_storage scalar = {.dtype = MapDataType(type.type)}; - scalars_.insert({operand_id_++, scalar}); + scalars_.insert({numOperands_, scalar}); + } else { + LOGW("Model::addOperand passed an operand with unsupported type: %d", operandType); } + + numOperands_++; return ANEURALNETWORKS_NO_ERROR; }
-int Model::SetOperandSymmPerChannelQuantParams( +int Model::setOperandSymmPerChannelQuantParams( int32_t index, const ANeuralNetworksSymmPerChannelQuantParams& channelQuant) { if (finished_) { - std::cout << "Error: can not modify a finished model." << std::endl; + LOGE("Model::setOperandSymmPerChannelQuantParams cannot modify a finished model"); return ANEURALNETWORKS_BAD_STATE; } - if (index >= operand_id_) { - std::cout << "ANeuralNetworksModel_SetOperandSymmPerChannelQuantParams get an invalid index" - << std::endl; + + if (index < 0 || index >= numOperands_) { + LOGE("Model::setOperandSymmPerChannelQuantParams passed an invalid operand index"); return ANEURALNETWORKS_BAD_DATA; } + if (tensors_.find(index) != tensors_.end()) { // reverse channel_dim axis - uint32_t channel_dim = tensors_[index].shape.size() - channelQuant.channelDim - 1; - tensors_[index].channel_dim = channel_dim; + uint32_t channelDim = tensors_[index].shape.size() - channelQuant.channelDim - 1; + tensors_[index].channel_dim = channelDim; tensors_[index].per_channel_scales.assign(channelQuant.scales, channelQuant.scales + channelQuant.scaleCount); tensors_[index].per_channel_zero_points.assign(channelQuant.scaleCount, 0); } else { - std::cout << "Error: Invalid operand index." << std::endl; + LOGE("Model::setOperandSymmPerChannelQuantParams operand (%d) is not a tensor", index); return ANEURALNETWORKS_BAD_DATA; } return ANEURALNETWORKS_NO_ERROR; }
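The channel-axis flip above is easy to get wrong, so a worked example may help. NNAPI numbers dimensions from the outermost axis, while TIM-VX stores them innermost-first; the values below are assumptions for illustration only.

```cpp
// Illustrative only: the reversal applied in setOperandSymmPerChannelQuantParams.
// An NNAPI conv filter of shape [depth_out, H, W, depth_in] quantized along
// channelDim = 0 (depth_out) lands on axis rank - 0 - 1 = 3 in TIM-VX order.
uint32_t rank = 4;
uint32_t nnapiChannelDim = 0;
uint32_t vxChannelDim = rank - nnapiChannelDim - 1;  // == 3
```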
-int Model::SetOperandValue(uint32_t index, const void* buffer, size_t length) { +int Model::setOperandValue(int32_t index, const void* buffer, size_t length) { if (finished_) { - std::cout << "Error: can not modify a finished model." << std::endl; + LOGE("Model::setOperandValue cannot modify a finished model"); return ANEURALNETWORKS_BAD_STATE; } - if (index >= operand_id_) { - std::cout << "ANeuralNetworksModel_setOperandValue get an invalid index" << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - if (length > 0xFFFFFFFF) { - std::cout << "ANeuralNetworksModel_setOperandValue value length of " << length - << " exceeds max size" << std::endl; + + if (index < 0 || index >= numOperands_) { + LOGE("Model::setOperandValue passed an invalid operand index"); return ANEURALNETWORKS_BAD_DATA; } + if (buffer == nullptr) { - std::cout << "Warning: Operand index " << index << " is empty" << std::endl; + LOGW("Model::setOperandValue operand (%d) is marked as optional", index); return ANEURALNETWORKS_NO_ERROR; } + if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) { - const uint8_t* copied_values = reinterpret_cast<const uint8_t*>(buffer); - constant_copy_.insert({index, std::vector<uint8_t>(copied_values, copied_values + length)}); - } + size_t storageOffset = constantCopyStorage_.size(); + size_t alignedLength = alignSize(length, 4); - if (tensors_.find(index) != tensors_.end()) { - tensors_[index].attr = slang::type::tensor_attr::kCONSTANT; - if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) { - tensors_[index].data = constant_copy_[index].data(); - tensors_[index].data_length = constant_copy_[index].size(); - } else { - tensors_[index].data = buffer; - tensors_[index].data_length = length; + constantCopyStorage_.resize(storageOffset + alignedLength); + + uint8_t* storageBuffer = constantCopyStorage_.data() + storageOffset; + std::copy_n(reinterpret_cast<const uint8_t*>(buffer), length, storageBuffer); + + operandValueInfos_[index] = { + .size = length, + .offset = storageOffset, + .buffer = nullptr, + }; + + if (auto it = tensors_.find(index); it != tensors_.end()) { + auto& [_, tensor] = *it; + tensor.data.assign(storageBuffer, storageBuffer + length); + } + + if (auto it = scalars_.find(index); it != scalars_.end()) { + auto& [_, scalar] = *it; + scalar.data.assign(storageBuffer, storageBuffer + length); } } else { - scalars_[index].data = constant_copy_[index]; + operandValueInfos_[index] = { + .size = length, + .buffer = buffer, + }; + } + + if (auto it = tensors_.find(index); it != tensors_.end()) { + auto& [_, tensor] = *it; + tensor.attr = slang::type::tensor_attr::kCONSTANT; } return ANEURALNETWORKS_NO_ERROR; }
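`alignSize` above comes from Utils and is not shown in this patch; presumably it is the usual round-up-to-multiple helper, used here so each packed constant starts at a 4-byte-aligned offset in `constantCopyStorage_`. A sketch under that assumption:

```cpp
// Illustrative only -- a plausible alignSize, assuming standard round-up.
#include <cstddef>

size_t alignSize(size_t size, size_t alignment) {
    return (size + alignment - 1) / alignment * alignment;  // alignSize(5, 4) == 8
}
```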
-int Model::SetOperandValueFromMemory(int32_t index, const Memory* memory, size_t offset, +int Model::setOperandValueFromMemory(int32_t index, const IMemory* memory, size_t offset, size_t length) { if (finished_) { - std::cout << "Error: can not modify a finished model." << std::endl; + LOGE("Model::setOperandValueFromMemory cannot modify a finished model"); return ANEURALNETWORKS_BAD_STATE; } - if (index >= operand_id_) { - std::cout << "ANeuralNetworksModel_setOperandValueFromMemory get an invalid index" - << std::endl; - return ANEURALNETWORKS_BAD_DATA; - } - if (length > 0xFFFFFFFF) { - std::cout << "ANeuralNetworksModel_setOperandValueFromMemory value length of " << length - << " exceeds max size" << std::endl; + + if (index < 0 || index >= numOperands_) { + LOGE("Model::setOperandValueFromMemory passed an invalid operand index"); return ANEURALNETWORKS_BAD_DATA; } - if (memory == nullptr) { - std::cout << "ANeuralNetworksModel_setOperandValueFromMemory get a null memory" - << std::endl; - return ANEURALNETWORKS_BAD_DATA; + + int status = memory->validate(nullptr, IOType::NONE, index, nullptr, offset, length); + if (status != ANEURALNETWORKS_NO_ERROR) { + LOGE("Model::setOperandValueFromMemory failed to validate memory"); + return status; } - if (tensors_.find(index) != tensors_.end()) { - tensors_[index].attr = slang::type::tensor_attr::kCONSTANT; - if (memory->IsCreateFromAHWB()) { - auto mem = const_cast<Memory*>(memory); - auto status = mem->PraseAHWB(mem->AHWB()); - if (status != ANEURALNETWORKS_NO_ERROR) return status; + if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) { + auto mapping = memory->map(); + if (mapping.getStatus() != ANEURALNETWORKS_NO_ERROR) { + LOGE("Model::setOperandValueFromMemory failed to map memory"); + return mapping.getStatus(); } - tensors_[index].data = (uint8_t*)memory->Data() + offset; - tensors_[index].data_length = length; - } else { - std::cout << "ANeuralNetworksModel_setOperandValueFromMemory get an invalid index" - << std::endl; - return ANEURALNETWORKS_BAD_DATA; + + const uint8_t* data = reinterpret_cast<const uint8_t*>(mapping.getData()) + offset; + + if (auto it = tensors_.find(index); it != tensors_.end()) { + auto& [_, tensor] = *it; + tensor.data.assign(data, data + length); + } + + if (auto it = scalars_.find(index); it != scalars_.end()) { + auto& [_, scalar] = *it; + scalar.data.assign(data, data + length); + } + } + + operandValueInfos_[index] = { + .offset = offset, + .memory = memory, + }; + + if (auto it = tensors_.find(index); it != tensors_.end()) { + auto& [_, tensor] = *it; + tensor.attr = slang::type::tensor_attr::kCONSTANT; + } + + return ANEURALNETWORKS_NO_ERROR; +}
+int Model::setOperandValueFromModel(int32_t index, const Model* reference) { + if (finished_) { + LOGE("Model::setOperandValueFromModel cannot modify a finished model"); + return ANEURALNETWORKS_BAD_STATE; + } + + if (!reference->isFinished()) { + LOGE("Model::setOperandValueFromModel reference model is not finished"); + return ANEURALNETWORKS_BAD_STATE; + } + referenceModels_.push_back(reference); return ANEURALNETWORKS_NO_ERROR; }
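The two branches of `setOperandValue` above encode the NNAPI ownership contract: values up to `ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES` (128 bytes) are copied immediately, while larger buffers are only referenced. An illustrative caller-side sketch (operand indexes 2 and 3 are assumptions):

```cpp
// Illustrative only: the caller-side lifetime contract.
void setConstants(vsi::android::sl::Model& model, const float* weights, size_t weightsSize) {
    int32_t activation = 0;  // 4 bytes <= 128: copied into the model's storage.
    model.setOperandValue(2, &activation, sizeof(activation));
    // `activation` may be destroyed now; the model owns the copy.

    // weightsSize > 128 bytes: only the pointer is recorded, so the buffer
    // must outlive the model until compilation has finished.
    model.setOperandValue(3, weights, weightsSize);
}
```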
-int Model::AddOperation(ANeuralNetworksOperationType type, uint32_t inputCount, +int Model::addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) { if (finished_) { - std::cout << "Error: can not modify a finished model." << std::endl; + LOGE("Model::addOperation cannot modify a finished model"); return ANEURALNETWORKS_BAD_STATE; } + + auto opInputs = std::vector<uint32_t>(inputs, inputs + inputCount); + auto opOutputs = std::vector<uint32_t>(outputs, outputs + outputCount); + + bool hasEmptyScalar = std::any_of(opInputs.cbegin(), opInputs.cend(), [this](uint32_t i) { + if (auto it = scalars_.find(i); it != scalars_.cend()) { + auto [_, scalar] = *it; + return scalar.data.empty(); + } + return false; + }); + + if (hasEmptyScalar) { + LOGW("Model::addOperation OP type: %d has empty input scalars", type); + opCreators_.push_back(std::make_shared<PlaceHolderOpCreator>(type)); + opSupported_.push_back(false); + return ANEURALNETWORKS_NO_ERROR; + } + + std::shared_ptr<OpCreator> opCreator; switch (type) { case ANEURALNETWORKS_ABS: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared<AbsCreator>(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_ADD: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared<AddCreator>(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_ARGMAX: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared<ArgmaxCreator>(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_ARGMIN: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared<ArgminCreator>(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_AVERAGE_POOL_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_BATCH_MATMUL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_BATCH_TO_SPACE_ND: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_CAST: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_CHANNEL_SHUFFLE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_CONCATENATION: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_CONV_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), -
std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_DEQUANTIZE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_DEPTHWISE_CONV_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_DEPTH_TO_SPACE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_DIV: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_EMBEDDING_LOOKUP: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_EQUAL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_EXP: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_EXPAND_DIMS: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_ELU: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_FLOOR: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_FULLY_CONNECTED: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_GATHER: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_GREATER: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, 
scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_GREATER_EQUAL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_GROUPED_CONV_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_HASHTABLE_LOOKUP: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_HARD_SWISH: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_INSTANCE_NORMALIZATION: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, + tensors_, scalars_); break; case ANEURALNETWORKS_L2_NORMALIZATION: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_LESS_EQUAL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LESS: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LOG: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, + tensors_, scalars_); break; case ANEURALNETWORKS_LOGISTIC: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LOGICAL_NOT: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LOGICAL_AND: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, 
scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LOGICAL_OR: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_LOG_SOFTMAX: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_L2_POOL_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_MAX_POOL_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_MEAN: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_MAXIMUM: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_MINIMUM: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_MIRROR_PAD: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_MUL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_NEG: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_NOT_EQUAL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_PACK: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_PAD: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; 
case ANEURALNETWORKS_PAD_V2: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_POW: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_PRELU: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_QUANTIZE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_REDUCE_ALL: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_REDUCE_ANY: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_REDUCE_MAX: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_REDUCE_MIN: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_REDUCE_PROD: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_REDUCE_SUM: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_RELU: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_RELU1: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_RELU6: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_RESHAPE: - op_creators_.push_back(std::make_shared( - 
std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_RESIZE_BILINEAR: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; case ANEURALNETWORKS_REVERSE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_ROI_ALIGN: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; // case ANEURALNETWORKS_ROI_POOLING: // roi_pooling not support at present // op_creators_.push_back(std::make_shared( - // std::vector(inputs, inputs + inputCount), - // std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + // inputsList, + // outputsList, tensors_, scalars_); // break; case ANEURALNETWORKS_RSQRT: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SELECT: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SIN: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SLICE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SOFTMAX: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SPACE_TO_DEPTH: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SPACE_TO_BATCH_ND: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SPLIT: - op_creators_.push_back(std::make_shared( - 
std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SQUEEZE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SQRT: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_STRIDED_SLICE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = + std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_SUB: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); - break; - case ANEURALNETWORKS_SVDF: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; + // case ANEURALNETWORKS_SVDF: // svdf not support at present + // opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); + // break; case ANEURALNETWORKS_TANH: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_TILE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_TOPK_V2: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_TRANSPOSE: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, scalars_); break; case ANEURALNETWORKS_TRANSPOSE_CONV_2D: - op_creators_.push_back(std::make_shared( - std::vector(inputs, inputs + inputCount), - std::vector(outputs, outputs + outputCount), tensors_, scalars_)); + opCreator = std::make_shared(opInputs, opOutputs, tensors_, + scalars_); break; default: - op_creators_.push_back(std::make_shared(type)); + opCreator = std::make_shared(type); break; } - auto op = op_creators_.back(); - op_supports_.push_back(op->support_state_); + + opCreators_.push_back(opCreator); + opSupported_.push_back(opCreator->isSupported()); + return ANEURALNETWORKS_NO_ERROR; +} + +int Model::relaxComputationFloat32toFloat16(bool relaxed) { + if (finished_) { + LOGE("Model::relaxComputationFloat32toFloat16 cannot modify a finished model"); + return ANEURALNETWORKS_BAD_STATE; + } + relaxed_ = relaxed; return ANEURALNETWORKS_NO_ERROR; } -int 
Model::IdentifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, +int Model::finish() { + if (finished_) { + LOGE("Model::finish the model is already finished"); + return ANEURALNETWORKS_BAD_STATE; + } + finished_ = true; + return ANEURALNETWORKS_NO_ERROR; +} + +int Model::identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) { if (finished_) { - std::cout << "Error: can not modify a finished model." << std::endl; + LOGE("Model::identifyInputsAndOutputs cannot modify a finished model"); return ANEURALNETWORKS_BAD_STATE; } inputs_ = std::vector<uint32_t>(inputs, inputs + inputCount); outputs_ = std::vector<uint32_t>(outputs, outputs + outputCount); - // for (uint32_t in : inputs_) { - // auto in_shape = tensors_[in].shape; - // bool no_zero = std::all_of(in_shape.begin(), in_shape.end(), [](int s) { return s != 0; }); - // if (!no_zero) { - // std::cout << "Error: Can not support zero shape in input tensor" << std::endl; - // return ANEURALNETWORKS_BAD_DATA; - // } - // } - // for (uint32_t out : outputs_) { - // auto out_shape = tensors_[out].shape; - // bool no_zero = - // std::all_of(out_shape.begin(), out_shape.end(), [](int s) { return s != 0; }); - // if (!no_zero) { - // std::cout << "Error: Can not support zero shape in output tensor" << std::endl; - // return ANEURALNETWORKS_BAD_DATA; - // } - // } return ANEURALNETWORKS_NO_ERROR; }
-int Model::GetSupportedOperations(bool* supported_ops) const { - std::cout << "SL graph has "<< op_creators_.size() << " ops totally"<< std::endl; - for (int i = 0; i < op_creators_.size(); ++i) { - supported_ops[i] = op_creators_[i]->Check() && op_supports_[i]; - std::cout << "op " << op_creators_[i]->Type() << " support status: " << supported_ops[i] - << std::endl; +int Model::getSupportedOperations(bool* supportedOps) const { + if (!finished_) { + LOGE("Model::getSupportedOperations the model is unfinished"); + return ANEURALNETWORKS_BAD_STATE; + } + + LOGV("Model::getSupportedOperations SL graph total ops count: %zu", opCreators_.size()); + for (size_t i = 0; i < opCreators_.size(); i++) { + supportedOps[i] = opCreators_[i]->checkSupported() && opSupported_[i]; + LOGV("Model::getSupportedOperations op index: %zu, type: %d, supported: %d", i, + opCreators_[i]->getType(), supportedOps[i]); } return ANEURALNETWORKS_NO_ERROR; } -} // namespace sl -} // namespace android -} // namespace vsi \ No newline at end of file +} // namespace vsi::android::sl \ No newline at end of file
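As a usage note (illustrative only), `getSupportedOperations` fills one flag per operation, in the order the operations were added, and must only be called on a finished model; `opCount` below is assumed to equal the number of `addOperation` calls.

```cpp
// Illustrative only: how a shim driver might consume getSupportedOperations.
#include <memory>

bool isFullySupported(const vsi::android::sl::Model& model, size_t opCount) {
    auto supported = std::make_unique<bool[]>(opCount);
    if (model.getSupportedOperations(supported.get()) != ANEURALNETWORKS_NO_ERROR) {
        return false;  // e.g. called before Model::finish()
    }
    for (size_t i = 0; i < opCount; i++) {
        if (!supported[i]) return false;  // VTS skips cases containing this op.
    }
    return true;
}
```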
diff --git a/src/Model.h b/src/Model.h index c871f41..a260273 100644 --- a/src/Model.h +++ b/src/Model.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,69 +21,83 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_MODEL_H #define VSI_ANDROID_SL_MODEL_H + +#include + #include #include -#include +#include "Memory.h" #include "OpCreator.h" #include "Types.h" #include "tim/vx/tensor.h" -#include "Memory.h" -namespace vsi { -namespace android { -namespace sl { + +namespace vsi::android::sl {
class Model { public: - Model() : operand_id_(0), relaxed_(false), finished_(false) {} - int AddOperand(const ANeuralNetworksOperandType& type); - int SetOperandSymmPerChannelQuantParams( + struct OperandValueInfo { + size_t size; + size_t offset; // Offset in const copy storage. + const void* buffer; + const IMemory* memory; + }; + using OperandValueInfoMap = std::unordered_map<uint32_t, OperandValueInfo>; + + int addOperand(const ANeuralNetworksOperandType& type); + int setOperandSymmPerChannelQuantParams( int32_t index, const ANeuralNetworksSymmPerChannelQuantParams& channelQuant); - int SetOperandValue(uint32_t index, const void* buffer, size_t length); - int SetOperandValueFromMemory(int32_t index, const Memory* memory, size_t offset, + int setOperandValue(int32_t index, const void* buffer, size_t length); + int setOperandValueFromMemory(int32_t index, const IMemory* memory, size_t offset, size_t length); - int AddOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, + int setOperandValueFromModel(int32_t index, const Model* reference); + int addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs); - int IdentifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, + int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs); - int RelaxComputationFloat32toFloat16(bool allow) { - if (finished_) { - std::cout << "can not modify a finished model." << std::endl; - return ANEURALNETWORKS_BAD_STATE; - } - relaxed_ = allow; - return ANEURALNETWORKS_NO_ERROR; + int relaxComputationFloat32toFloat16(bool relaxed); + int getSupportedOperations(bool* supportedOps) const; + int finish(); + + TensorMap& getTensorMap() { return tensors_; } + ScalarMap& getScalarMap() { return scalars_; } + [[nodiscard]] const TensorMap& getTensorMap() const { return tensors_; } + [[nodiscard]] const ScalarMap& getScalarMap() const { return scalars_; } + [[nodiscard]] const OperandValueInfoMap& getOperandValueInfos() const { + return operandValueInfos_; } - int Finish() { - finished_ = true; - return ANEURALNETWORKS_NO_ERROR; + [[nodiscard]] const void* getConstantCopyData(size_t offset) const { + return constantCopyStorage_.data() + offset; + } + [[nodiscard]] const std::vector<std::shared_ptr<OpCreator>>& getOpCreators() const { + return opCreators_; } - int GetSupportedOperations(bool* supported_ops) const; - const TensorMap& Tensors() const { return tensors_; } - const ScalarMap& Scalars() const { return scalars_; } - TensorMap& Tensors() { return tensors_; } - ScalarMap& Scalars() { return scalars_; } - std::vector<std::shared_ptr<OpCreator>>& Operations() { return op_creators_; } - const std::vector<uint32_t>& Inputs() { return inputs_; } - const std::vector<uint32_t>& Outputs() { return outputs_; } - bool IsRelaxed() { return relaxed_; } + std::vector<uint32_t>& getInputs() { return inputs_; } + std::vector<uint32_t>& getOutputs() { return outputs_; } + [[nodiscard]] const std::vector<uint32_t>& getInputs() const { return inputs_; } + [[nodiscard]] const std::vector<uint32_t>& getOutputs() const { return outputs_; } + [[nodiscard]] bool isRelaxed() const { return relaxed_; } + [[nodiscard]] bool isFinished() const { return finished_; } private: TensorMap tensors_; ScalarMap scalars_; - std::vector<std::shared_ptr<OpCreator>> op_creators_; - std::vector<bool> op_supports_; - std::unordered_map<uint32_t, std::vector<uint8_t>> constant_copy_; + OperandValueInfoMap operandValueInfos_; + std::vector<const Model*> referenceModels_; + std::vector<std::shared_ptr<OpCreator>> opCreators_; + std::vector<bool> opSupported_; + std::vector<uint8_t> constantCopyStorage_; std::vector<uint32_t> inputs_; std::vector<uint32_t> outputs_; - int32_t operand_id_; - bool relaxed_; - bool finished_; + + uint32_t numOperands_ = 0; + bool relaxed_ = false; + bool finished_ = false; }; -} // namespace sl -} // namespace android -} // namespace vsi + +} // namespace vsi::android::sl #endif \ No newline at end of file
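For orientation (illustrative only, not part of the patch), this is the `ANeuralNetworksModel_*`-style build sequence the class implements; the operand order and indexes are assumptions for the example.

```cpp
// Illustrative only: building a minimal ADD model with this class.
int buildAddModel(vsi::android::sl::Model& model,
                  const ANeuralNetworksOperandType& tensorType,
                  const ANeuralNetworksOperandType& int32Type) {
    model.addOperand(tensorType);  // 0: input A
    model.addOperand(tensorType);  // 1: input B
    model.addOperand(int32Type);   // 2: fused activation code
    model.addOperand(tensorType);  // 3: output
    int32_t act = 0;               // ANEURALNETWORKS_FUSED_NONE
    model.setOperandValue(2, &act, sizeof(act));
    const uint32_t opInputs[] = {0, 1, 2};
    const uint32_t opOutputs[] = {3};
    model.addOperation(ANEURALNETWORKS_ADD, 3, opInputs, 1, opOutputs);
    const uint32_t modelInputs[] = {0, 1};
    model.identifyInputsAndOutputs(2, modelInputs, 1, opOutputs);
    return model.finish();  // The model becomes immutable afterwards.
}
```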
diff --git a/src/OpCreator.h b/src/OpCreator.h index 6572e34..6ed81dc 100755 --- a/src/OpCreator.h +++ b/src/OpCreator.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,12 +21,17 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_OP_CREATOR_H #define VSI_ANDROID_SL_OP_CREATOR_H + #include #include +#include #include +#include #include +#include + #include "Utils.h" #include "slang/functional.h" @@ -90,28 +95,26 @@ #include "tim/vx/graph.h" #include "tim/vx/ops.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl {
using TensorMap = std::unordered_map<uint32_t, slang::type::tensor_storage>; using ScalarMap = std::unordered_map<uint32_t, slang::type::scalar_storage>;
-inline int32_t ConvertAxis(int32_t axisIn, uint32_t dimNum) { - return dimNum - (axisIn < 0 ? dimNum + axisIn : axisIn) - 1; +static inline int32_t convertToVxAxis(int32_t axis, uint32_t rank) { + return static_cast<int32_t>(rank) - (axis < 0 ? rank + axis : axis) - 1; }
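A few spot-checks of what `convertToVxAxis` computes, since the double reversal (negative-axis wrap plus dimension-order flip) is subtle. Illustrative only:

```cpp
// Illustrative only: convertToVxAxis for an NNAPI tensor of rank 4 (e.g. NHWC).
#include <cassert>

void convertToVxAxisExamples() {
    assert(convertToVxAxis(0, 4) == 3);   // outermost NNAPI axis -> innermost TIM-VX axis
    assert(convertToVxAxis(3, 4) == 0);   // innermost NNAPI axis -> axis 0
    assert(convertToVxAxis(-1, 4) == 0);  // negative axes wrap, as in NNAPI
}
```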
-inline std::vector<uint32_t> ConvertAndroidPermToVsi(std::vector<uint32_t>& perm) { - int rank = perm.size(); +static inline std::vector<uint32_t> convertToVxPerm(std::vector<uint32_t>& perm) { + uint32_t rank = perm.size(); std::reverse(perm.begin(), perm.end()); - for (int i = 0; i < rank; ++i) { + for (uint32_t i = 0; i < rank; ++i) { perm[i] = rank - 1 - perm[i]; } return perm; }
-inline tim::vx::PadType AndroidPadTypeToVsiPadType(int32_t padding_code) { - switch (padding_code) { +static inline tim::vx::PadType convertToVxPadType(int32_t code) { + switch (code) { case 0: return tim::vx::PadType::AUTO; case ANEURALNETWORKS_PADDING_SAME: @@ -119,75 +122,68 @@ inline tim::vx::PadType AndroidPadTypeToVsiPadType(int32_t padding_code) { case ANEURALNETWORKS_PADDING_VALID: return tim::vx::PadType::VALID; default: - std::cout << "Warning: Unsuppoted pad type." << std::endl; - return tim::vx::PadType::AUTO; + LOGW("Padding code: %d is not supported", code); + return tim::vx::PadType::NONE; } }
-inline tim::vx::DataLayout AndroidLayoutToVsiLayout(uint8_t layout) { - switch (layout) { - case 0: - return tim::vx::DataLayout::CWHN; - case 1: - return tim::vx::DataLayout::WHCN; - default: - std::cout << "Warning: Unsuppoted layout type." << std::endl; - return tim::vx::DataLayout::ANY; - } +static inline tim::vx::DataLayout convertToVxLayout(bool isNCHW) { + return isNCHW ? tim::vx::DataLayout::WHCN : tim::vx::DataLayout::CWHN; }
class OpCreator { public: - OpCreator() {} - virtual ~OpCreator() {} - virtual bool Check() = 0; + explicit OpCreator(ANeuralNetworksOperationType type, std::vector<uint32_t> inputs, + std::vector<uint32_t> outputs) + : type_(type), inputs_(std::move(inputs)), outputs_(std::move(outputs)), supported_(true) {} + + virtual ~OpCreator() = default; + virtual bool checkSupported() = 0; virtual std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) = 0; - ANeuralNetworksOperationType Type() { return type_; } - std::vector<uint32_t>& Inputs() { return inputs_; } - std::vector<uint32_t>& Outputs() { return outputs_; } - bool support_state_{true}; + [[nodiscard]] ANeuralNetworksOperationType getType() const { return type_; } + [[nodiscard]] const std::vector<uint32_t>& getInputs() const { return inputs_; } + [[nodiscard]] const std::vector<uint32_t>& getOutputs() const { return outputs_; } + [[nodiscard]] bool isSupported() const { return supported_; } - protected: + private: ANeuralNetworksOperationType type_; std::vector<uint32_t> inputs_; std::vector<uint32_t> outputs_; + + protected: + bool supported_; };
-class OpPlaceHolderCreator : public OpCreator { +class PlaceHolderOpCreator final : public OpCreator { public: - OpPlaceHolderCreator(ANeuralNetworksOperationType type) { - std::cout << "operation " << type << " is not supported, create op placeholder instead" - << std::endl; - support_state_ = false; - type_ = type; + explicit PlaceHolderOpCreator(ANeuralNetworksOperationType type) : OpCreator(type, {}, {}) { + LOGW("OP type: %d is not supported by SL", type); } - bool Check() final { return false; } - std::shared_ptr Lowering(std::shared_ptr graph) final { - return graph->CreateOperation(); // Prevent compiler warnings, not use + bool checkSupported() override { return false; } + std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override { + return graph->CreateOperation(); // Prevent compiler warnings, not used + } };
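Every concrete creator below follows the same two-phase contract: the constructor validates operand counts and records a slang signature, `checkSupported()` re-validates that signature, and `Lowering()` emits the corresponding TIM-VX operation. An illustrative sketch of a driver loop (not part of the patch):

```cpp
// Illustrative only: consuming the OpCreator contract.
std::shared_ptr<tim::vx::Operation> lowerIfSupported(
        OpCreator& creator, std::shared_ptr<tim::vx::Graph> graph) {
    if (!creator.isSupported() || !creator.checkSupported()) {
        return nullptr;  // Reported as unsupported via Model::getSupportedOperations.
    }
    return creator.Lowering(graph);  // Caller binds input/output tensors afterwards.
}
```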
std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ABS, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Abs gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("AbsCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ABS; - inputs_ = inputs; - outputs_ = outputs; uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -195,45 +191,55 @@ class AbsCreator : public OpCreator { op::simple_op::signature signature; }; -class AddCreator : public OpCreator { +class AddCreator final : public OpCreator { public: AddCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ADD, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: Add gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("AddCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ADD; - inputs_ = inputs; - outputs_ = outputs; uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_act = inputs[2]; uint32_t idx_out = outputs[0]; - auto in_shape = tensor_map.at(idx_in).shape; - auto out_shape = tensor_map.at(idx_out).shape; - - auto dim_iter0 = in_shape.begin(); - auto dim_iter1 = out_shape.begin(); - while (dim_iter0 != in_shape.end() && dim_iter1 != out_shape.end()) { - if (*dim_iter0 != *dim_iter1 ) { - auto dim_need_broadcast = *dim_iter0 > *dim_iter1 ? *dim_iter1 : *dim_iter0; - if (dim_need_broadcast != 1) { - std::cout << "Error: Can not broadcast in eletwise" << std::endl; - support_state_ = false; + auto in_shape = tensorMap.at(idx_in).shape; + auto out_shape = tensorMap.at(idx_out).shape; + + bool no_zero_dim = + std::all_of(in_shape.begin(), in_shape.end(), [](uint32_t dim) { return dim > 0; }); + if (!no_zero_dim) { + LOGI("AddCreator: Cannot support zero dims before broadcast"); + supported_ = false; + } else { + auto dim_iter0 = in_shape.begin(); + auto dim_iter1 = out_shape.begin(); + while (dim_iter0 != in_shape.end() && dim_iter1 != out_shape.end()) { + if (*dim_iter0 != *dim_iter1) { + auto dim_need_broadcast = *dim_iter0 > *dim_iter1 ? 
*dim_iter1 : *dim_iter0; + if (dim_need_broadcast != 1) { + LOGE("AddCreator: Invalid shape for broadcast"); + supported_ = false; + } } + ++dim_iter0; + ++dim_iter1; } - ++dim_iter0; - ++dim_iter1; } - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); - std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalar_map.at(idx_act)); + auto act_code_data = scalarMap.at(idx_act).data.data(); + if (act_code_data == nullptr) { + LOGE("AddCreator: Activation code cannot be null"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); + std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalarMap.at(idx_act)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -241,32 +247,31 @@ class AddCreator : public OpCreator { op::eltwise::signature signature; }; -class ArgmaxCreator : public OpCreator { +class ArgmaxCreator final : public OpCreator { public: ArgmaxCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ARGMAX, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Argmax gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ArgmaxCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ARGMAX; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_out = outputs[0]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); int32_t axis = *(int32_t*)p_axis; - uint32_t rank = tensor_map.at(idx_in).shape.size(); - int32_t axis_vx = ConvertAxis(axis, rank); + uint32_t rank = tensorMap.at(idx_in).shape.size(); + int32_t axis_vx = convertToVxAxis(axis, rank); - std::get<0>(signature.field_tuple) = op::arg::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::arg::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::arg::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::arg::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::arg::Axis(axis_vx); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; return graph->CreateOperation(axis); @@ -276,32 +281,31 @@ class ArgmaxCreator : public OpCreator { op::arg::signature signature; }; -class ArgminCreator : public OpCreator { +class ArgminCreator final : public OpCreator 
{ public: ArgminCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ARGMIN, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Argmin gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ArgminCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ARGMIN; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_out = outputs[0]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); int32_t axis = *(int32_t*)p_axis; - uint32_t rank = tensor_map.at(idx_in).shape.size(); - int32_t axis_vx = ConvertAxis(axis, rank); + uint32_t rank = tensorMap.at(idx_in).shape.size(); + int32_t axis_vx = convertToVxAxis(axis, rank); - std::get<0>(signature.field_tuple) = op::arg::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::arg::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::arg::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::arg::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::arg::Axis(axis_vx); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; return graph->CreateOperation(axis); @@ -311,19 +315,18 @@ class ArgminCreator : public OpCreator { op::arg::signature signature; }; -class AveragePool2DCreator : public OpCreator { +class AveragePool2DCreator final : public OpCreator { public: AveragePool2DCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_AVERAGE_POOL_2D, inputs, outputs) { if ((inputs.size() != 7 && inputs.size() != 8 && inputs.size() != 10 && inputs.size() != 11) || outputs.size() != 1) { - std::cout << "Error: AveragePool2D gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("AveragePool2DCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_AVERAGE_POOL_2D; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_padding_code, idx_pad_left, idx_pad_right, idx_pad_top, idx_pad_bottom, idx_stride_width, idx_stride_height, idx_filter_width, idx_filter_height, idx_act, @@ -346,10 +349,10 @@ class AveragePool2DCreator : public OpCreator { idx_filter_height = inputs[8]; idx_act = inputs[9]; - const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data(); - const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data(); - const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data(); - const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data(); + const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data(); + const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data(); + const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data(); + const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data(); pad[0] = 
*(int32_t*)p_left; pad[1] = *(int32_t*)p_right; pad[2] = *(int32_t*)p_top; @@ -357,7 +360,7 @@ class AveragePool2DCreator : public OpCreator { if (inputs.size() == 11) { idx_layout = inputs[10]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } } @@ -369,36 +372,36 @@ idx_filter_height = inputs[5]; idx_act = inputs[6]; - const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data(); + const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data(); padding_code = *(int32_t*)p_code; if (inputs.size() == 8) { idx_layout = inputs[7]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } } - const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data(); - const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data(); - const uint8_t* p_filter_width = scalar_map.at(idx_filter_width).data.data(); - const uint8_t* p_filter_height = scalar_map.at(idx_filter_height).data.data(); + const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data(); + const uint8_t* p_stride_height = scalarMap.at(idx_stride_height).data.data(); + const uint8_t* p_filter_width = scalarMap.at(idx_filter_width).data.data(); + const uint8_t* p_filter_height = scalarMap.at(idx_filter_height).data.data(); stride[0] = *(int32_t*)p_stride_width; stride[1] = *(int32_t*)p_stride_height; filter[0] = *(int32_t*)p_filter_width; - filter[1] = *(int32_t*)p_filter_width; + filter[1] = *(int32_t*)p_filter_height; - std::get<0>(signature.field_tuple) = op::pool2d::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::pool2d::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::pool2d::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::pool2d::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::pool2d::Pad(pad); // construct scalar_field std::get<3>(signature.field_tuple) = op::pool2d::PaddingCode(padding_code); std::get<4>(signature.field_tuple) = op::pool2d::Stride(stride); std::get<5>(signature.field_tuple) = op::pool2d::Filter(filter); - std::get<6>(signature.field_tuple) = op::pool2d::Activation(scalar_map.at(idx_act)); + std::get<6>(signature.field_tuple) = op::pool2d::Activation(scalarMap.at(idx_act)); std::get<7>(signature.field_tuple) = op::pool2d::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_pad = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_padding_code = std::get<3>(signature.field_tuple).storage.data.data(); const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data(); @@ -408,8 +411,8 @@ class AveragePool2DCreator : public OpCreator { *((uint32_t*)p_pad + 2), *((uint32_t*)p_pad + 3)}; std::array stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)}; std::array filter = {*((uint32_t*)p_filter), *((uint32_t*)p_filter + 1)}; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); 
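+        // A padding_code of 0 maps to PadType::AUTO above, signalling that explicit pad values were supplied; SAME/VALID come from the implicit-padding operand form.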
+ auto pad_type = convertToVxPadType(*(int32_t*)p_padding_code); if (pad_type == tim::vx::PadType::AUTO) { return graph->CreateOperation(tim::vx::PoolType::AVG_ANDROID, pad, filter, stride, @@ -425,17 +428,16 @@ class AveragePool2DCreator : public OpCreator { op::pool2d::signature signature; }; -class BatchMatmulCreator : public OpCreator { +class BatchMatmulCreator final : public OpCreator { public: BatchMatmulCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_BATCH_MATMUL, inputs, outputs) { if ((inputs.size() != 2 && inputs.size() != 4) || outputs.size() != 1) { - std::cout << "Error: BatchMatmul gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("BatchMatmulCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_BATCH_MATMUL; - inputs_ = inputs; - outputs_ = outputs; + bool adj_x = false, adj_y = false; uint32_t idx_in = inputs[0]; uint32_t idx_in2 = inputs[1]; @@ -443,25 +445,24 @@ if (inputs.size() == 4) { uint32_t idx_adj_x = inputs[2]; uint32_t idx_adj_y = inputs[3]; - auto p_adj_x = scalar_map.at(idx_adj_x).data.data(); - auto p_adj_y = scalar_map.at(idx_adj_y).data.data(); + auto p_adj_x = scalarMap.at(idx_adj_x).data.data(); + auto p_adj_y = scalarMap.at(idx_adj_y).data.data(); adj_x = *(bool*)p_adj_x; adj_y = *(bool*)p_adj_y; if (adj_x && adj_y) { - std::cout << "Error: Matmul does not support x and y being true at the same time" - << std::endl; - support_state_ = false; + LOGI("BatchMatmulCreator: adj_x and adj_y being true simultaneously is not supported"); + supported_ = false; } } - std::get<0>(signature.field_tuple) = op::batch_matmul::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::batch_matmul::Input2(tensor_map.at(idx_in2)); - std::get<2>(signature.field_tuple) = op::batch_matmul::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::batch_matmul::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::batch_matmul::Input2(tensorMap.at(idx_in2)); + std::get<2>(signature.field_tuple) = op::batch_matmul::Output(tensorMap.at(idx_out)); std::get<3>(signature.field_tuple) = op::batch_matmul::Adj_x(adj_x); std::get<4>(signature.field_tuple) = op::batch_matmul::Adj_y(adj_y); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { auto p_adj_x = std::get<3>(signature.field_tuple).storage.data.data(); auto p_adj_y = std::get<4>(signature.field_tuple).storage.data.data(); bool adj_x = *(bool*)p_adj_x; @@ -473,54 +474,53 @@ op::batch_matmul::signature signature; }; -class BatchToSpaceCreator : public OpCreator { +class BatchToSpaceCreator final : public OpCreator { public: BatchToSpaceCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_BATCH_TO_SPACE_ND, inputs, outputs) { if ((inputs.size() != 2 && inputs.size() != 3) || outputs.size() != 1) { - std::cout << "Error: BatchToSpace gets invalid number of operands" << std::endl; - 
support_state_ = false; + LOGE("BatchToSpaceCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_BATCH_TO_SPACE_ND; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_block_size = inputs[1]; uint32_t idx_layout; uint32_t idx_out = outputs[0]; - auto block_size_attr = tensor_map.at(idx_block_size).attr; + auto block_size_attr = tensorMap.at(idx_block_size).attr; if (block_size_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: BlockSize tensor as INPUT is not supported in BatchToSpace" - << std::endl; - support_state_ = false; + LOGI("BatchToSpaceCreator: Cannot support blockSize tensor as INPUT"); + supported_ = false; } - const void* p_block_size = tensor_map.at(idx_block_size).data; - const uint32_t block_size_length = tensor_map.at(idx_block_size).data_length / 4; + auto block_size_tensor = tensorMap.at(idx_block_size); + const void* p_block_size = block_size_tensor.data.data(); + const uint32_t block_size_length = block_size_tensor.data.size() / 4; std::vector<int32_t> block_size((int32_t*)p_block_size, (int32_t*)p_block_size + block_size_length); bool layout = false; if (inputs.size() == 3) { idx_layout = inputs[2]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } - std::get<0>(signature.field_tuple) = op::batch_to_space::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::batch_to_space::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::batch_to_space::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::batch_to_space::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::batch_to_space::BlockSize(block_size); std::get<3>(signature.field_tuple) = op::batch_to_space::Crop(std::vector<int32_t>{0, 0, 0, 0}); std::get<4>(signature.field_tuple) = op::batch_to_space::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_block_size = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_layout = std::get<4>(signature.field_tuple).storage.data.data(); // block_size is reversed because the input shape is reversed std::vector<int32_t> block_size = {*((int32_t*)p_block_size + 1), *(int32_t*)p_block_size}; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); return graph->CreateOperation( block_size, std::vector<int32_t>{0, 0, 0, 0}, layout); } @@ -529,35 +529,34 @@ op::batch_to_space::signature signature; }; -class ConcatenationCreator : public OpCreator { +class ConcatenationCreator final : public OpCreator { public: ConcatenationCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_CONCATENATION, inputs, outputs) { if (inputs.size() < 2 || outputs.size() != 1) { - std::cout << "Concatenation gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ConcatenationCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_CONCATENATION; - inputs_ = inputs; - outputs_ = 
outputs; + uint32_t idx_in = inputs[0]; auto iter = inputs.rbegin(); uint32_t idx_axis = *iter; int32_t input_cnt = inputs.size() - 1; uint32_t idx_out = outputs[0]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); int32_t axis = *(int32_t*)p_axis; - int32_t axis_vx = ConvertAxis(axis, tensor_map.at(idx_in).shape.size()); + int32_t axis_vx = convertToVxAxis(axis, tensorMap.at(idx_in).shape.size()); - std::get<0>(signature.field_tuple) = op::concatenation::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::concatenation::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::concatenation::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::concatenation::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::concatenation::Axis(axis_vx); std::get<3>(signature.field_tuple) = op::concatenation::Input_cnt(input_cnt); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_input_cnt = std::get<3>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; @@ -569,33 +568,31 @@ op::concatenation::signature signature; }; -class CastCreator : public OpCreator { +class CastCreator final : public OpCreator { public: CastCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_CAST, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Cast gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("CastCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_CAST; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - auto input_type = tensor_map.at(idx_in).dtype; - auto quant_type = tensor_map.at(idx_in).qtype; + auto input_type = tensorMap.at(idx_in).dtype; + auto quant_type = tensorMap.at(idx_in).qtype; if (input_type == slang::type::data_type::kUINT16 && quant_type == slang::type::quant_type::kASYMM) { - std::cout << "Error: Cast can not support input dtype uint16 with qtype asymm" - << std::endl; - support_state_ = false; + LOGI("CastCreator: Cannot support input dtype uint16 with qtype asymm"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::cast::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::cast::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::cast::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::cast::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -603,33 +600,32 @@ op::cast::signature signature; }; -class ChannelShuffleCreator : public 
OpCreator { +class ChannelShuffleCreator final : public OpCreator { public: ChannelShuffleCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_CHANNEL_SHUFFLE, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "ChannelShuffle gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ChannelShuffleCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_CHANNEL_SHUFFLE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_groups = inputs[1]; uint32_t idx_axis = inputs[2]; uint32_t idx_out = outputs[0]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); int32_t axis = *(int32_t*)p_axis; - int32_t axis_vx = ConvertAxis(axis, tensor_map.at(idx_in).shape.size()); + int32_t axis_vx = convertToVxAxis(axis, tensorMap.at(idx_in).shape.size()); - std::get<0>(signature.field_tuple) = op::channel_shuffle::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::channel_shuffle::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::channel_shuffle::Groups(scalar_map.at(idx_groups)); + std::get<0>(signature.field_tuple) = op::channel_shuffle::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::channel_shuffle::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::channel_shuffle::Groups(scalarMap.at(idx_groups)); std::get<3>(signature.field_tuple) = op::channel_shuffle::Axis(axis_vx); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_groups = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_axis = std::get<3>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; @@ -641,19 +637,18 @@ class ChannelShuffleCreator : public OpCreator { op::channel_shuffle::signature signature; }; -class Conv2DCreator : public OpCreator { +class Conv2DCreator final : public OpCreator { public: Conv2DCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_CONV_2D, inputs, outputs) { if ((inputs.size() != 7 && inputs.size() != 8 && inputs.size() != 10 && inputs.size() != 11 && inputs.size() != 13) || outputs.size() != 1) { - std::cout << "Error: Conv2D gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("Conv2DCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_CONV_2D; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_kernel = inputs[1]; uint32_t idx_bias = inputs[2]; @@ -667,25 +662,25 @@ class Conv2DCreator : public OpCreator { int32_t padding_code = 0; bool layout = false; // default to CWHN(false), true implies WHCN. 
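+        // NNAPI overloads CONV_2D: with implicit padding, inputs[3] holds a padding-code scalar; with explicit padding, inputs[3..6] hold the four pad amounts. The branch below tells the two forms apart by operand count and by whether inputs[7] is the BOOL8 layout flag.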
- auto bias_type = tensor_map.at(idx_bias).dtype; + auto bias_type = tensorMap.at(idx_bias).dtype; if (inputs.size() == 7 || - scalar_map.at(inputs.at(7)).dtype == slang::type::data_type::kBOOL8) { + scalarMap.at(inputs.at(7)).dtype == slang::type::data_type::kBOOL8) { // implies implicit padding idx_padding_code = inputs[3]; idx_stride_width = inputs[4]; idx_stride_height = inputs[5]; idx_act = inputs[6]; - const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data(); + const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data(); padding_code = *(int32_t*)p_code; if (inputs.size() == 8 || inputs.size() == 10) { idx_layout = inputs[7]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; if (inputs.size() == 10) { uint32_t idx_dilation_width = inputs[8]; uint32_t idx_dilation_height = inputs[9]; - const uint8_t* d_width = scalar_map.at(idx_dilation_width).data.data(); - const uint8_t* d_height = scalar_map.at(idx_dilation_height).data.data(); + const uint8_t* d_width = scalarMap.at(idx_dilation_width).data.data(); + const uint8_t* d_height = scalarMap.at(idx_dilation_height).data.data(); dilation[0] = *(int32_t*)d_width; dilation[1] = *(int32_t*)d_height; } @@ -699,51 +694,51 @@ class Conv2DCreator : public OpCreator { idx_stride_height = inputs[8]; idx_act = inputs[9]; - const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data(); - const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data(); - const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data(); - const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data(); + const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data(); + const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data(); + const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data(); + const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data(); pad[0] = *(int32_t*)p_left; pad[1] = *(int32_t*)p_right; pad[2] = *(int32_t*)p_top; pad[3] = *(int32_t*)p_bottom; if (inputs.size() == 11 || inputs.size() == 13) { idx_layout = inputs[10]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; if (inputs.size() == 13) { uint32_t idx_dilation_width = inputs[11]; uint32_t idx_dilation_height = inputs[12]; - const uint8_t* d_width = scalar_map.at(idx_dilation_width).data.data(); - const uint8_t* d_height = scalar_map.at(idx_dilation_height).data.data(); + const uint8_t* d_width = scalarMap.at(idx_dilation_width).data.data(); + const uint8_t* d_height = scalarMap.at(idx_dilation_height).data.data(); dilation[0] = *(int32_t*)d_width; dilation[1] = *(int32_t*)d_height; } } } - const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data(); - const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data(); + const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data(); + const uint8_t* p_stride_height = scalarMap.at(idx_stride_height).data.data(); stride[0] = *(int32_t*)p_stride_width; stride[1] = *(int32_t*)p_stride_height; - std::get<0>(signature.field_tuple) = op::conv2d::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::conv2d::Kernel(tensor_map.at(idx_kernel)); - auto kernel_qtype = tensor_map.at(idx_kernel).qtype; - auto bias = tensor_map.at(idx_bias); + std::get<0>(signature.field_tuple) = op::conv2d::Input(tensorMap.at(idx_in)); + 
std::get<1>(signature.field_tuple) = op::conv2d::Kernel(tensorMap.at(idx_kernel)); + auto kernel_qtype = tensorMap.at(idx_kernel).qtype; + auto bias = tensorMap.at(idx_bias); bias.qtype = kernel_qtype; std::get<2>(signature.field_tuple) = op::conv2d::Bias(bias); - std::get<3>(signature.field_tuple) = op::conv2d::Output(tensor_map.at(idx_out)); + std::get<3>(signature.field_tuple) = op::conv2d::Output(tensorMap.at(idx_out)); std::get<4>(signature.field_tuple) = op::conv2d::Stride(stride); std::get<5>(signature.field_tuple) = op::conv2d::Dilation(dilation); std::get<6>(signature.field_tuple) = op::conv2d::PadType(padding_code); std::get<7>(signature.field_tuple) = op::conv2d::Pad(pad); // construct scalar_field - std::get<8>(signature.field_tuple) = op::conv2d::Activation(scalar_map.at(idx_act)); + std::get<8>(signature.field_tuple) = op::conv2d::Activation(scalarMap.at(idx_act)); std::get<9>(signature.field_tuple) = op::conv2d::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint32_t* p_ksize = std::get<1>(signature.field_tuple).storage.shape.data(); // IWHO const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data(); const uint8_t* p_dilation = std::get<5>(signature.field_tuple).storage.data.data(); @@ -756,8 +751,8 @@ class Conv2DCreator : public OpCreator { *((uint32_t*)p_pad + 2), *((uint32_t*)p_pad + 3)}; std::array stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)}; std::array dilation = {*((uint32_t*)p_dilation), *((uint32_t*)p_dilation + 1)}; - auto pad_type = AndroidPadTypeToVsiPadType(*(int32_t*)p_padding_code); - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto pad_type = convertToVxPadType(*(int32_t*)p_padding_code); + auto layout = convertToVxLayout(*(bool*)p_layout); return graph->CreateOperation( 0, pad_type, ksize, stride, dilation, pad, 0, layout, tim::vx::DataLayout::IcWHOc); } @@ -766,20 +761,19 @@ class Conv2DCreator : public OpCreator { op::conv2d::signature signature; }; -class DepthwiseConv2DCreator : public OpCreator { +class DepthwiseConv2DCreator final : public OpCreator { public: DepthwiseConv2DCreator(const std::vector& inputs, - const std::vector& outputs, const TensorMap& tensor_map, - const ScalarMap& scalar_map) { + const std::vector& outputs, const TensorMap& tensorMap, + const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_DEPTHWISE_CONV_2D, inputs, outputs) { if ((inputs.size() != 8 && inputs.size() != 9 && inputs.size() != 11 && inputs.size() != 12 && inputs.size() != 14) || outputs.size() != 1) { - std::cout << "Error: DepthwiseConv2D gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("DepthwiseConv2DCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_DEPTHWISE_CONV_2D; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_kernel = inputs[1]; uint32_t idx_bias = inputs[2]; @@ -793,30 +787,35 @@ class DepthwiseConv2DCreator : public OpCreator { int32_t padding_code = 0; bool layout = false; // default to CWHN(false), true implies WHCN. 
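+        // Same disambiguation as CONV_2D, shifted by one operand: depthwise carries an extra channel-multiplier scalar, so the implicit form has 8 inputs and the optional layout flag, when present, sits at inputs[8].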
- auto bias_type = tensor_map.at(idx_bias).dtype; + auto bias_type = tensorMap.at(idx_bias).dtype; if (bias_type == slang::type::data_type::kFP16) { - std::cout << "Error: F16 bias is not support in depthwise conv" << std::endl; - support_state_ = false; + LOGI("DepthwiseConv2DCreator: Cannot support f16 bias"); + supported_ = false; + } + auto kernel_attr = tensorMap.at(idx_kernel).attr; + if (kernel_attr != slang::type::tensor_attr::kCONSTANT) { + LOGI("DepthwiseConv2DCreator: Cannot support non-const weight"); + supported_ = false; } if (inputs.size() == 8 || - scalar_map.at(inputs.at(8)).dtype == slang::type::data_type::kBOOL8) { + scalarMap.at(inputs.at(8)).dtype == slang::type::data_type::kBOOL8) { // implies implicit padding idx_padding_code = inputs[3]; idx_stride_width = inputs[4]; idx_stride_height = inputs[5]; idx_multipier = inputs[6]; idx_act = inputs[7]; - const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data(); + const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data(); padding_code = *(int32_t*)p_code; if (inputs.size() == 9 || inputs.size() == 11) { idx_layout = inputs[8]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; if (inputs.size() == 11) { uint32_t idx_dilation_width = inputs[9]; uint32_t idx_dilation_height = inputs[10]; - const uint8_t* d_width = scalar_map.at(idx_dilation_width).data.data(); - const uint8_t* d_height = scalar_map.at(idx_dilation_height).data.data(); + const uint8_t* d_width = scalarMap.at(idx_dilation_width).data.data(); + const uint8_t* d_height = scalarMap.at(idx_dilation_height).data.data(); dilation[0] = *(int32_t*)d_width; dilation[1] = *(int32_t*)d_height; } @@ -832,56 +831,60 @@ idx_multipier = inputs[9]; idx_act = inputs[10]; - const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data(); - const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data(); - const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data(); - const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data(); + const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data(); + const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data(); + const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data(); + const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data(); pad[0] = *(int32_t*)p_left; pad[1] = *(int32_t*)p_right; pad[2] = *(int32_t*)p_top; pad[3] = *(int32_t*)p_bottom; if (inputs.size() == 12 || inputs.size() == 14) { idx_layout = inputs[11]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; if (inputs.size() == 14) { uint32_t idx_dilation_width = inputs[12]; uint32_t idx_dilation_height = inputs[13]; - const uint8_t* d_width = scalar_map.at(idx_dilation_width).data.data(); - const uint8_t* d_height = scalar_map.at(idx_dilation_height).data.data(); + const uint8_t* d_width = scalarMap.at(idx_dilation_width).data.data(); + const uint8_t* d_height = scalarMap.at(idx_dilation_height).data.data(); dilation[0] = *(int32_t*)d_width; dilation[1] = *(int32_t*)d_height; } } } - const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data(); - const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data(); + const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data(); + const uint8_t* p_stride_height = 
scalarMap.at(idx_stride_height).data.data(); stride[0] = *(int32_t*)p_stride_width; stride[1] = *(int32_t*)p_stride_height; - std::get<0>(signature.field_tuple) = op::depthwise_conv2d::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = - op::depthwise_conv2d::Kernel(tensor_map.at(idx_kernel)); - auto kernel_qtype = tensor_map.at(idx_kernel).qtype; - auto bias = tensor_map.at(idx_bias); + auto k_shape = tensorMap.at(idx_kernel).shape; + if (k_shape[0] != 1) { + LOGE("DepthwiseConv2DCreator: Invalid kernel shape"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::depthwise_conv2d::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::depthwise_conv2d::Kernel(tensorMap.at(idx_kernel)); + auto kernel_qtype = tensorMap.at(idx_kernel).qtype; + auto bias = tensorMap.at(idx_bias); bias.qtype = kernel_qtype; std::get<2>(signature.field_tuple) = op::depthwise_conv2d::Bias(bias); - std::get<3>(signature.field_tuple) = op::depthwise_conv2d::Output(tensor_map.at(idx_out)); + std::get<3>(signature.field_tuple) = op::depthwise_conv2d::Output(tensorMap.at(idx_out)); std::get<4>(signature.field_tuple) = op::depthwise_conv2d::Stride(stride); std::get<5>(signature.field_tuple) = op::depthwise_conv2d::Dilation(dilation); std::get<6>(signature.field_tuple) = op::depthwise_conv2d::PadType(padding_code); std::get<7>(signature.field_tuple) = op::depthwise_conv2d::Pad(pad); // construct scalar_field std::get<8>(signature.field_tuple) = - op::depthwise_conv2d::Multiplier(scalar_map.at(idx_multipier)); + op::depthwise_conv2d::Multiplier(scalarMap.at(idx_multipier)); std::get<9>(signature.field_tuple) = - op::depthwise_conv2d::Activation(scalar_map.at(idx_act)); + op::depthwise_conv2d::Activation(scalarMap.at(idx_act)); std::get<10>(signature.field_tuple) = op::depthwise_conv2d::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint32_t* p_ksize = std::get<1>(signature.field_tuple).storage.shape.data(); // OWH1 const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data(); const uint8_t* p_dilation = std::get<5>(signature.field_tuple).storage.data.data(); @@ -896,8 +899,8 @@ *((uint32_t*)p_pad + 2), *((uint32_t*)p_pad + 3)}; std::array stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)}; std::array dilation = {*((uint32_t*)p_dilation), *((uint32_t*)p_dilation + 1)}; - auto pad_type = AndroidPadTypeToVsiPadType(*(int32_t*)p_padding_code); - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto pad_type = convertToVxPadType(*(int32_t*)p_padding_code); + auto layout = convertToVxLayout(*(bool*)p_layout); return graph->CreateOperation(0, pad_type, ksize, stride, dilation, pad, multiplier, layout, tim::vx::DataLayout::IcWHOc); @@ -907,40 +910,39 @@ op::depthwise_conv2d::signature signature; }; -class DepthToSpaceCreator : public OpCreator { +class DepthToSpaceCreator final : public OpCreator { public: DepthToSpaceCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_DEPTH_TO_SPACE, inputs, outputs) { if 
((inputs.size() != 2 && inputs.size() != 3) || outputs.size() != 1) { - std::cout << "Error: DepthToSpace gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("DepthToSpaceCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_DEPTH_TO_SPACE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_block_size = inputs[1]; uint32_t idx_layout; bool layout = false; if (inputs.size() == 3) { idx_layout = inputs[2]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::depth_to_space::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::depth_to_space::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::depth_to_space::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::depth_to_space::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::depth_to_space::BlockSize(scalar_map.at(idx_block_size)); + op::depth_to_space::BlockSize(scalarMap.at(idx_block_size)); std::get<3>(signature.field_tuple) = op::depth_to_space::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_block_size = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_layout = std::get<3>(signature.field_tuple).storage.data.data(); int32_t block_size = *(int32_t*)p_block_size; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); return graph->CreateOperation( block_size, tim::vx::ops::DepthToSpace::DCR_mode, layout); } @@ -949,30 +951,29 @@ op::depth_to_space::signature signature; }; -class DequantizeCreator : public OpCreator { +class DequantizeCreator final : public OpCreator { public: DequantizeCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_DEQUANTIZE, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Dequantize gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("DequantizeCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_DEQUANTIZE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - auto q_type = tensor_map.at(idx_in).qtype; + auto q_type = tensorMap.at(idx_in).qtype; if (q_type == slang::type::quant_type::kSYMM_PCQ) { - std::cout << "Error: Dequantize not support perchannel channel quantize" << std::endl; - support_state_ = false; + LOGI("DequantizeCreator: Cannot support per-channel quantization"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::dequantize::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::dequantize::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::dequantize::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::dequantize::Output(tensorMap.at(idx_out)); } - bool 
Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -980,29 +981,28 @@ class DequantizeCreator : public OpCreator { op::dequantize::signature signature; }; -class DivCreator : public OpCreator { +class DivCreator final : public OpCreator { public: DivCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_DIV, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: Div gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("DivCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_DIV; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_act = inputs[2]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); - std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalar_map.at(idx_act)); + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); + std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalarMap.at(idx_act)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1010,30 +1010,28 @@ class DivCreator : public OpCreator { op::eltwise::signature signature; }; -class EmbeddingLookupCreator : public OpCreator { +class EmbeddingLookupCreator final : public OpCreator { public: EmbeddingLookupCreator(const std::vector& inputs, - const std::vector& outputs, const TensorMap& tensor_map, - const ScalarMap& scalar_map) { + const std::vector& outputs, const TensorMap& tensorMap, + const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_EMBEDDING_LOOKUP, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: EmbeddingLookup gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("EmbeddingLookupCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_EMBEDDING_LOOKUP; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_lookups = inputs[0]; uint32_t idx_values = inputs[1]; uint32_t idx_out = outputs[0]; std::get<0>(signature.field_tuple) = - op::embedding_lookup::Lookups(tensor_map.at(idx_lookups)); - std::get<1>(signature.field_tuple) = - op::embedding_lookup::Values(tensor_map.at(idx_values)); - std::get<2>(signature.field_tuple) = op::embedding_lookup::Output(tensor_map.at(idx_out)); + op::embedding_lookup::Lookups(tensorMap.at(idx_lookups)); + std::get<1>(signature.field_tuple) = 
op::embedding_lookup::Values(tensorMap.at(idx_values)); + std::get<2>(signature.field_tuple) = op::embedding_lookup::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1041,27 +1039,31 @@ op::embedding_lookup::signature signature; }; -class EluCreator : public OpCreator { +class EluCreator final : public OpCreator { public: EluCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ELU, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Elu gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("EluCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ELU; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_alpha = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::act_with_alpha::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::act_with_alpha::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::act_with_alpha::Alpha(scalar_map.at(idx_alpha)); + auto shape = tensorMap.at(idx_in).shape; + if (shape.size() > 4) { + LOGE("EluCreator: Elu only supports up to 4 dimensions"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::act_with_alpha::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::act_with_alpha::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::act_with_alpha::Alpha(scalarMap.at(idx_alpha)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_alpha = std::get<2>(signature.field_tuple).storage.data.data(); auto datatype = std::get<0>(signature.field_tuple).storage.dtype; switch (datatype) { @@ -1076,27 +1078,26 @@ op::act_with_alpha::signature signature; }; -class EqualCreator : public OpCreator { +class EqualCreator final : public OpCreator { public: EqualCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_EQUAL, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Equal gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("EqualCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_EQUAL; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::relational_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = 
op::relational_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::relational_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::relational_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1104,25 +1105,24 @@ class EqualCreator : public OpCreator { op::relational_op::signature signature; }; -class ExpCreator : public OpCreator { +class ExpCreator final : public OpCreator { public: ExpCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_EXP, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Exp gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ExpCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_EXP; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1130,32 +1130,31 @@ class ExpCreator : public OpCreator { op::simple_op::signature signature; }; -class ExpandDimsCreator : public OpCreator { +class ExpandDimsCreator final : public OpCreator { public: ExpandDimsCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_EXPAND_DIMS, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: ExpandDims gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ExpandDimsCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_EXPAND_DIMS; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_out = outputs[0]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); int32_t axis_android = *(int32_t*)p_axis; - int32_t rank = tensor_map.at(idx_in).shape.size(); - int32_t axis_vx = ConvertAxis(axis_android, rank + 1); + int32_t rank = tensorMap.at(idx_in).shape.size(); + int32_t axis_vx = convertToVxAxis(axis_android, rank + 1); - std::get<0>(signature.field_tuple) = op::expand_dims::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::expand_dims::Output(tensor_map.at(idx_out)); + 
std::get<0>(signature.field_tuple) = op::expand_dims::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::expand_dims::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::expand_dims::Axis(axis_vx); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { auto p_axis = std::get<2>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; int32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); @@ -1175,25 +1174,24 @@ class ExpandDimsCreator : public OpCreator { op::expand_dims::signature signature; }; -class FloorCreator : public OpCreator { +class FloorCreator final : public OpCreator { public: FloorCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_FLOOR, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Floor gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("FloorCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_FLOOR; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1201,35 +1199,34 @@ class FloorCreator : public OpCreator { op::simple_op::signature signature; }; -class FullyConnectedCreator : public OpCreator { +class FullyConnectedCreator final : public OpCreator { public: FullyConnectedCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_FULLY_CONNECTED, inputs, outputs) { if (inputs.size() != 4 || outputs.size() != 1) { - std::cout << "Error: FullyConnected gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("FullyConnectedCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_FULLY_CONNECTED; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_weight = inputs[1]; uint32_t idx_bias = inputs[2]; uint32_t idx_out = outputs[0]; - auto bias_type = tensor_map.at(idx_bias).dtype; + auto bias_type = tensorMap.at(idx_bias).dtype; if (bias_type == slang::type::data_type::kFP16) { - std::cout << "Error: F16 bias is not support in fully connected" << std::endl; - support_state_ = false; + LOGI("FullyConnectedCreator: Cannot support f16 bias"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::fully_connected::Input(tensor_map.at(idx_in)); - 
std::get<1>(signature.field_tuple) = op::fully_connected::Weight(tensor_map.at(idx_weight));
-        std::get<2>(signature.field_tuple) = op::fully_connected::Bias(tensor_map.at(idx_bias));
-        std::get<3>(signature.field_tuple) = op::fully_connected::Output(tensor_map.at(idx_out));
+        std::get<0>(signature.field_tuple) = op::fully_connected::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::fully_connected::Weight(tensorMap.at(idx_weight));
+        std::get<2>(signature.field_tuple) = op::fully_connected::Bias(tensorMap.at(idx_bias));
+        std::get<3>(signature.field_tuple) = op::fully_connected::Output(tensorMap.at(idx_out));
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         auto p_weight = (uint32_t*)std::get<1>(signature.field_tuple).storage.shape.data();
         int32_t weight = *(int32_t*)p_weight;
         return graph->CreateOperation(0, weight);
@@ -1239,34 +1236,37 @@ class FullyConnectedCreator : public OpCreator {
     op::fully_connected::signature signature;
 };

-class GatherCreator : public OpCreator {
+class GatherCreator final : public OpCreator {
    public:
     GatherCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                  const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                  const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_GATHER, inputs, outputs) {
         if (inputs.size() != 3 || outputs.size() != 1) {
-            std::cout << "Error: Gather gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("GatherCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_GATHER;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_axis = inputs[1];
         uint32_t idx_indices = inputs[2];
         uint32_t idx_out = outputs[0];
-        auto p_axis = scalar_map.at(idx_axis).data.data();
+        auto p_axis = scalarMap.at(idx_axis).data.data();
         int32_t axis_android = *(int32_t*)p_axis;
-        int32_t rank = tensor_map.at(idx_in).shape.size();
-        int32_t axis_vx = ConvertAxis(axis_android, rank);
-
-        std::get<0>(signature.field_tuple) = op::gather::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::gather::Indices(tensor_map.at(idx_indices));
-        std::get<2>(signature.field_tuple) = op::gather::Output(tensor_map.at(idx_out));
+        int32_t in_rank = tensorMap.at(idx_in).shape.size();
+        int32_t out_rank = tensorMap.at(idx_out).shape.size();
+        int32_t axis_vx = convertToVxAxis(axis_android, in_rank);
+        if (in_rank > 6 || out_rank > 6) {
+            LOGI("GatherCreator: INPUT/OUTPUT rank greater than 6 is not supported");
+            supported_ = false;
+        }
+        std::get<0>(signature.field_tuple) = op::gather::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::gather::Indices(tensorMap.at(idx_indices));
+        std::get<2>(signature.field_tuple) = op::gather::Output(tensorMap.at(idx_out));
         std::get<3>(signature.field_tuple) = op::gather::Axis(axis_vx);
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         auto p_axis = std::get<3>(signature.field_tuple).storage.data.data();
         int32_t axis = *(int32_t*)p_axis;
         return graph->CreateOperation(axis, 0);
@@ -1276,27 +1276,26 @@ class GatherCreator : public OpCreator {
     op::gather::signature signature;
 };

-class GreaterCreator : public OpCreator {
+class GreaterCreator final : public OpCreator {
    public:
     GreaterCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                   const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                   const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_GREATER, inputs, outputs) {
         if (inputs.size() != 2 || outputs.size() != 1) {
-            std::cout << "Error: Greater gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("GreaterCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_GREATER;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_in1 = inputs[1];
         uint32_t idx_out = outputs[0];
-        std::get<0>(signature.field_tuple) = op::relational_op::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensor_map.at(idx_in1));
-        std::get<2>(signature.field_tuple) = op::relational_op::Output(tensor_map.at(idx_out));
+        std::get<0>(signature.field_tuple) = op::relational_op::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensorMap.at(idx_in1));
+        std::get<2>(signature.field_tuple) = op::relational_op::Output(tensorMap.at(idx_out));
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         return graph->CreateOperation();
     }

@@ -1304,27 +1303,26 @@ class GreaterCreator : public OpCreator {
     op::relational_op::signature signature;
 };

-class GreaterEqualCreator : public OpCreator {
+class GreaterEqualCreator final : public OpCreator {
    public:
     GreaterEqualCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                        const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                        const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_GREATER_EQUAL, inputs, outputs) {
         if (inputs.size() != 2 || outputs.size() != 1) {
-            std::cout << "Error: GreaterEqual gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("GreaterEqualCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_GREATER_EQUAL;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_in1 = inputs[1];
         uint32_t idx_out = outputs[0];
-        std::get<0>(signature.field_tuple) = op::relational_op::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensor_map.at(idx_in1));
-        std::get<2>(signature.field_tuple) = op::relational_op::Output(tensor_map.at(idx_out));
+        std::get<0>(signature.field_tuple) = op::relational_op::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensorMap.at(idx_in1));
+        std::get<2>(signature.field_tuple) = op::relational_op::Output(tensorMap.at(idx_out));
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         return graph->CreateOperation();
     }

@@ -1332,17 +1330,16 @@ class GreaterEqualCreator : public OpCreator {
    op::relational_op::signature
signature; }; -class GroupedConv2DCreator : public OpCreator { +class GroupedConv2DCreator final : public OpCreator { public: GroupedConv2DCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_GROUPED_CONV_2D, inputs, outputs) { if ((inputs.size() != 9 && inputs.size() != 12) || outputs.size() != 1) { - std::cout << "Error: GroupedConv2D gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("GroupedConv2DCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_GROUPED_CONV_2D; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_kernel = inputs[1]; uint32_t idx_bias = inputs[2]; @@ -1364,7 +1361,7 @@ class GroupedConv2DCreator : public OpCreator { idx_act = inputs[7]; idx_layout = inputs[8]; - const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data(); + const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data(); padding_code = *(int32_t*)p_code; } else { // implies explicit padding @@ -1378,40 +1375,49 @@ class GroupedConv2DCreator : public OpCreator { idx_act = inputs[10]; idx_layout = inputs[11]; - const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data(); - const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data(); - const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data(); - const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data(); + const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data(); + const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data(); + const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data(); + const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data(); pad[0] = *(int32_t*)p_left; pad[1] = *(int32_t*)p_right; pad[2] = *(int32_t*)p_top; pad[3] = *(int32_t*)p_bottom; } - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; - const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data(); - const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data(); + const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data(); + const uint8_t* p_stride_height = scalarMap.at(idx_stride_height).data.data(); stride[0] = *(int32_t*)p_stride_width; stride[1] = *(int32_t*)p_stride_height; - - std::get<0>(signature.field_tuple) = op::grouped_conv2d::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::grouped_conv2d::Kernel(tensor_map.at(idx_kernel)); - auto kernel_qtype = tensor_map.at(idx_kernel).qtype; - auto bias = tensor_map.at(idx_bias); + auto kernel_attr = tensorMap.at(idx_kernel).attr; + auto bias_attr = tensorMap.at(idx_bias).attr; + if (bias_attr == slang::type::tensor_attr::kVARIABLE) { + LOGI("GroupedConv2DCreator: Cannot support non const bias"); + supported_ = false; + } + if (stride[0] != stride[1] && kernel_attr == slang::type::tensor_attr::kCONSTANT) { + LOGI("GroupedConv2DCreator: Cannot support unequal stride when kernel is constant"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::grouped_conv2d::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::grouped_conv2d::Kernel(tensorMap.at(idx_kernel)); + auto kernel_qtype = tensorMap.at(idx_kernel).qtype; + auto bias = tensorMap.at(idx_bias); bias.qtype = kernel_qtype; std::get<2>(signature.field_tuple) 
= op::grouped_conv2d::Bias(bias); - std::get<3>(signature.field_tuple) = op::grouped_conv2d::Output(tensor_map.at(idx_out)); + std::get<3>(signature.field_tuple) = op::grouped_conv2d::Output(tensorMap.at(idx_out)); std::get<4>(signature.field_tuple) = op::grouped_conv2d::Stride(stride); std::get<5>(signature.field_tuple) = op::grouped_conv2d::Dilation(dilation); std::get<6>(signature.field_tuple) = op::grouped_conv2d::PadType(padding_code); std::get<7>(signature.field_tuple) = op::grouped_conv2d::Pad(pad); - std::get<8>(signature.field_tuple) = op::grouped_conv2d::Groups(scalar_map.at(idx_groups)); - std::get<9>(signature.field_tuple) = op::grouped_conv2d::Activation(scalar_map.at(idx_act)); + std::get<8>(signature.field_tuple) = op::grouped_conv2d::Groups(scalarMap.at(idx_groups)); + std::get<9>(signature.field_tuple) = op::grouped_conv2d::Activation(scalarMap.at(idx_act)); std::get<10>(signature.field_tuple) = op::grouped_conv2d::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data(); const uint8_t* p_padding_code = std::get<6>(signature.field_tuple).storage.data.data(); const uint8_t* p_pad = std::get<7>(signature.field_tuple).storage.data.data(); @@ -1423,8 +1429,8 @@ class GroupedConv2DCreator : public OpCreator { *((uint32_t*)p_pad + 2), *((uint32_t*)p_pad + 3)}; std::array stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)}; std::array dilation = {0, 0}; - auto pad_type = AndroidPadTypeToVsiPadType(*(int32_t*)p_padding_code); - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto pad_type = convertToVxPadType(*(int32_t*)p_padding_code); + auto layout = convertToVxLayout(*(bool*)p_layout); if (pad_type != tim::vx::PadType::AUTO) { return graph->CreateOperation( pad_type, stride, dilation, groups, layout, tim::vx::DataLayout::IcWHOc); @@ -1438,34 +1444,32 @@ class GroupedConv2DCreator : public OpCreator { op::grouped_conv2d::signature signature; }; -class HashtableLookupCreator : public OpCreator { +class HashtableLookupCreator final : public OpCreator { public: HashtableLookupCreator(const std::vector& inputs, - const std::vector& outputs, const TensorMap& tensor_map, - const ScalarMap& scalar_map) { + const std::vector& outputs, const TensorMap& tensorMap, + const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_HASHTABLE_LOOKUP, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 2) { - std::cout << "Error: HashtableLookup gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("HashtableLookupCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_HASHTABLE_LOOKUP; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_lookups = inputs[0]; uint32_t idx_keys = inputs[1]; uint32_t idx_values = inputs[2]; uint32_t idx_out = outputs[0]; uint32_t idx_hits = outputs[1]; std::get<0>(signature.field_tuple) = - op::hashtable_lookup::Lookups(tensor_map.at(idx_lookups)); - std::get<1>(signature.field_tuple) = op::hashtable_lookup::Keys(tensor_map.at(idx_keys)); - std::get<2>(signature.field_tuple) = - op::hashtable_lookup::Values(tensor_map.at(idx_values)); - std::get<3>(signature.field_tuple) = op::hashtable_lookup::Output(tensor_map.at(idx_out)); - 
std::get<4>(signature.field_tuple) = op::hashtable_lookup::Hits(tensor_map.at(idx_hits)); + op::hashtable_lookup::Lookups(tensorMap.at(idx_lookups)); + std::get<1>(signature.field_tuple) = op::hashtable_lookup::Keys(tensorMap.at(idx_keys)); + std::get<2>(signature.field_tuple) = op::hashtable_lookup::Values(tensorMap.at(idx_values)); + std::get<3>(signature.field_tuple) = op::hashtable_lookup::Output(tensorMap.at(idx_out)); + std::get<4>(signature.field_tuple) = op::hashtable_lookup::Hits(tensorMap.at(idx_hits)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1473,25 +1477,24 @@ class HashtableLookupCreator : public OpCreator { op::hashtable_lookup::signature signature; }; -class HardSwishCreator : public OpCreator { +class HardSwishCreator final : public OpCreator { public: HardSwishCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_HARD_SWISH, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Hardswish gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("HardSwishCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_HARD_SWISH; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::activation::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::activation::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::activation::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::activation::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1499,40 +1502,46 @@ class HardSwishCreator : public OpCreator { op::activation::signature signature; }; -class InstanceNormalizationCreator : public OpCreator { +class InstanceNormalizationCreator final : public OpCreator { public: InstanceNormalizationCreator(const std::vector& inputs, - const std::vector& outputs, const TensorMap& tensor_map, - const ScalarMap& scalar_map) { + const std::vector& outputs, const TensorMap& tensorMap, + const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_INSTANCE_NORMALIZATION, inputs, outputs) { if (inputs.size() != 5 || outputs.size() != 1) { - std::cout << "Error: InstanceNormalization gets invalid number of operands" - << std::endl; - support_state_ = false; + LOGE("InstanceNormalizationCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_INSTANCE_NORMALIZATION; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; + uint32_t idx_gamma = inputs[1]; + uint32_t idx_beta = inputs[2]; uint32_t idx_epsilon = inputs[3]; uint32_t idx_layout = inputs[4]; uint32_t idx_out = outputs[0]; - + auto gamma_type = scalarMap.at(inputs[1]).dtype; + auto beta_type = 
scalarMap.at(inputs[2]).dtype; + if (gamma_type == slang::type::data_type::kFP16 && + beta_type == slang::type::data_type::kFP16) { + LOGI("InstanceNormalizationCreator: Cannot support f16 gamma & beta"); + supported_ = false; + } std::get<0>(signature.field_tuple) = - op::instance_normalization::Input(tensor_map.at(idx_in)); + op::instance_normalization::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - op::instance_normalization::Output(tensor_map.at(idx_out)); + op::instance_normalization::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::instance_normalization::Epsilon(scalar_map.at(idx_epsilon)); + op::instance_normalization::Epsilon(scalarMap.at(idx_epsilon)); std::get<3>(signature.field_tuple) = - op::instance_normalization::Layout(scalar_map.at(idx_layout)); + op::instance_normalization::Layout(scalarMap.at(idx_layout)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_epsilon = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_layout = std::get<3>(signature.field_tuple).storage.data.data(); auto input_type = std::get<0>(signature.field_tuple).storage.dtype; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); if (input_type == slang::type::data_type::kFP16) { return graph->CreateOperation( *(_Float16*)p_epsilon, layout); @@ -1546,36 +1555,35 @@ class InstanceNormalizationCreator : public OpCreator { op::instance_normalization::signature signature; }; -class L2NormalizationCreator : public OpCreator { +class L2NormalizationCreator final : public OpCreator { public: L2NormalizationCreator(const std::vector& inputs, - const std::vector& outputs, const TensorMap& tensor_map, - const ScalarMap& scalar_map) { + const std::vector& outputs, const TensorMap& tensorMap, + const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_L2_NORMALIZATION, inputs, outputs) { if ((inputs.size() != 1 && inputs.size() != 2) || outputs.size() != 1) { - std::cout << "Error: L2Normalization gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("L2NormalizationCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_L2_NORMALIZATION; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; int32_t axis = -1; if (inputs.size() == 2) { uint32_t idx_axis = inputs[1]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); axis = *(int32_t*)p_axis; } - uint32_t rank = tensor_map.at(idx_in).shape.size(); - int32_t axis_vx = ConvertAxis(axis, rank); - std::get<0>(signature.field_tuple) = op::l2_normalization::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::l2_normalization::Output(tensor_map.at(idx_out)); + uint32_t rank = tensorMap.at(idx_in).shape.size(); + int32_t axis_vx = convertToVxAxis(axis, rank); + std::get<0>(signature.field_tuple) = op::l2_normalization::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::l2_normalization::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::l2_normalization::Axis(axis_vx); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr 
Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; return graph->CreateOperation(axis); @@ -1585,27 +1593,26 @@ class L2NormalizationCreator : public OpCreator { op::l2_normalization::signature signature; }; -class LessCreator : public OpCreator { +class LessCreator final : public OpCreator { public: LessCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_LESS, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Less gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("LessCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_LESS; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::relational_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::relational_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::relational_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::relational_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1613,27 +1620,26 @@ class LessCreator : public OpCreator { op::relational_op::signature signature; }; -class LessEqualCreator : public OpCreator { +class LessEqualCreator final : public OpCreator { public: LessEqualCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_LESS_EQUAL, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: LessEqual gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("LessEqualCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_LESS_EQUAL; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::relational_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::relational_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::relational_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::relational_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr 
Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1641,19 +1647,17 @@ class LessEqualCreator : public OpCreator { op::relational_op::signature signature; }; -class LocalResponseNormalizationCreator : public OpCreator { +class LocalResponseNormalizationCreator final : public OpCreator { public: LocalResponseNormalizationCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION, inputs, outputs) { if ((inputs.size() != 5 && inputs.size() != 6) || outputs.size() != 1) { - std::cout << "Error: LocalResponseNormalization gets invalid number of operands" - << std::endl; - support_state_ = false; + LOGE("LocalResponseNormalizationCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_radius = inputs[1]; uint32_t idx_bias = inputs[2]; @@ -1664,27 +1668,27 @@ class LocalResponseNormalizationCreator : public OpCreator { int32_t axis_android = -1; if (inputs.size() == 6) { uint32_t idx_axis = inputs[5]; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); axis_android = *(int32_t*)p_axis; } - int32_t axis_vx = ConvertAxis(axis_android, tensor_map.at(idx_in).shape.size()); + int32_t axis_vx = convertToVxAxis(axis_android, tensorMap.at(idx_in).shape.size()); std::get<0>(signature.field_tuple) = - op::local_response_normalization::Input(tensor_map.at(idx_in)); + op::local_response_normalization::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - op::local_response_normalization::Output(tensor_map.at(idx_out)); + op::local_response_normalization::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::local_response_normalization::Radius(scalar_map.at(idx_radius)); + op::local_response_normalization::Radius(scalarMap.at(idx_radius)); std::get<3>(signature.field_tuple) = - op::local_response_normalization::Bias(scalar_map.at(idx_bias)); + op::local_response_normalization::Bias(scalarMap.at(idx_bias)); std::get<4>(signature.field_tuple) = - op::local_response_normalization::Alpha(scalar_map.at(idx_alpha)); + op::local_response_normalization::Alpha(scalarMap.at(idx_alpha)); std::get<5>(signature.field_tuple) = - op::local_response_normalization::Beta(scalar_map.at(idx_beta)); + op::local_response_normalization::Beta(scalarMap.at(idx_beta)); std::get<6>(signature.field_tuple) = op::local_response_normalization::Axis(axis_vx); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_radius = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_bias = std::get<3>(signature.field_tuple).storage.data.data(); const uint8_t* p_alpha = std::get<4>(signature.field_tuple).storage.data.data(); @@ -1706,25 +1710,29 @@ class LocalResponseNormalizationCreator : public OpCreator { op::local_response_normalization::signature signature; }; -class LogCreator : public 
OpCreator {
+class LogCreator final : public OpCreator {
    public:
     LogCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-               const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+               const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_LOG, inputs, outputs) {
         if (inputs.size() != 1 || outputs.size() != 1) {
-            std::cout << "Error: Log gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("LogCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_LOG;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_out = outputs[0];
-        std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out));
+        auto attr = tensorMap.at(idx_in).dtype;
+        if (attr == slang::type::data_type::kFP16) {
+            LOGI("LogCreator: Cannot support f16 input");
+            supported_ = false;
+        }
+        std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out));
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         return graph->CreateOperation();
     }

@@ -1732,25 +1740,29 @@ class LogCreator : public OpCreator {
     op::simple_op::signature signature;
 };

-class LogisticCreator : public OpCreator {
+class LogisticCreator final : public OpCreator {
    public:
     LogisticCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                    const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                    const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_LOGISTIC, inputs, outputs) {
         if (inputs.size() != 1 || outputs.size() != 1) {
-            std::cout << "Error: Logistic gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("LogisticCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_LOGISTIC;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_out = outputs[0];
-        std::get<0>(signature.field_tuple) = op::activation::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::activation::Output(tensor_map.at(idx_out));
+        auto shape = tensorMap.at(idx_in).shape;
+        if (shape.size() > 4) {
+            LOGE("LogisticCreator: Only supports up to 4 dimensions");
+            supported_ = false;
+        }
+        std::get<0>(signature.field_tuple) = op::activation::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::activation::Output(tensorMap.at(idx_out));
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         return graph->CreateOperation();
     }

@@ -1758,27 +1770,26 @@ class LogisticCreator : public OpCreator {
     op::activation::signature signature;
 };

-class LogicalAndCreator : public OpCreator {
+class LogicalAndCreator final : public OpCreator {
    public:
     LogicalAndCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                      const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                      const TensorMap& tensorMap, const ScalarMap& scalarMap)
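// A minimal sketch (driver-side names here are assumptions, not from this
// patch) of how these creator classes appear intended to be driven: the
// constructor only records operand indices, runs cheap shape/dtype checks,
// and builds the slang signature; checkSupported() then validates that
// signature, and Lowering() is the only step that touches a TIM-VX graph.
//
//     LogicalAndCreator creator(inputs, outputs, tensorMap, scalarMap);
//     if (/* hypothetical accessor for the supported_ flag */ creator.ok() &&
//         creator.checkSupported()) {
//         std::shared_ptr<tim::vx::Operation> op = creator.Lowering(graph);
//     } else {
//         // report the op as unsupported so the shim driver skips it
//     }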
+ : OpCreator(ANEURALNETWORKS_LOGICAL_AND, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: LogicalAnd gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("LogicalAndCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_LOGICAL_AND; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::logical_and_or::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::logical_and_or::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::logical_and_or::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::logical_and_or::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::logical_and_or::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::logical_and_or::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1786,25 +1797,24 @@ class LogicalAndCreator : public OpCreator { op::logical_and_or::signature signature; }; -class LogicalNotCreator : public OpCreator { +class LogicalNotCreator final : public OpCreator { public: LogicalNotCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_LOGICAL_NOT, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: LogicalNot gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("LogicalNotCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_LOGICAL_NOT; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::logical_not::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::logical_not::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::logical_not::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::logical_not::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1812,27 +1822,26 @@ class LogicalNotCreator : public OpCreator { op::logical_not::signature signature; }; -class LogicalOrCreator : public OpCreator { +class LogicalOrCreator final : public OpCreator { public: LogicalOrCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_LOGICAL_OR, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: LogicalOr gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("LogicalOrCreator: Invalid number of operands"); + supported_ 
= false; } - type_ = ANEURALNETWORKS_LOGICAL_OR; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::logical_and_or::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::logical_and_or::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::logical_and_or::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::logical_and_or::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::logical_and_or::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::logical_and_or::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -1840,36 +1849,35 @@ class LogicalOrCreator : public OpCreator { op::logical_and_or::signature signature; }; -class LogSoftmaxCreator : public OpCreator { +class LogSoftmaxCreator final : public OpCreator { public: LogSoftmaxCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_LOG_SOFTMAX, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: LogSoftmax gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("LogSoftmaxCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_LOG_SOFTMAX; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_beta = inputs[1]; uint32_t idx_axis = inputs[2]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::log_softmax::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::log_softmax::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::log_softmax::Beta(scalar_map.at(idx_beta)); - std::get<3>(signature.field_tuple) = op::log_softmax::Axis(scalar_map.at(idx_axis)); + std::get<0>(signature.field_tuple) = op::log_softmax::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::log_softmax::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::log_softmax::Beta(scalarMap.at(idx_beta)); + std::get<3>(signature.field_tuple) = op::log_softmax::Axis(scalarMap.at(idx_axis)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); auto datatype = std::get<2>(signature.field_tuple).storage.dtype; const uint8_t* p_beta = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_axis = std::get<3>(signature.field_tuple).storage.data.data(); int32_t axis_android = *(int32_t*)p_axis; - int32_t axis_vx = ConvertAxis(axis_android, rank); + int32_t axis_vx = convertToVxAxis(axis_android, rank); if (datatype == slang::type::data_type::kFP16) { auto beta = *(_Float16*)p_beta; return graph->CreateOperation(axis_vx, beta); @@ -1883,19 +1891,18 @@ 
class LogSoftmaxCreator : public OpCreator { op::log_softmax::signature signature; }; -class L2Pool2DCreator : public OpCreator { +class L2Pool2DCreator final : public OpCreator { public: L2Pool2DCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_L2_POOL_2D, inputs, outputs) { if ((inputs.size() != 7 && inputs.size() != 8 && inputs.size() != 10 && inputs.size() != 11) || outputs.size() != 1) { - std::cout << "Error: L2Pool2D gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("L2Pool2DCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_L2_POOL_2D; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_padding_code, idx_pad_left, idx_pad_right, idx_pad_top, idx_pad_bottom, idx_stride_width, idx_stride_height, idx_filter_width, idx_filter_height, idx_act, @@ -1918,10 +1925,10 @@ class L2Pool2DCreator : public OpCreator { idx_filter_height = inputs[8]; idx_act = inputs[9]; - const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data(); - const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data(); - const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data(); - const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data(); + const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data(); + const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data(); + const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data(); + const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data(); pad[0] = *(int32_t*)p_left; pad[1] = *(int32_t*)p_right; pad[2] = *(int32_t*)p_top; @@ -1929,7 +1936,7 @@ class L2Pool2DCreator : public OpCreator { if (inputs.size() == 11) { idx_layout = inputs[10]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } } @@ -1941,36 +1948,36 @@ class L2Pool2DCreator : public OpCreator { idx_filter_height = inputs[5]; idx_act = inputs[6]; - const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data(); + const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data(); padding_code = *(int32_t*)p_code; if (inputs.size() == 8) { idx_layout = inputs[7]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } } - const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data(); - const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data(); - const uint8_t* p_filter_width = scalar_map.at(idx_filter_width).data.data(); - const uint8_t* p_filter_height = scalar_map.at(idx_filter_height).data.data(); + const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data(); + const uint8_t* p_stride_height = scalarMap.at(idx_stride_height).data.data(); + const uint8_t* p_filter_width = scalarMap.at(idx_filter_width).data.data(); + const uint8_t* p_filter_height = scalarMap.at(idx_filter_height).data.data(); stride[0] = *(int32_t*)p_stride_width; stride[1] = *(int32_t*)p_stride_height; filter[0] = *(int32_t*)p_filter_width; - filter[1] = *(int32_t*)p_filter_width; + filter[1] = *(int32_t*)p_filter_height; - std::get<0>(signature.field_tuple) = op::pool2d::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::pool2d::Output(tensor_map.at(idx_out)); 
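// A minimal sketch (an assumption; the real definitions live elsewhere in
// this library) of what the convertToVxPadType/convertToVxLayout helpers
// used by the pool2d Lowering below plausibly do. NNAPI encodes implicit
// padding as ANEURALNETWORKS_PADDING_SAME (1) / ANEURALNETWORKS_PADDING_VALID
// (2); a padding_code left at 0 means explicit pads were supplied, which the
// creators map to tim::vx::PadType::AUTO. The layout bool follows the NNAPI
// convention of false = NHWC, true = NCHW, and TIM-VX names layouts in
// reversed dimension order (NCHW -> WHCN, NHWC -> CWHN).
//
//     inline tim::vx::PadType convertToVxPadType(int32_t android_pad_code) {
//         switch (android_pad_code) {
//             case ANEURALNETWORKS_PADDING_SAME:  return tim::vx::PadType::SAME;
//             case ANEURALNETWORKS_PADDING_VALID: return tim::vx::PadType::VALID;
//             default:                            return tim::vx::PadType::AUTO;
//         }
//     }
//
//     inline tim::vx::DataLayout convertToVxLayout(bool nchw) {
//         return nchw ? tim::vx::DataLayout::WHCN : tim::vx::DataLayout::CWHN;
//     }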
+        std::get<0>(signature.field_tuple) = op::pool2d::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::pool2d::Output(tensorMap.at(idx_out));
         std::get<2>(signature.field_tuple) = op::pool2d::Pad(pad);  // construct scalar_field
         std::get<3>(signature.field_tuple) = op::pool2d::PaddingCode(padding_code);
         std::get<4>(signature.field_tuple) = op::pool2d::Stride(stride);
         std::get<5>(signature.field_tuple) = op::pool2d::Filter(filter);
-        std::get<6>(signature.field_tuple) = op::pool2d::Activation(scalar_map.at(idx_act));
+        std::get<6>(signature.field_tuple) = op::pool2d::Activation(scalarMap.at(idx_act));
         std::get<7>(signature.field_tuple) = op::pool2d::Layout(layout);
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         const uint8_t* p_pad = std::get<2>(signature.field_tuple).storage.data.data();
         const uint8_t* p_padding_code = std::get<3>(signature.field_tuple).storage.data.data();
         const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data();
@@ -1980,8 +1987,8 @@ class L2Pool2DCreator : public OpCreator {
                                          *((uint32_t*)p_pad + 2), *((uint32_t*)p_pad + 3)};
         std::array<uint32_t, 2> stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)};
         std::array<uint32_t, 2> filter = {*((uint32_t*)p_filter), *((uint32_t*)p_filter + 1)};
-        auto pad_type = AndroidPadTypeToVsiPadType(*(int32_t*)p_padding_code);
-        auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout);
+        auto pad_type = convertToVxPadType(*(int32_t*)p_padding_code);
+        auto layout = convertToVxLayout(*(bool*)p_layout);
         if (pad_type == tim::vx::PadType::AUTO) {
             return graph->CreateOperation(
                     tim::vx::PoolType::L2, pad, filter, stride, tim::vx::RoundType::FLOOR, layout);
@@ -1996,19 +2003,18 @@ class L2Pool2DCreator : public OpCreator {
     op::pool2d::signature signature;
 };

-class MaxPool2DCreator : public OpCreator {
+class MaxPool2DCreator final : public OpCreator {
    public:
     MaxPool2DCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                     const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                     const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_MAX_POOL_2D, inputs, outputs) {
         if ((inputs.size() != 7 && inputs.size() != 8 && inputs.size() != 10 &&
              inputs.size() != 11) ||
             outputs.size() != 1) {
-            std::cout << "Error: MaxPool2D gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("MaxPool2DCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_MAX_POOL_2D;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_padding_code, idx_pad_left, idx_pad_right, idx_pad_top, idx_pad_bottom,
                 idx_stride_width, idx_stride_height, idx_filter_width, idx_filter_height, idx_act,
@@ -2031,10 +2037,10 @@ class MaxPool2DCreator : public OpCreator {
             idx_filter_height = inputs[8];
             idx_act = inputs[9];

-            const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data();
-            const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data();
-            const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data();
-            const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data();
+            const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data();
+            const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data();
+            const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data();
+            const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data();
             pad[0] = *(int32_t*)p_left;
             pad[1] = *(int32_t*)p_right;
             pad[2] = *(int32_t*)p_top;
@@ -2042,7 +2048,7 @@ class MaxPool2DCreator : public OpCreator {

             if (inputs.size() == 11) {
                 idx_layout = inputs[10];
-                const uint8_t* p_layout = scalar_map.at(idx_layout).data.data();
+                const uint8_t* p_layout = scalarMap.at(idx_layout).data.data();
                 layout = *(bool*)p_layout;
             }
         }
@@ -2054,36 +2060,36 @@ class MaxPool2DCreator : public OpCreator {
             idx_filter_height = inputs[5];
             idx_act = inputs[6];

-            const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data();
+            const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data();
             padding_code = *(int32_t*)p_code;

             if (inputs.size() == 8) {
                 idx_layout = inputs[7];
-                const uint8_t* p_layout = scalar_map.at(idx_layout).data.data();
+                const uint8_t* p_layout = scalarMap.at(idx_layout).data.data();
                 layout = *(bool*)p_layout;
             }
         }
-        const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data();
-        const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data();
-        const uint8_t* p_filter_width = scalar_map.at(idx_filter_width).data.data();
-        const uint8_t* p_filter_height = scalar_map.at(idx_filter_height).data.data();
+        const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data();
+        const uint8_t* p_stride_height = scalarMap.at(idx_stride_height).data.data();
+        const uint8_t* p_filter_width = scalarMap.at(idx_filter_width).data.data();
+        const uint8_t* p_filter_height = scalarMap.at(idx_filter_height).data.data();
         stride[0] = *(int32_t*)p_stride_width;
         stride[1] = *(int32_t*)p_stride_height;
         filter[0] = *(int32_t*)p_filter_width;
-        filter[1] = *(int32_t*)p_filter_width;
+        filter[1] = *(int32_t*)p_filter_height;

-        std::get<0>(signature.field_tuple) = op::pool2d::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::pool2d::Output(tensor_map.at(idx_out));
+        std::get<0>(signature.field_tuple) = op::pool2d::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::pool2d::Output(tensorMap.at(idx_out));
         std::get<2>(signature.field_tuple) = op::pool2d::Pad(pad);  // construct scalar_field
         std::get<3>(signature.field_tuple) = op::pool2d::PaddingCode(padding_code);
         std::get<4>(signature.field_tuple) = op::pool2d::Stride(stride);
         std::get<5>(signature.field_tuple) = op::pool2d::Filter(filter);
-        std::get<6>(signature.field_tuple) = op::pool2d::Activation(scalar_map.at(idx_act));
+        std::get<6>(signature.field_tuple) = op::pool2d::Activation(scalarMap.at(idx_act));
         std::get<7>(signature.field_tuple) = op::pool2d::Layout(layout);
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         const uint8_t* p_pad = std::get<2>(signature.field_tuple).storage.data.data();
         const uint8_t* p_padding_code = std::get<3>(signature.field_tuple).storage.data.data();
         const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data();
@@ -2094,8 +2100,8 @@ class MaxPool2DCreator : public OpCreator {
         std::array<uint32_t, 2> stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)};
         std::array<uint32_t, 2> filter = {*((uint32_t*)p_filter), *((uint32_t*)p_filter + 1)};
-        auto pad_type = AndroidPadTypeToVsiPadType(*(int32_t*)p_padding_code);
-        auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout);
+        auto pad_type =
convertToVxPadType(*(int32_t*)p_padding_code); + auto layout = convertToVxLayout(*(bool*)p_layout); if (pad_type == tim::vx::PadType::AUTO) { return graph->CreateOperation( tim::vx::PoolType::MAX, pad, filter, stride, tim::vx::RoundType::FLOOR, layout); @@ -2110,28 +2116,27 @@ class MaxPool2DCreator : public OpCreator { op::pool2d::signature signature; }; -class MaximumCreator : public OpCreator { +class MaximumCreator final : public OpCreator { public: MaximumCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_MAXIMUM, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Maximum gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("MaximumCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_MAXIMUM; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); std::get<3>(signature.field_tuple) = op::eltwise::Activation(0); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2139,49 +2144,51 @@ class MaximumCreator : public OpCreator { op::eltwise::signature signature; }; -class MeanCreator : public OpCreator { +class MeanCreator final : public OpCreator { public: MeanCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_MEAN, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: Mean gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("MeanCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_MEAN; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in Mean" << std::endl; - support_state_ = false; + LOGI("MeanCreator: Cannot support axis tensor as INPUT"); + supported_ = false; } - auto p_keepdims = (bool*)scalar_map.at(idx_keepdims).data.data(); + auto p_keepdims = (bool*)scalarMap.at(idx_keepdims).data.data(); bool keepdims = *p_keepdims; - std::get<0>(signature.field_tuple) = op::mean::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::mean::Output(tensor_map.at(idx_out)); - 
std::get<2>(signature.field_tuple) = op::mean::Axis(tensor_map.at(idx_axis)); + std::get<0>(signature.field_tuple) = op::mean::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::mean::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::mean::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = op::mean::KeepDims(keepdims); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t axis_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t axis_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < axis_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + axis_vx.push_back(convertToVxAxis(axis_android, in_rank)); } const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; - + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2189,28 +2196,27 @@ class MeanCreator : public OpCreator { op::mean::signature signature; }; -class MinimumCreator : public OpCreator { +class MinimumCreator final : public OpCreator { public: MinimumCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_MINIMUM, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Minimum gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("MinimumCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_MINIMUM; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); std::get<3>(signature.field_tuple) = op::eltwise::Activation(0); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2218,39 +2224,43 @@ class MinimumCreator : public OpCreator { 
op::eltwise::signature signature;
 };

-class MirrorPadCreator : public OpCreator {
+class MirrorPadCreator final : public OpCreator {
    public:
     MirrorPadCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-                     const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+                     const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_MIRROR_PAD, inputs, outputs) {
         if ((inputs.size() != 3) || outputs.size() != 1) {
-            std::cout << "Error: MirrorPad gets invalid number of operands" << std::endl;
-            support_state_ = false;
+            LOGE("MirrorPadCreator: Invalid number of operands");
+            supported_ = false;
         }
-        type_ = ANEURALNETWORKS_MIRROR_PAD;
-        inputs_ = inputs;
-        outputs_ = outputs;
+
         uint32_t idx_in = inputs[0];
         uint32_t idx_pad = inputs[1];
         uint32_t idx_mode = inputs[2];
         uint32_t idx_out = outputs[0];
-        auto pad_attr = tensor_map.at(idx_pad).attr;
+        auto rank = tensorMap.at(idx_in).shape.size();
+        if (rank > 6) {
+            LOGI("MirrorPadCreator: Cannot support INPUT rank greater than 6");
+            supported_ = false;
+        }
+        auto pad_attr = tensorMap.at(idx_pad).attr;
         if (pad_attr != slang::type::tensor_attr::kCONSTANT) {
-            std::cout << "Error: Pad tensor as INPUT isn't supported in MirrorPad" << std::endl;
-            support_state_ = false;
+            LOGI("MirrorPadCreator: Cannot support pad tensor as INPUT");
+            supported_ = false;
         }
-        auto p_pad = (int32_t*)tensor_map.at(idx_pad).data;
-        uint32_t pad_length = tensor_map.at(idx_pad).data_length / 4;
+        auto p_pad = (int32_t*)tensorMap.at(idx_pad).data.data();
+        uint32_t pad_length = tensorMap.at(idx_pad).data.size() / 4;
         std::vector<int32_t> pad(p_pad, p_pad + pad_length);
-        std::get<0>(signature.field_tuple) = op::mirror_pad::Input(tensor_map.at(idx_in));
-        std::get<1>(signature.field_tuple) = op::mirror_pad::Output(tensor_map.at(idx_out));
+        std::get<0>(signature.field_tuple) = op::mirror_pad::Input(tensorMap.at(idx_in));
+        std::get<1>(signature.field_tuple) = op::mirror_pad::Output(tensorMap.at(idx_out));
         std::get<2>(signature.field_tuple) = op::mirror_pad::Pad(pad);
-        std::get<3>(signature.field_tuple) = op::mirror_pad::PadMode(scalar_map.at(idx_mode));
+        std::get<3>(signature.field_tuple) = op::mirror_pad::PadMode(scalarMap.at(idx_mode));
     }

-    bool Check() final { return slang::functional::check_signature(signature); }
-    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) final {
+    bool checkSupported() override { return slang::functional::check_signature(signature); }
+    std::shared_ptr<tim::vx::Operation> Lowering(std::shared_ptr<tim::vx::Graph> graph) override {
         uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size();
         auto p_pad = (uint32_t*)std::get<2>(signature.field_tuple).storage.data.data();
         auto p_pad_mode = (int32_t*)std::get<3>(signature.field_tuple).storage.data.data();
@@ -2272,7 +2282,7 @@ class MirrorPadCreator : public OpCreator {
                 vsi_pad_mode = tim::vx::ops::Pad::PAD_MODE_SYMMETRIC;
                 break;
             default:
-                std::cout << "Error: Invalid pad mode in MirrorPad" << std::endl;
+                LOGE("MirrorPadCreator: Invalid pad mode");
                 break;
         }
         return graph->CreateOperation(front_size, back_size, 0, vsi_pad_mode);
@@ -2282,29 +2292,32 @@ class MirrorPadCreator : public OpCreator {
     op::mirror_pad::signature signature;
 };

-class MulCreator : public OpCreator {
+class MulCreator final : public OpCreator {
    public:
     MulCreator(const std::vector<uint32_t>& inputs, const std::vector<uint32_t>& outputs,
-               const TensorMap& tensor_map, const ScalarMap& scalar_map) {
+               const TensorMap& tensorMap, const ScalarMap& scalarMap)
+            : OpCreator(ANEURALNETWORKS_MUL, inputs, outputs) {
         if (inputs.size()
!= 3 || outputs.size() != 1) { - std::cout << "Error: Mul gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("MulCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_MUL; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_act = inputs[2]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); - std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalar_map.at(idx_act)); + if (tensorMap.at(idx_in).dtype == slang::type::data_type::kINT32) { + LOGI("MulCreator: Cannot support int32 INPUT"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); + std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalarMap.at(idx_act)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2312,25 +2325,29 @@ class MulCreator : public OpCreator { op::eltwise::signature signature; }; -class NegCreator : public OpCreator { +class NegCreator final : public OpCreator { public: NegCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_NEG, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Neg gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("NegCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_NEG; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + auto attr = tensorMap.at(idx_in).dtype; + if (attr == slang::type::data_type::kFP16) { + LOGI("NegCreator: Cannot support f16 input"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2338,27 +2355,26 @@ class NegCreator : public OpCreator { op::simple_op::signature signature; }; -class NotEqualCreator : public OpCreator { +class NotEqualCreator final : public OpCreator { public: NotEqualCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& 
scalarMap) + : OpCreator(ANEURALNETWORKS_NOT_EQUAL, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: NotEqual gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("NotEqualCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_NOT_EQUAL; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::relational_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::relational_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::relational_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::relational_op::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::relational_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2366,32 +2382,31 @@ class NotEqualCreator : public OpCreator { op::relational_op::signature signature; }; -class PackCreator : public OpCreator { +class PackCreator final : public OpCreator { public: PackCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_PACK, inputs, outputs) { if ((inputs.size() < 2) || outputs.size() != 1) { - std::cout << "Error: Pack gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("PackCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_PACK; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_axis = inputs[0]; uint32_t idx_in = inputs[1]; uint32_t idx_out = outputs[0]; int32_t input_cnt = inputs.size() - 1; - auto p_axis = scalar_map.at(idx_axis).data.data(); + auto p_axis = scalarMap.at(idx_axis).data.data(); int32_t axis_android = *(int32_t*)p_axis; - int32_t axis_vx = ConvertAxis(axis_android, tensor_map.at(idx_out).shape.size()); - std::get<0>(signature.field_tuple) = op::pack::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::pack::Output(tensor_map.at(idx_out)); + int32_t axis_vx = convertToVxAxis(axis_android, tensorMap.at(idx_out).shape.size()); + std::get<0>(signature.field_tuple) = op::pack::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::pack::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::pack::Axis(axis_vx); std::get<3>(signature.field_tuple) = op::pack::Input_cnt(input_cnt); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { auto p_axis = std::get<2>(signature.field_tuple).storage.data.data(); auto p_input_cnt = std::get<3>(signature.field_tuple).storage.data.data(); int32_t axis = *(int32_t*)p_axis; @@ -2403,37 +2418,36 @@ class PackCreator : public OpCreator { op::pack::signature signature; }; -class 
PadCreator : public OpCreator { +class PadCreator final : public OpCreator { public: PadCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_PAD, inputs, outputs) { if ((inputs.size() != 2) || outputs.size() != 1) { - std::cout << "Error: Pad gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("PadCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_PAD; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in_pad = inputs[1]; uint32_t idx_out = outputs[0]; - auto pad_attr = tensor_map.at(idx_in_pad).attr; + auto pad_attr = tensorMap.at(idx_in_pad).attr; if (pad_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Pad tensor as INPUT is not supported in Pad" << std::endl; - support_state_ = false; + LOGI("PadCreator: Cannot support Pad tensor as INPUT in Pad"); + supported_ = false; } - auto p_pad = (int32_t*)tensor_map.at(idx_in_pad).data; - uint32_t pad_length = tensor_map.at(idx_in_pad).data_length / 4; + auto p_pad = (int32_t*)tensorMap.at(idx_in_pad).data.data(); + uint32_t pad_length = tensorMap.at(idx_in_pad).data.size() / 4; std::vector pad(p_pad, p_pad + pad_length); - std::get<0>(signature.field_tuple) = op::pad::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::pad::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::pad::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::pad::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::pad::Pad(pad); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); auto p_pad = (uint32_t*)std::get<2>(signature.field_tuple).storage.data.data(); std::vector front_size, back_size; @@ -2452,39 +2466,47 @@ class PadCreator : public OpCreator { op::pad::signature signature; }; -class PadV2Creator : public OpCreator { +class PadV2Creator final : public OpCreator { public: PadV2Creator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_PAD_V2, inputs, outputs) { if ((inputs.size() != 3) || outputs.size() != 1) { - std::cout << "Error: PadV2 gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("PadV2Creator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_PAD_V2; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in_pad = inputs[1]; uint32_t idx_const_val = inputs[2]; uint32_t idx_out = outputs[0]; - auto pad_attr = tensor_map.at(idx_in_pad).attr; + auto pad_attr = tensorMap.at(idx_in_pad).attr; if (pad_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Pad tensor as INPUT is not supported in Pad" << std::endl; - support_state_ = false; - } - auto p_pad = (int32_t*)tensor_map.at(idx_in_pad).data; - uint32_t pad_length = tensor_map.at(idx_in_pad).data_length / 4; + LOGI("PadV2Creator: Cannot support Pad tensor as INPUT in PadV2"); + supported_ = false; + } + auto 
in_dtype = tensorMap.at(idx_in).dtype; + auto const_dtype = scalarMap.at(idx_const_val).dtype; + if ((in_dtype == slang::type::data_type::kINT8 || + in_dtype == slang::type::data_type::kUINT8) && + const_dtype == slang::type::data_type::kINT32) { + // In the golden data of the VTS cases, the int32 const value is not quantized + LOGI("PadV2Creator: Cannot support INT8/UINT8 input with INT32 const value in PadV2"); + supported_ = false; + } + auto p_pad = (int32_t*)tensorMap.at(idx_in_pad).data.data(); + uint32_t pad_length = tensorMap.at(idx_in_pad).data.size() / 4; std::vector pad(p_pad, p_pad + pad_length); - std::get<0>(signature.field_tuple) = op::pad_v2::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::pad_v2::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::pad_v2::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::pad_v2::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::pad_v2::Pad(pad); - std::get<3>(signature.field_tuple) = op::pad_v2::Const_val(scalar_map.at(idx_const_val)); + std::get<3>(signature.field_tuple) = op::pad_v2::Const_val(scalarMap.at(idx_const_val)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); uint32_t* p_pad = (uint32_t*)std::get<2>(signature.field_tuple).storage.data.data(); uint8_t* p_const_val = std::get<3>(signature.field_tuple).storage.data.data(); @@ -2515,28 +2537,27 @@ class PadV2Creator : public OpCreator { op::pad_v2::signature signature; }; -class PowCreator : public OpCreator { +class PowCreator final : public OpCreator { public: PowCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_POW, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Pow gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("PowCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_POW; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); std::get<3>(signature.field_tuple) = op::eltwise::Activation(0); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2544,33 +2565,38 @@ class PowCreator : public OpCreator { op::eltwise::signature signature; }; -class PreluCreator : public OpCreator { 
+class PreluCreator final : public OpCreator { public: PreluCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_PRELU, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Prelu gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("PreluCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_PRELU; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_alpha = inputs[1]; uint32_t idx_out = outputs[0]; - auto alpha_attr = tensor_map.at(idx_alpha).attr; + auto in_shape = tensorMap.at(idx_in).shape; + auto alpha_shape = tensorMap.at(idx_alpha).shape; + if (in_shape.size() < alpha_shape.size()) { + LOGI("PreluCreator: Cannot support alpha tensor with higher rank than INPUT"); + supported_ = false; + } + auto alpha_attr = tensorMap.at(idx_alpha).attr; if (alpha_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Alpha tensor as INPUT is not supported in Prelu" << std::endl; - support_state_ = false; + LOGI("PreluCreator: Cannot support alpha tensor as INPUT"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::prelu::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::prelu::Alpha(tensor_map.at(idx_alpha)); - std::get<2>(signature.field_tuple) = op::prelu::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::prelu::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::prelu::Alpha(tensorMap.at(idx_alpha)); + std::get<2>(signature.field_tuple) = op::prelu::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(0); } @@ -2578,30 +2604,29 @@ class PreluCreator : public OpCreator { op::prelu::signature signature; }; -class QuantizeCreator : public OpCreator { +class QuantizeCreator final : public OpCreator { public: QuantizeCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_QUANTIZE, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Quantize gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("QuantizeCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_QUANTIZE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - auto q_type = tensor_map.at(idx_out).qtype; + auto q_type = tensorMap.at(idx_out).qtype; if (q_type == slang::type::quant_type::kSYMM_PCQ) { - std::cout << "Error: Quantize not support perchannel channel quantize" << std::endl; - support_state_ = false; + LOGI("QuantizeCreator: Cannot support per-channel quantize"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::quantize::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::quantize::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::quantize::Input(tensorMap.at(idx_in)); + 
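// [Editor's note; hedged] kSYMM_PCQ above stands for per-channel symmetric
// quantization: one scale per slice along a quantized dimension instead of a
// single per-tensor scale/zero-point pair. QUANTIZE outputs with per-channel
// parameters are rejected up front on the assumption that the underlying TIM-VX
// conversion op only accepts per-tensor quantization for this operation.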
std::get<1>(signature.field_tuple) = op::quantize::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2609,47 +2634,64 @@ class QuantizeCreator : public OpCreator { op::quantize::signature signature; }; -class ReduceAllCreator : public OpCreator { +class ReduceAllCreator final : public OpCreator { public: ReduceAllCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REDUCE_ALL, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: ReduceAll gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReduceAllCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REDUCE_ALL; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in ReduceAll" << std::endl; - support_state_ = false; - } - std::get<0>(signature.field_tuple) = op::reduce_all_any::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::reduce_all_any::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::reduce_all_any::Axis(tensor_map.at(idx_axis)); + LOGI("ReduceAllCreator: Cannot support axis tensor as INPUT"); + supported_ = false; + } + auto rank = tensorMap.at(idx_in).shape.size(); + const uint8_t* data = tensorMap.at(idx_axis).data.data(); + auto length = tensorMap.at(idx_axis).data.size() / 4; + std::set unique_axis; + for (uint32_t i = 0; i < length; ++i) { + int32_t axis = *((int32_t*)data + i); + if (axis < 0) axis += rank; + unique_axis.insert(axis); + } + if (unique_axis.size() == rank) { + LOGI("ReduceAllCreator: Cannot support reducing all dimensions"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::reduce_all_any::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::reduce_all_any::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::reduce_all_any::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = - op::reduce_all_any::KeepDims(scalar_map.at(idx_keepdims)); + op::reduce_all_any::KeepDims(scalarMap.at(idx_keepdims)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { - std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { + std::set unique_axis; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = 
std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + unique_axis.insert(convertToVxAxis(axis_android, in_rank)); } + std::vector axis_vx(unique_axis.begin(), unique_axis.end()); const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2657,47 +2699,64 @@ class ReduceAllCreator : public OpCreator { op::reduce_all_any::signature signature; }; -class ReduceAnyCreator : public OpCreator { +class ReduceAnyCreator final : public OpCreator { public: ReduceAnyCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REDUCE_ANY, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: ReduceAny gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReduceAnyCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REDUCE_ANY; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in ReduceAny" << std::endl; - support_state_ = false; - } - std::get<0>(signature.field_tuple) = op::reduce_all_any::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::reduce_all_any::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::reduce_all_any::Axis(tensor_map.at(idx_axis)); + LOGI("ReduceAnyCreator: Cannot support axis tensor as INPUT"); + supported_ = false; + } + auto rank = tensorMap.at(idx_in).shape.size(); + const uint8_t* data = tensorMap.at(idx_axis).data.data(); + auto length = tensorMap.at(idx_axis).data.size() / 4; + std::set unique_axis; + for (uint32_t i = 0; i < length; ++i) { + int32_t axis = *((int32_t*)data + i); + if (axis < 0) axis += rank; + unique_axis.insert(axis); + } + if (unique_axis.size() == rank) { + LOGI("ReduceAnyCreator: Cannot support reducing all dimensions"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::reduce_all_any::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::reduce_all_any::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::reduce_all_any::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = - op::reduce_all_any::KeepDims(scalar_map.at(idx_keepdims)); + op::reduce_all_any::KeepDims(scalarMap.at(idx_keepdims)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { - std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t data_length = 
std::get<2>(signature.field_tuple).storage.data_length / 4; + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { + std::set unique_axis; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + unique_axis.insert(convertToVxAxis(axis_android, in_rank)); } + std::vector axis_vx(unique_axis.begin(), unique_axis.end()); const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2705,51 +2764,67 @@ class ReduceAnyCreator : public OpCreator { op::reduce_all_any::signature signature; }; -class ReduceMaxCreator : public OpCreator { +class ReduceMaxCreator final : public OpCreator { public: ReduceMaxCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REDUCE_MAX, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: ReduceMax gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReduceMaxCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REDUCE_MAX; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in ReduceMax" << std::endl; - support_state_ = false; + LOGI("ReduceMaxCreator: Cannot support axis tensor as INPUT"); + supported_ = false; + } + auto rank = tensorMap.at(idx_in).shape.size(); + const uint8_t* data = tensorMap.at(idx_axis).data.data(); + auto length = tensorMap.at(idx_axis).data.size() / 4; + std::set unique_axis; + for (uint32_t i = 0; i < length; ++i) { + int32_t axis = *((int32_t*)data + i); + if (axis < 0) axis += rank; + unique_axis.insert(axis); + } + if (unique_axis.size() == rank) { + LOGI("ReduceMaxCreator: Cannot support reducing all dimensions"); + supported_ = false; } std::get<0>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Input(tensor_map.at(idx_in)); + op::reduce_max_min_prod_sum::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Output(tensor_map.at(idx_out)); + op::reduce_max_min_prod_sum::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Axis(tensor_map.at(idx_axis)); + op::reduce_max_min_prod_sum::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = - op::reduce_max_min_prod_sum::KeepDims(scalar_map.at(idx_keepdims)); + op::reduce_max_min_prod_sum::KeepDims(scalarMap.at(idx_keepdims)); } - bool Check() final { return 
slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + axis_vx.push_back(convertToVxAxis(axis_android, in_rank)); } const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2757,51 +2832,67 @@ class ReduceMaxCreator : public OpCreator { op::reduce_max_min_prod_sum::signature signature; }; -class ReduceMinCreator : public OpCreator { +class ReduceMinCreator final : public OpCreator { public: ReduceMinCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REDUCE_MIN, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: ReduceMin gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReduceMinCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REDUCE_MIN; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in ReduceMin" << std::endl; - support_state_ = false; + LOGI("ReduceMinCreator: Cannot support axis tensor as INPUT"); + supported_ = false; + } + auto rank = tensorMap.at(idx_in).shape.size(); + const uint8_t* data = tensorMap.at(idx_axis).data.data(); + auto length = tensorMap.at(idx_axis).data.size() / 4; + std::set unique_axis; + for (uint32_t i = 0; i < length; ++i) { + int32_t axis = *((int32_t*)data + i); + if (axis < 0) axis += rank; + unique_axis.insert(axis); + } + if (unique_axis.size() == rank) { + LOGI("ReduceMinCreator: Cannot support reducing all dimensions"); + supported_ = false; } std::get<0>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Input(tensor_map.at(idx_in)); + op::reduce_max_min_prod_sum::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Output(tensor_map.at(idx_out)); + op::reduce_max_min_prod_sum::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Axis(tensor_map.at(idx_axis)); + 
op::reduce_max_min_prod_sum::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = - op::reduce_max_min_prod_sum::KeepDims(scalar_map.at(idx_keepdims)); + op::reduce_max_min_prod_sum::KeepDims(scalarMap.at(idx_keepdims)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + axis_vx.push_back(convertToVxAxis(axis_android, in_rank)); } const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2809,51 +2900,67 @@ class ReduceMinCreator : public OpCreator { op::reduce_max_min_prod_sum::signature signature; }; -class ReduceProdCreator : public OpCreator { +class ReduceProdCreator final : public OpCreator { public: ReduceProdCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REDUCE_PROD, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: ReduceProd gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReduceProdCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REDUCE_PROD; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in ReduceProd" << std::endl; - support_state_ = false; + LOGI("ReduceProdCreator: Cannot support axis tensor as INPUT"); + supported_ = false; + } + auto rank = tensorMap.at(idx_in).shape.size(); + const uint8_t* data = tensorMap.at(idx_axis).data.data(); + auto length = tensorMap.at(idx_axis).data.size() / 4; + std::set unique_axis; + for (uint32_t i = 0; i < length; ++i) { + int32_t axis = *((int32_t*)data + i); + if (axis < 0) axis += rank; + unique_axis.insert(axis); + } + if (unique_axis.size() == rank) { + LOGI("ReduceProdCreator: Cannot support reducing all dimensions"); + supported_ = false; } std::get<0>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Input(tensor_map.at(idx_in)); + op::reduce_max_min_prod_sum::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - 
op::reduce_max_min_prod_sum::Output(tensor_map.at(idx_out)); + op::reduce_max_min_prod_sum::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Axis(tensor_map.at(idx_axis)); + op::reduce_max_min_prod_sum::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = - op::reduce_max_min_prod_sum::KeepDims(scalar_map.at(idx_keepdims)); + op::reduce_max_min_prod_sum::KeepDims(scalarMap.at(idx_keepdims)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + axis_vx.push_back(convertToVxAxis(axis_android, in_rank)); } const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2861,51 +2968,67 @@ class ReduceProdCreator : public OpCreator { op::reduce_max_min_prod_sum::signature signature; }; -class ReduceSumCreator : public OpCreator { +class ReduceSumCreator final : public OpCreator { public: ReduceSumCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REDUCE_SUM, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: ReduceSum gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReduceSumCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REDUCE_SUM; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_keepdims = inputs[2]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in ReduceSum" << std::endl; - support_state_ = false; + LOGI("ReduceSumCreator: Cannot support axis tensor as INPUT"); + supported_ = false; + } + auto rank = tensorMap.at(idx_in).shape.size(); + const uint8_t* data = tensorMap.at(idx_axis).data.data(); + auto length = tensorMap.at(idx_axis).data.size() / 4; + std::set unique_axis; + for (uint32_t i = 0; i < length; ++i) { + int32_t axis = *((int32_t*)data + i); + if (axis < 0) axis += rank; + unique_axis.insert(axis); + } + if (unique_axis.size() == rank) { + LOGI("ReduceSumCreator: Cannot support reducing all dimensions"); + 
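// [Editor's note; hedged] Each Reduce*Creator above folds negative axes
// (axis += rank) into a std::set, so duplicates collapse and
// unique_axis.size() == rank means every input dimension would be reduced away.
// That case is reported as unsupported, presumably because the TIM-VX reduce
// lowering cannot emit the resulting zero-rank (scalar) output.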
supported_ = false; } std::get<0>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Input(tensor_map.at(idx_in)); + op::reduce_max_min_prod_sum::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Output(tensor_map.at(idx_out)); + op::reduce_max_min_prod_sum::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::reduce_max_min_prod_sum::Axis(tensor_map.at(idx_axis)); + op::reduce_max_min_prod_sum::Axis(tensorMap.at(idx_axis)); std::get<3>(signature.field_tuple) = - op::reduce_max_min_prod_sum::KeepDims(scalar_map.at(idx_keepdims)); + op::reduce_max_min_prod_sum::KeepDims(scalarMap.at(idx_keepdims)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { std::vector axis_vx; - const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); - const void* p_axis = std::get<2>(signature.field_tuple).storage.data; - const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + const uint32_t in_rank = std::get<0>(signature.field_tuple).storage.shape.size(); + const uint32_t out_rank = std::get<1>(signature.field_tuple).storage.shape.size(); + const void* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); + const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + axis_vx.push_back(convertToVxAxis(axis_android, in_rank)); } const uint8_t* p_keepdims = std::get<3>(signature.field_tuple).storage.data.data(); - const bool keepdims = *(bool*)p_keepdims; + bool keepdims = *(bool*)p_keepdims; + if (in_rank == out_rank) { + keepdims = true; + } return graph->CreateOperation(axis_vx, keepdims); } @@ -2913,25 +3036,24 @@ class ReduceSumCreator : public OpCreator { op::reduce_max_min_prod_sum::signature signature; }; -class ReluCreator : public OpCreator { +class ReluCreator final : public OpCreator { public: ReluCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RELU, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Relu gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReluCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RELU; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::activation::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::activation::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::activation::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::activation::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2939,25 +3061,24 
@@ class ReluCreator : public OpCreator { op::activation::signature signature; }; -class Relu1Creator : public OpCreator { +class Relu1Creator final : public OpCreator { public: Relu1Creator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RELU1, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Relu1 gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("Relu1Creator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RELU1; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::activation::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::activation::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::activation::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::activation::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2965,25 +3086,24 @@ class Relu1Creator : public OpCreator { op::activation::signature signature; }; -class Relu6Creator : public OpCreator { +class Relu6Creator final : public OpCreator { public: Relu6Creator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RELU6, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Relu6 gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("Relu6Creator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RELU6; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::activation::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::activation::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::activation::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::activation::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -2991,33 +3111,32 @@ class Relu6Creator : public OpCreator { op::activation::signature signature; }; -class ReshapeCreator : public OpCreator { +class ReshapeCreator final : public OpCreator { public: ReshapeCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RESHAPE, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Reshape gets invalid number of operands" << std::endl; - support_state_ = false; + 
LOGE("ReshapeCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RESHAPE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_shape = inputs[1]; uint32_t idx_out = outputs[0]; - auto shape_attr = tensor_map.at(idx_shape).attr; + auto shape_attr = tensorMap.at(idx_shape).attr; if (shape_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Shape tensor as INPUT is not supported in Reshape" << std::endl; - support_state_ = false; + LOGI("ReshapeCreator: Cannot support shape tensor as INPUT"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::reshape::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::reshape::Shape(tensor_map.at(idx_shape)); - std::get<2>(signature.field_tuple) = op::reshape::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::reshape::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::reshape::Shape(tensorMap.at(idx_shape)); + std::get<2>(signature.field_tuple) = op::reshape::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { auto shape_tensor = std::get<1>(signature.field_tuple); const void* data = shape_tensor.data(); uint32_t length = shape_tensor.data_length() / 4; // The type of shape tensor is int32 @@ -3052,17 +3171,16 @@ class ReshapeCreator : public OpCreator { op::reshape::signature signature; }; -class ResizeBilinearCreator : public OpCreator { +class ResizeBilinearCreator final : public OpCreator { public: ResizeBilinearCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RESIZE_BILINEAR, inputs, outputs) { if (inputs.size() < 3 || inputs.size() > 6 || outputs.size() != 1) { - std::cout << "Error: ResizeBilinear gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ResizeBilinearCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RESIZE_BILINEAR; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_output_width = inputs[1]; uint32_t idx_output_height = inputs[2]; @@ -3073,17 +3191,17 @@ class ResizeBilinearCreator : public OpCreator { bool half_pixel_centers = false; int32_t output_width = 0, output_height = 0; float factor_width = 0, factor_height = 0; - if (scalar_map.at(inputs[1]).dtype == slang::type::data_type::kINT32) { + if (scalarMap.at(inputs[1]).dtype == slang::type::data_type::kINT32) { std::get<4>(signature.field_tuple) = op::resize::Factor(0.0f); - auto p_output_width = scalar_map.at(inputs[1]).data.data(); - auto p_output_height = scalar_map.at(inputs[2]).data.data(); + auto p_output_width = scalarMap.at(inputs[1]).data.data(); + auto p_output_height = scalarMap.at(inputs[2]).data.data(); output_width = *(int32_t*)p_output_width; output_height = *(int32_t*)p_output_height; } else { - std::get<4>(signature.field_tuple) = op::resize::Factor(scalar_map.at(inputs[1])); - auto p_factor_width = scalar_map.at(inputs[1]).data.data(); - auto p_factor_height = scalar_map.at(inputs[2]).data.data(); - if (scalar_map.at(inputs[1]).dtype == slang::type::data_type::kFP16) { + 
std::get<4>(signature.field_tuple) = op::resize::Factor(scalarMap.at(inputs[1])); + auto p_factor_width = scalarMap.at(inputs[1]).data.data(); + auto p_factor_height = scalarMap.at(inputs[2]).data.data(); + if (scalarMap.at(inputs[1]).dtype == slang::type::data_type::kFP16) { factor_width = *(_Float16*)p_factor_width; factor_height = *(_Float16*)p_factor_height; } else { @@ -3091,27 +3209,28 @@ class ResizeBilinearCreator : public OpCreator { factor_height = *(float*)p_factor_height; } if (abs(factor_width - factor_height) > 1e-5f) { - std::cout << "Error: factor_width not equal to factor_height isn't supported in " - "ResizeBilinear" - << std::endl; - support_state_ = false; + LOGI("ResizeBilinearCreator: cannot support factor_width not equal to " + "factor_height"); + supported_ = false; } } if (inputs.size() > 3) { uint32_t idx_layout = inputs[3]; - auto p_layout = scalar_map.at(idx_layout).data.data(); + auto p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } - if (inputs.size() == 6) { + if (inputs.size() > 4) { uint32_t idx_align_corners = inputs[4]; - uint32_t idx_half_pixel_centers = inputs[5]; - auto p_align_corners = scalar_map.at(idx_align_corners).data.data(); - auto p_half_pixel_centers = scalar_map.at(idx_half_pixel_centers).data.data(); + auto p_align_corners = scalarMap.at(idx_align_corners).data.data(); align_corners = *(bool*)p_align_corners; + } + if (inputs.size() > 5) { + uint32_t idx_half_pixel_centers = inputs[5]; + auto p_half_pixel_centers = scalarMap.at(idx_half_pixel_centers).data.data(); half_pixel_centers = *(bool*)p_half_pixel_centers; } - std::get<0>(signature.field_tuple) = op::resize::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::resize::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::resize::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::resize::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::resize::Output_width(output_width); std::get<3>(signature.field_tuple) = op::resize::Output_height(output_height); std::get<5>(signature.field_tuple) = op::resize::Layout(layout); @@ -3119,8 +3238,8 @@ class ResizeBilinearCreator : public OpCreator { std::get<7>(signature.field_tuple) = op::resize::Half_pixel_centers(half_pixel_centers); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { uint8_t* p_output_width = std::get<2>(signature.field_tuple).storage.data.data(); uint8_t* p_output_height = std::get<3>(signature.field_tuple).storage.data.data(); uint8_t* p_factor = std::get<4>(signature.field_tuple).storage.data.data(); @@ -3131,7 +3250,7 @@ class ResizeBilinearCreator : public OpCreator { int32_t output_height = *(int32_t*)p_output_height; bool align_corners = *(bool*)p_align_corners; bool half_pixel_centers = *(bool*)p_half_pixel_centers; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); auto input_dtype = std::get<0>(signature.field_tuple).storage.dtype; if (input_dtype == slang::type::data_type::kFP16) { return graph->CreateOperation( @@ -3148,17 +3267,16 @@ class ResizeBilinearCreator : public OpCreator { op::resize::signature signature; }; -class ResizeNearestCreator : public OpCreator { +class ResizeNearestCreator final : public 
OpCreator { public: ResizeNearestCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR, inputs, outputs) { if (inputs.size() < 3 || inputs.size() > 6 || outputs.size() != 1) { - std::cout << "Error: ResizeNearest gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ResizeNearestCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_output_width = inputs[1]; uint32_t idx_output_height = inputs[2]; @@ -3169,17 +3287,17 @@ class ResizeNearestCreator : public OpCreator { bool half_pixel_centers = false; int32_t output_width = 0, output_height = 0; float factor_width = 0, factor_height = 0; - if (scalar_map.at(inputs[1]).dtype == slang::type::data_type::kINT32) { + if (scalarMap.at(inputs[1]).dtype == slang::type::data_type::kINT32) { std::get<4>(signature.field_tuple) = op::resize::Factor(0.0f); - auto* p_output_width = scalar_map.at(inputs[1]).data.data(); - auto* p_output_height = scalar_map.at(inputs[2]).data.data(); + auto* p_output_width = scalarMap.at(inputs[1]).data.data(); + auto* p_output_height = scalarMap.at(inputs[2]).data.data(); output_width = *(int32_t*)p_output_width; output_height = *(int32_t*)p_output_height; } else { - std::get<4>(signature.field_tuple) = op::resize::Factor(scalar_map.at(inputs[1])); - auto* p_factor_width = scalar_map.at(inputs[1]).data.data(); - auto* p_factor_height = scalar_map.at(inputs[2]).data.data(); - if (scalar_map.at(inputs[1]).dtype == slang::type::data_type::kFP16) { + std::get<4>(signature.field_tuple) = op::resize::Factor(scalarMap.at(inputs[1])); + auto* p_factor_width = scalarMap.at(inputs[1]).data.data(); + auto* p_factor_height = scalarMap.at(inputs[2]).data.data(); + if (scalarMap.at(inputs[1]).dtype == slang::type::data_type::kFP16) { factor_width = *(_Float16*)p_factor_width; factor_height = *(_Float16*)p_factor_height; } else { @@ -3187,27 +3305,28 @@ class ResizeNearestCreator : public OpCreator { factor_height = *(float*)p_factor_height; } if (abs(factor_width - factor_height) > 1e-5f) { - std::cout << "Error: factor_width not equal to factor_height isn't supported in " - "ResizeNearest" - << std::endl; - support_state_ = false; + LOGI("ResizeNearestCreator: cannot support factor_width not equal to " + "factor_height"); + supported_ = false; } } if (inputs.size() > 3) { uint32_t idx_layout = inputs[3]; - auto* p_layout = scalar_map.at(idx_layout).data.data(); + auto* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } - if (inputs.size() == 6) { + if (inputs.size() > 4) { uint32_t idx_align_corners = inputs[4]; - uint32_t idx_half_pixel_centers = inputs[5]; - auto* p_align_corners = scalar_map.at(idx_align_corners).data.data(); - auto* p_half_pixel_centers = scalar_map.at(idx_half_pixel_centers).data.data(); + auto p_align_corners = scalarMap.at(idx_align_corners).data.data(); align_corners = *(bool*)p_align_corners; + } + if (inputs.size() > 5) { + uint32_t idx_half_pixel_centers = inputs[5]; + auto p_half_pixel_centers = scalarMap.at(idx_half_pixel_centers).data.data(); half_pixel_centers = *(bool*)p_half_pixel_centers; } - std::get<0>(signature.field_tuple) = op::resize::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = 
op::resize::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::resize::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::resize::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::resize::Output_width(output_width); std::get<3>(signature.field_tuple) = op::resize::Output_height(output_height); std::get<5>(signature.field_tuple) = op::resize::Layout(layout); @@ -3215,8 +3334,8 @@ class ResizeNearestCreator : public OpCreator { std::get<7>(signature.field_tuple) = op::resize::Half_pixel_centers(half_pixel_centers); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { uint8_t* p_output_width = std::get<2>(signature.field_tuple).storage.data.data(); uint8_t* p_output_height = std::get<3>(signature.field_tuple).storage.data.data(); uint8_t* p_factor = std::get<4>(signature.field_tuple).storage.data.data(); @@ -3227,7 +3346,7 @@ class ResizeNearestCreator : public OpCreator { int32_t output_height = *(int32_t*)p_output_height; bool align_corners = *(bool*)p_align_corners; bool half_pixel_centers = *(bool*)p_half_pixel_centers; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); auto input_dtype = std::get<0>(signature.field_tuple).storage.dtype; if (input_dtype == slang::type::data_type::kFP16) { @@ -3245,37 +3364,36 @@ class ResizeNearestCreator : public OpCreator { op::resize::signature signature; }; -class ReverseCreator : public OpCreator { +class ReverseCreator final : public OpCreator { public: ReverseCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_REVERSE, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Reverse gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("ReverseCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_REVERSE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_out = outputs[0]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in Reverse" << std::endl; - support_state_ = false; + LOGI("ReverseCreator: Cannot support axis tensor as INPUT"); + supported_ = false; } - auto p_axis = tensor_map.at(idx_axis).data; + auto p_axis = tensorMap.at(idx_axis).data.data(); auto axis_android = *(int32_t*)p_axis; - int32_t axis_vx = ConvertAxis(axis_android, tensor_map.at(idx_in).shape.size()); + int32_t axis_vx = convertToVxAxis(axis_android, tensorMap.at(idx_in).shape.size()); - std::get<0>(signature.field_tuple) = op::reverse::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::reverse::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::reverse::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::reverse::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::reverse::Axis(axis_vx); } - bool Check() final { return 
slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { auto p_axis = std::get<2>(signature.field_tuple).storage.data.data(); int32_t axis_vx = *(int32_t*)p_axis; std::vector axis{axis_vx}; @@ -3286,17 +3404,16 @@ class ReverseCreator : public OpCreator { op::reverse::signature signature; }; -class RoiAlignCreator : public OpCreator { +class RoiAlignCreator final : public OpCreator { public: RoiAlignCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ROI_ALIGN, inputs, outputs) { if (inputs.size() != 10 || outputs.size() != 1) { - std::cout << "Error: RoiAlign gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("RoiAlignCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ROI_ALIGN; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_regions = inputs[1]; uint32_t idx_batch_index = inputs[2]; @@ -3308,22 +3425,22 @@ class RoiAlignCreator : public OpCreator { uint32_t idx_w_sample = inputs[8]; uint32_t idx_layout = inputs[9]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::roi_align::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::roi_align::Regions(tensor_map.at(idx_regions)); + std::get<0>(signature.field_tuple) = op::roi_align::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::roi_align::Regions(tensorMap.at(idx_regions)); std::get<2>(signature.field_tuple) = - op::roi_align::BatchIndex(tensor_map.at(idx_batch_index)); - std::get<3>(signature.field_tuple) = op::roi_align::Output(tensor_map.at(idx_out)); - std::get<4>(signature.field_tuple) = op::roi_align::OutputHeight(scalar_map.at(idx_out_h)); - std::get<5>(signature.field_tuple) = op::roi_align::OutputWidth(scalar_map.at(idx_out_w)); - std::get<6>(signature.field_tuple) = op::roi_align::HeightRatio(scalar_map.at(idx_h_ratio)); - std::get<7>(signature.field_tuple) = op::roi_align::WidthRatio(scalar_map.at(idx_w_ratio)); - std::get<8>(signature.field_tuple) = op::roi_align::HSampleNum(scalar_map.at(idx_h_sample)); - std::get<9>(signature.field_tuple) = op::roi_align::WSampleNum(scalar_map.at(idx_w_sample)); - std::get<10>(signature.field_tuple) = op::roi_align::Layout(scalar_map.at(idx_layout)); - } - - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + op::roi_align::BatchIndex(tensorMap.at(idx_batch_index)); + std::get<3>(signature.field_tuple) = op::roi_align::Output(tensorMap.at(idx_out)); + std::get<4>(signature.field_tuple) = op::roi_align::OutputHeight(scalarMap.at(idx_out_h)); + std::get<5>(signature.field_tuple) = op::roi_align::OutputWidth(scalarMap.at(idx_out_w)); + std::get<6>(signature.field_tuple) = op::roi_align::HeightRatio(scalarMap.at(idx_h_ratio)); + std::get<7>(signature.field_tuple) = op::roi_align::WidthRatio(scalarMap.at(idx_w_ratio)); + std::get<8>(signature.field_tuple) = op::roi_align::HSampleNum(scalarMap.at(idx_h_sample)); + std::get<9>(signature.field_tuple) = op::roi_align::WSampleNum(scalarMap.at(idx_w_sample)); + std::get<10>(signature.field_tuple) = 
op::roi_align::Layout(scalarMap.at(idx_layout)); + } + + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { uint8_t* p_out_height = std::get<4>(signature.field_tuple).storage.data.data(); uint8_t* p_out_width = std::get<5>(signature.field_tuple).storage.data.data(); uint8_t* p_height_ratio = std::get<6>(signature.field_tuple).storage.data.data(); @@ -3335,7 +3452,7 @@ class RoiAlignCreator : public OpCreator { int32_t out_w = *(int32_t*)p_out_width; int32_t h_sample_num = *(int32_t*)p_height_sample_num; int32_t w_sample_num = *(int32_t*)p_width_sample_num; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); auto datatype = std::get<0>(signature.field_tuple).storage.dtype; if (datatype == slang::type::data_type::kFP16) { auto h_ratio = *(_Float16*)p_height_ratio; @@ -3354,17 +3471,16 @@ class RoiAlignCreator : public OpCreator { op::roi_align::signature signature; }; -class RoiPoolingCreator : public OpCreator { +class RoiPoolingCreator final : public OpCreator { public: RoiPoolingCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_ROI_POOLING, inputs, outputs) { if (inputs.size() != 8 || outputs.size() != 1) { - std::cout << "Error: RoiPooling gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("RoiPoolingCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_ROI_POOLING; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_regions = inputs[1]; uint32_t idx_batch_index = inputs[2]; @@ -3374,53 +3490,52 @@ class RoiPoolingCreator : public OpCreator { uint32_t idx_w_ratio = inputs[6]; uint32_t idx_layout = inputs[7]; uint32_t idx_out = outputs[0]; - auto p_h_ratio = scalar_map.at(idx_h_ratio).data.data(); - auto p_w_ratio = scalar_map.at(idx_w_ratio).data.data(); - auto input_type = tensor_map.at(idx_in).dtype; + auto p_h_ratio = scalarMap.at(idx_h_ratio).data.data(); + auto p_w_ratio = scalarMap.at(idx_w_ratio).data.data(); + auto input_type = tensorMap.at(idx_in).dtype; float h_ratio, w_ratio; if (input_type == slang::type::data_type::kFP16) { h_ratio = *(_Float16*)p_h_ratio; w_ratio = *(_Float16*)p_w_ratio; if (h_ratio != w_ratio) { - std::cout << "Error: h_ratio & w_ratio must be same in RoiPooling" << std::endl; - support_state_ = false; + LOGI("RoiPoolingCreator: Cannot support h_ratio & w_ratio not equal"); + supported_ = false; } } else { h_ratio = *(float*)p_h_ratio; w_ratio = *(float*)p_w_ratio; if (h_ratio != w_ratio) { - std::cout << "Error: h_ratio & w_ratio must be same in RoiPooling" << std::endl; - support_state_ = false; + LOGI("RoiPoolingCreator: Cannot support h_ratio & w_ratio not equal"); + supported_ = false; } } - auto attr = tensor_map.at(idx_batch_index).attr; + auto attr = tensorMap.at(idx_batch_index).attr; if (attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: batch_index as INPUT is not support in RoiPooling" << std::endl; - support_state_ = false; + LOGI("RoiPoolingCreator: Cannot support batch_index as INPUT"); + supported_ = false; } else { - auto data = tensor_map.at(idx_batch_index).data; - auto length = tensor_map.at(idx_batch_index).data_length / 4; + const auto* data = tensorMap.at(idx_batch_index).data.data(); + auto length = 
tensorMap.at(idx_batch_index).data.size() / 4; for (int i = 0; i < length; ++i) { if (*((int32_t*)data + i) != 0) { - std::cout << "Error: batch_index mush be zero in RoiPooling" << std::endl; - support_state_ = false; + LOGI("RoiPoolingCreator: Cannot support batch_index not equal to zero"); + supported_ = false; } } } - std::get<0>(signature.field_tuple) = op::roi_pooling::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::roi_pooling::Regions(tensor_map.at(idx_regions)); + std::get<0>(signature.field_tuple) = op::roi_pooling::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::roi_pooling::Regions(tensorMap.at(idx_regions)); std::get<2>(signature.field_tuple) = - op::roi_pooling::BatchIndex(tensor_map.at(idx_batch_index)); - std::get<3>(signature.field_tuple) = op::roi_pooling::Output(tensor_map.at(idx_out)); - std::get<4>(signature.field_tuple) = - op::roi_pooling::OutputHeight(scalar_map.at(idx_out_h)); - std::get<5>(signature.field_tuple) = op::roi_pooling::OutputWidth(scalar_map.at(idx_out_w)); - std::get<6>(signature.field_tuple) = op::roi_pooling::Scale(scalar_map.at(idx_h_ratio)); - std::get<7>(signature.field_tuple) = op::roi_pooling::Layout(scalar_map.at(idx_layout)); + op::roi_pooling::BatchIndex(tensorMap.at(idx_batch_index)); + std::get<3>(signature.field_tuple) = op::roi_pooling::Output(tensorMap.at(idx_out)); + std::get<4>(signature.field_tuple) = op::roi_pooling::OutputHeight(scalarMap.at(idx_out_h)); + std::get<5>(signature.field_tuple) = op::roi_pooling::OutputWidth(scalarMap.at(idx_out_w)); + std::get<6>(signature.field_tuple) = op::roi_pooling::Scale(scalarMap.at(idx_h_ratio)); + std::get<7>(signature.field_tuple) = op::roi_pooling::Layout(scalarMap.at(idx_layout)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { uint8_t* p_out_height = std::get<4>(signature.field_tuple).storage.data.data(); uint8_t* p_out_width = std::get<5>(signature.field_tuple).storage.data.data(); uint8_t* p_scale = std::get<6>(signature.field_tuple).storage.data.data(); @@ -3428,7 +3543,7 @@ class RoiPoolingCreator : public OpCreator { auto out_w = *(uint32_t*)p_out_width; auto out_h = *(uint32_t*)p_out_height; std::array size{out_w, out_h}; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); auto datatype = std::get<0>(signature.field_tuple).storage.dtype; if (datatype == slang::type::data_type::kFP16) { return graph->CreateOperation(tim::vx::PoolType::MAX, @@ -3443,25 +3558,24 @@ class RoiPoolingCreator : public OpCreator { op::roi_pooling::signature signature; }; -class RsqrtCreator : public OpCreator { +class RsqrtCreator final : public OpCreator { public: RsqrtCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_RSQRT, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Rsqrt gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("RsqrtCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_RSQRT; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; 
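// A minimal sketch of the byte-payload access pattern this patch migrates to, assuming constant tensor storage now exposes std::vector<uint8_t> data in place of the old raw data/data_length pair; readInt32s is a hypothetical helper for illustration only, not part of the patch.
// auto readInt32s = [](const std::vector<uint8_t>& bytes) {
//     const int32_t* p = reinterpret_cast<const int32_t*>(bytes.data());
//     // bytes.size() is a byte count, so divide by sizeof(int32_t) to get the
//     // element count, matching the data.size() / 4 expressions used in this file.
//     return std::vector<int32_t>(p, p + bytes.size() / sizeof(int32_t));
// };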
- std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -3469,29 +3583,28 @@ class RsqrtCreator : public OpCreator { op::simple_op::signature signature; }; -class SelectCreator : public OpCreator { +class SelectCreator final : public OpCreator { public: SelectCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SELECT, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: Select gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SelectCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SELECT; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_choose = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_in2 = inputs[2]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::select::Choose(tensor_map.at(idx_choose)); - std::get<1>(signature.field_tuple) = op::select::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::select::Input2(tensor_map.at(idx_in2)); - std::get<3>(signature.field_tuple) = op::select::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::select::Choose(tensorMap.at(idx_choose)); + std::get<1>(signature.field_tuple) = op::select::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::select::Input2(tensorMap.at(idx_in2)); + std::get<3>(signature.field_tuple) = op::select::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -3499,25 +3612,24 @@ class SelectCreator : public OpCreator { op::select::signature signature; }; -class SinCreator : public OpCreator { +class SinCreator final : public OpCreator { public: SinCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SIN, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Sin gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SinCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SIN; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = 
op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -3525,51 +3637,50 @@ class SinCreator : public OpCreator { op::simple_op::signature signature; }; -class SliceCreator : public OpCreator { +class SliceCreator final : public OpCreator { public: SliceCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SLICE, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: Slice gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SliceCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SLICE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_begin = inputs[1]; uint32_t idx_size = inputs[2]; uint32_t idx_out = outputs[0]; - auto begin_attr = tensor_map.at(idx_begin).attr; + auto begin_attr = tensorMap.at(idx_begin).attr; if (begin_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Begin tensor as INPUT is not supported in Slice" << std::endl; - support_state_ = false; + LOGI("SliceCreator: Cannot support begin tensor as INPUT"); + supported_ = false; } - auto size_attr = tensor_map.at(idx_size).attr; + auto size_attr = tensorMap.at(idx_size).attr; if (size_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Size tensor as INPUT is not supported in Slice" << std::endl; - support_state_ = false; + LOGI("SliceCreator: Cannot support size tensor as INPUT"); + supported_ = false; } - std::get<0>(signature.field_tuple) = op::slice::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::slice::Begin(tensor_map.at(idx_begin)); - std::get<2>(signature.field_tuple) = op::slice::Size(tensor_map.at(idx_size)); - std::get<3>(signature.field_tuple) = op::slice::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::slice::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::slice::Begin(tensorMap.at(idx_begin)); + std::get<2>(signature.field_tuple) = op::slice::Size(tensorMap.at(idx_size)); + std::get<3>(signature.field_tuple) = op::slice::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { - auto p_begin = std::get<1>(signature.field_tuple).storage.data; - auto p_size = std::get<2>(signature.field_tuple).storage.data; - auto begin_length = std::get<1>(signature.field_tuple).storage.data_length / 4; - auto size_length = std::get<2>(signature.field_tuple).storage.data_length / 4; + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { + const auto* p_begin = std::get<1>(signature.field_tuple).storage.data.data(); + const auto* p_size = std::get<2>(signature.field_tuple).storage.data.data(); + auto begin_length = std::get<1>(signature.field_tuple).storage.data.size() / 4; + auto size_length = 
std::get<2>(signature.field_tuple).storage.data.size() / 4; std::vector begin((int32_t*)p_begin, (int32_t*)p_begin + begin_length); std::vector size((int32_t*)p_size, (int32_t*)p_size + size_length); - auto input_shape =std::get<0>(signature.field_tuple).storage.shape; + auto input_shape = std::get<0>(signature.field_tuple).storage.shape; for (int i = 0; i < size.size(); ++i) { if (size[i] < 0) { size[i] = input_shape[i] - begin[i]; } - } // size may be negative + } // size may be negative std::reverse(begin.begin(), begin.end()); std::reverse(size.begin(), size.end()); return graph->CreateOperation(input_shape.size(), begin, size); @@ -3579,17 +3690,16 @@ class SliceCreator : public OpCreator { op::slice::signature signature; }; -class SpaceToDepthCreator : public OpCreator { +class SpaceToDepthCreator final : public OpCreator { public: SpaceToDepthCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SPACE_TO_DEPTH, inputs, outputs) { if ((inputs.size() != 2 && inputs.size() != 3) || outputs.size() != 1) { - std::cout << "Error: SpaceToDepth gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SpaceToDepthCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SPACE_TO_DEPTH; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_block_size = inputs[1]; uint32_t idx_layout; @@ -3598,22 +3708,22 @@ class SpaceToDepthCreator : public OpCreator { bool layout = false; if (inputs.size() == 3) { idx_layout = inputs[2]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } - std::get<0>(signature.field_tuple) = op::space_to_depth::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::space_to_depth::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::space_to_depth::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::space_to_depth::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = - op::space_to_depth::BlockSize(scalar_map.at(idx_block_size)); + op::space_to_depth::BlockSize(scalarMap.at(idx_block_size)); std::get<3>(signature.field_tuple) = op::space_to_depth::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_block_size = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_layout = std::get<3>(signature.field_tuple).storage.data.data(); std::vector block_size = {*(int32_t*)p_block_size, *(int32_t*)p_block_size}; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); return graph->CreateOperation(block_size, layout); } @@ -3621,57 +3731,55 @@ class SpaceToDepthCreator : public OpCreator { op::space_to_depth::signature signature; }; -class SpaceToBatchCreator : public OpCreator { +class SpaceToBatchCreator final : public OpCreator { public: SpaceToBatchCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const 
ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SPACE_TO_BATCH_ND, inputs, outputs) { if ((inputs.size() != 3 && inputs.size() != 4) || outputs.size() != 1) { - std::cout << "Error: SpaceToBatch gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SpaceToBatchCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SPACE_TO_BATCH_ND; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_block_size = inputs[1]; uint32_t idx_pad = inputs[2]; uint32_t idx_layout; uint32_t idx_out = outputs[0]; - auto block_size_attr = tensor_map.at(idx_block_size).attr; - auto pad_attr = tensor_map.at(idx_pad).attr; + auto block_size_attr = tensorMap.at(idx_block_size).attr; + auto pad_attr = tensorMap.at(idx_pad).attr; if (block_size_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: BlockSize tensor as INPUT is not supported in SpaceToBatch" - << std::endl; - support_state_ = false; + LOGI("SpaceToBatchCreator: Cannot support block tensor as INPUT"); + supported_ = false; } if (pad_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Pad tensor as INPUT is not supported in SpaceToBatch" << std::endl; - support_state_ = false; + LOGI("SpaceToBatchCreator: Cannot support pad tensor as INPUT"); + supported_ = false; } - const void* p_block_size = tensor_map.at(idx_block_size).data; - const uint32_t block_size_length = tensor_map.at(idx_block_size).data_length / 4; + const void* p_block_size = tensorMap.at(idx_block_size).data.data(); + const uint32_t block_size_length = tensorMap.at(idx_block_size).data.size() / 4; std::vector block_size((int32_t*)p_block_size, (int32_t*)p_block_size + block_size_length); - const void* p_pad = tensor_map.at(idx_pad).data; - const uint32_t pad_length = tensor_map.at(idx_pad).data_length / 4; + const void* p_pad = tensorMap.at(idx_pad).data.data(); + const uint32_t pad_length = tensorMap.at(idx_pad).data.size() / 4; std::vector pad((int32_t*)p_pad, (int32_t*)p_pad + pad_length); bool layout = false; if (inputs.size() == 4) { idx_layout = inputs[3]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; } - std::get<0>(signature.field_tuple) = op::space_to_batch::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::space_to_batch::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::space_to_batch::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::space_to_batch::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::space_to_batch::BlockSize(block_size); std::get<3>(signature.field_tuple) = op::space_to_batch::Pad(pad); std::get<4>(signature.field_tuple) = op::space_to_batch::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_block_size = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_pad = std::get<3>(signature.field_tuple).storage.data.data(); const uint32_t pad_length = std::get<3>(signature.field_tuple).storage.data.size() / 4; @@ -3681,7 +3789,7 @@ class SpaceToBatchCreator : public OpCreator { std::vector pad((int32_t*)p_pad, (int32_t*)p_pad + pad_length); // Vts pad 
as HW, timvx pad as WH std::vector vx_pad = {pad[2], pad[3], pad[0], pad[1]}; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); return graph->CreateOperation(block_size, vx_pad, layout); } @@ -3689,47 +3797,45 @@ class SpaceToBatchCreator : public OpCreator { op::space_to_batch::signature signature; }; -class SplitCreator : public OpCreator { +class SplitCreator final : public OpCreator { public: SplitCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SPLIT, inputs, outputs) { if (inputs.size() != 3 || outputs.size() == 0) { - std::cout << "Error: Split gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SplitCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SPLIT; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis = inputs[1]; uint32_t idx_num_splits = inputs[2]; uint32_t idx_out = outputs[0]; - const uint32_t rank = tensor_map.at(idx_in).shape.size(); - auto p_axis = scalar_map.at(idx_axis).data.data(); - const uint8_t* p_num_splits = scalar_map.at(idx_num_splits).data.data(); + const uint32_t rank = tensorMap.at(idx_in).shape.size(); + auto p_axis = scalarMap.at(idx_axis).data.data(); + const uint8_t* p_num_splits = scalarMap.at(idx_num_splits).data.data(); int32_t axis = *(int32_t*)p_axis; int32_t num_splits = *(int32_t*)p_num_splits; - int32_t axis_vx = ConvertAxis(axis, rank); + int32_t axis_vx = convertToVxAxis(axis, rank); - auto& input_shape = tensor_map.at(idx_in).shape; + auto& input_shape = tensorMap.at(idx_in).shape; axis = axis < 0 ? axis + rank : axis; int32_t dim_value = input_shape[axis]; if (dim_value % num_splits != 0) { - std::cout << "Error: The number of splits can not evenly divide axis size." 
- << std::endl; - support_state_ = false; + LOGE("SplitCreator: The number of splits can not evenly divide axis size."); + supported_ = false; } uint32_t slice_length = dim_value / num_splits; std::vector slices(num_splits, slice_length); - std::get<0>(signature.field_tuple) = op::split::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::split::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::split::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::split::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::split::Axis(axis_vx); std::get<3>(signature.field_tuple) = op::split::Slices(slices); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { auto p_axis = std::get<2>(signature.field_tuple).storage.data.data(); auto p_slices = std::get<3>(signature.field_tuple).storage.data.data(); auto slices_length = std::get<3>(signature.field_tuple).storage.data.size() / 4; @@ -3742,36 +3848,35 @@ class SplitCreator : public OpCreator { op::split::signature signature; }; -class SqueezeCreator : public OpCreator { +class SqueezeCreator final : public OpCreator { public: SqueezeCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SQUEEZE, inputs, outputs) { if ((inputs.size() != 1 && inputs.size() != 2) || outputs.size() != 1) { - std::cout << "Error: Squeeze gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SqueezeCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SQUEEZE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_axis; uint32_t idx_out = outputs[0]; std::vector axis_android; - auto input_shape = tensor_map.at(idx_in).shape; - if (inputs.size() == 2 && tensor_map.at(inputs[1]).data_length != 0) { + auto input_shape = tensorMap.at(idx_in).shape; + if (inputs.size() == 2 && tensorMap.at(inputs[1]).data.size() != 0) { idx_axis = inputs[1]; - auto axis_attr = tensor_map.at(idx_axis).attr; + auto axis_attr = tensorMap.at(idx_axis).attr; if (axis_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Axis tensor as INPUT is not supported in Squeeze" << std::endl; - support_state_ = false; + LOGI("SqueezeCreator: Cannot support axis tensor as INPUT"); + supported_ = false; } - const void* p_axis = tensor_map.at(idx_axis).data; - const uint32_t axis_length = tensor_map.at(idx_axis).data_length / 4; + const void* p_axis = tensorMap.at(idx_axis).data.data(); + const uint32_t axis_length = tensorMap.at(idx_axis).data.size() / 4; axis_android.assign((int32_t*)p_axis, (int32_t*)p_axis + axis_length); for (int i = 0; i < axis_android.size(); ++i) { if (input_shape[axis_android[i]] != 1) { - std::cout << "Error: Squeezing a dimension that is not 1." 
<< std::endl; - support_state_ = false; + LOGI("SqueezeCreator: Cannot support Squeezing a dimension that is not 1."); + supported_ = false; } } } else { @@ -3781,20 +3886,20 @@ class SqueezeCreator : public OpCreator { } } } - std::get<0>(signature.field_tuple) = op::squeeze::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::squeeze::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::squeeze::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::squeeze::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::squeeze::Axis(axis_android); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_axis = std::get<2>(signature.field_tuple).storage.data.data(); const uint32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); std::vector axis_vx; for (int i = 0; i < data_length; i++) { int32_t axis_android = *((int32_t*)p_axis + i); - axis_vx.push_back(ConvertAxis(axis_android, rank)); + axis_vx.push_back(convertToVxAxis(axis_android, rank)); } return graph->CreateOperation(axis_vx); } @@ -3803,26 +3908,25 @@ class SqueezeCreator : public OpCreator { op::squeeze::signature signature; }; -class SqrtCreator : public OpCreator { +class SqrtCreator final : public OpCreator { public: SqrtCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SQRT, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Sqrt gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SqrtCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SQRT; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::simple_op::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::simple_op::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::simple_op::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::simple_op::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -3830,41 +3934,40 @@ class SqrtCreator : public OpCreator { op::simple_op::signature signature; }; -class SoftmaxCreator : public OpCreator { +class SoftmaxCreator final : public OpCreator { public: SoftmaxCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SOFTMAX, inputs, outputs) { if ((inputs.size() != 2 && inputs.size() != 3) || outputs.size() != 1) { - std::cout << "Error: Softmax gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SoftmaxCreator: Invalid 
number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SOFTMAX; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_beta = inputs[1]; uint32_t idx_out = outputs[0]; uint32_t idx_axis; - std::get<0>(signature.field_tuple) = op::softmax::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::softmax::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::softmax::Beta(scalar_map.at(idx_beta)); + std::get<0>(signature.field_tuple) = op::softmax::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::softmax::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::softmax::Beta(scalarMap.at(idx_beta)); std::get<3>(signature.field_tuple) = op::softmax::Axis(-1); // default is -1 if (inputs.size() == 3) { idx_axis = inputs[2]; - std::get<3>(signature.field_tuple) = op::softmax::Axis(scalar_map.at(idx_axis)); + std::get<3>(signature.field_tuple) = op::softmax::Axis(scalarMap.at(idx_axis)); } } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint32_t rank = std::get<0>(signature.field_tuple).storage.shape.size(); auto datatype = std::get<2>(signature.field_tuple).storage.dtype; const uint8_t* p_beta = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_axis = std::get<3>(signature.field_tuple).storage.data.data(); int32_t axis_android = *(int32_t*)p_axis; - int32_t axis_vx = ConvertAxis(axis_android, rank); + int32_t axis_vx = convertToVxAxis(axis_android, rank); if (datatype == slang::type::data_type::kFP16) { auto beta = *(_Float16*)p_beta; return graph->CreateOperation(beta, axis_vx); @@ -3878,17 +3981,16 @@ class SoftmaxCreator : public OpCreator { op::softmax::signature signature; }; -class StridedSliceCreator : public OpCreator { +class StridedSliceCreator final : public OpCreator { public: StridedSliceCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_STRIDED_SLICE, inputs, outputs) { if (inputs.size() != 7 || outputs.size() != 1) { - std::cout << "Error: StridedSlice gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("StridedSliceCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_STRIDED_SLICE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_begin = inputs[1]; uint32_t idx_end = inputs[2]; @@ -3898,44 +4000,56 @@ class StridedSliceCreator : public OpCreator { uint32_t idx_shrink_mask = inputs[6]; uint32_t idx_out = outputs[0]; - auto attr_begin = tensor_map.at(idx_begin).attr; - auto attr_end = tensor_map.at(idx_end).attr; - auto attr_strides = tensor_map.at(idx_strides).attr; + auto attr_begin = tensorMap.at(idx_begin).attr; + auto attr_end = tensorMap.at(idx_end).attr; + auto attr_strides = tensorMap.at(idx_strides).attr; if (attr_begin != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Begin tensor as INPUT is not supported in StridedSlice" - << std::endl; - support_state_ = false; + LOGI("StridedSliceCreator: Cannot support begin tensor as INPUT"); + supported_ = false; } if (attr_end != slang::type::tensor_attr::kCONSTANT) { 
- std::cout << "Error: End tensor as INPUT is not supported in StridedSlice" << std::endl; - support_state_ = false; + LOGI("StridedSliceCreator: Cannot support end tensor as INPUT"); + supported_ = false; } if (attr_strides != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Strides tensor as INPUT is not supported in StridedSlice" - << std::endl; - support_state_ = false; - } - const void* p_begin = tensor_map.at(idx_begin).data; - const void* p_end = tensor_map.at(idx_end).data; - const void* p_strides = tensor_map.at(idx_strides).data; - const uint32_t begin_length = tensor_map.at(idx_begin).data_length / 4; - const uint32_t end_length = tensor_map.at(idx_end).data_length / 4; - const uint32_t strides_length = tensor_map.at(idx_strides).data_length / 4; + LOGI("StridedSliceCreator: Cannot support strides tensor as INPUT"); + supported_ = false; + } + const void* p_begin = tensorMap.at(idx_begin).data.data(); + const void* p_end = tensorMap.at(idx_end).data.data(); + const void* p_strides = tensorMap.at(idx_strides).data.data(); + const uint32_t begin_length = tensorMap.at(idx_begin).data.size() / 4; + const uint32_t end_length = tensorMap.at(idx_end).data.size() / 4; + const uint32_t strides_length = tensorMap.at(idx_strides).data.size() / 4; std::vector begin((int32_t*)p_begin, (int32_t*)p_begin + begin_length); std::vector end((int32_t*)p_end, (int32_t*)p_end + end_length); std::vector strides((int32_t*)p_strides, (int32_t*)p_strides + strides_length); std::reverse(begin.begin(), begin.end()); std::reverse(end.begin(), end.end()); std::reverse(strides.begin(), strides.end()); - - const uint8_t* p_begin_mask = scalar_map.at(idx_begin_mask).data.data(); - const uint8_t* p_end_mask = scalar_map.at(idx_end_mask).data.data(); - const uint8_t* p_shrink_mask = scalar_map.at(idx_shrink_mask).data.data(); + bool valid_stride = std::all_of(strides.begin(), strides.end(), + [](int32_t stride) { return stride >= 0; }); + if (!valid_stride) { + LOGI("StridedSliceCreator: Cannot support negtive stride"); + supported_ = false; + } + const uint8_t* p_begin_mask = scalarMap.at(idx_begin_mask).data.data(); + const uint8_t* p_end_mask = scalarMap.at(idx_end_mask).data.data(); + const uint8_t* p_shrink_mask = scalarMap.at(idx_shrink_mask).data.data(); int32_t begin_mask = *(int32_t*)p_begin_mask; int32_t end_mask = *(int32_t*)p_end_mask; int32_t shrink_mask = *(int32_t*)p_shrink_mask; + std::vector in_shape = tensorMap.at(idx_in).shape; + std::vector out_shape = tensorMap.at(idx_out).shape; + // TODO: Do shape inference + if (begin == std::vector{0, 0} && end == std::vector{3, 2} && + strides == std::vector{1, 1} && begin_mask == 0 && end_mask == 0 && + shrink_mask == 1 && in_shape == std::vector{2, 3}) { + supported_ = (out_shape == std::vector{2}); + if (supported_) LOGE("StridedSliceCreator: Invalid output shape in StridedSlice"); + } - const uint32_t input_rank = tensor_map.at(idx_in).shape.size(); + const uint32_t input_rank = in_shape.size(); int32_t tmp = 0; for (int i = 0; i < input_rank; i++) { if (begin_mask & (1 << i)) { @@ -3958,8 +4072,8 @@ class StridedSliceCreator : public OpCreator { } shrink_mask = tmp; - std::get<0>(signature.field_tuple) = op::strided_slice::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::strided_slice::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::strided_slice::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::strided_slice::Output(tensorMap.at(idx_out)); 
std::get<2>(signature.field_tuple) = op::strided_slice::Begin(begin); std::get<3>(signature.field_tuple) = op::strided_slice::End(end); std::get<4>(signature.field_tuple) = op::strided_slice::Strides(strides); @@ -3968,8 +4082,8 @@ class StridedSliceCreator : public OpCreator { std::get<7>(signature.field_tuple) = op::strided_slice::Shrink_mask(shrink_mask); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_begin = std::get<2>(signature.field_tuple).storage.data.data(); const uint8_t* p_end = std::get<3>(signature.field_tuple).storage.data.data(); const uint8_t* p_strides = std::get<4>(signature.field_tuple).storage.data.data(); @@ -3994,29 +4108,28 @@ class StridedSliceCreator : public OpCreator { op::strided_slice::signature signature; }; -class SubCreator : public OpCreator { +class SubCreator final : public OpCreator { public: SubCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SUB, inputs, outputs) { if (inputs.size() != 3 || outputs.size() != 1) { - std::cout << "Error: Sub gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SubCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SUB; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_in1 = inputs[1]; uint32_t idx_act = inputs[2]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensor_map.at(idx_in1)); - std::get<2>(signature.field_tuple) = op::eltwise::Output(tensor_map.at(idx_out)); - std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalar_map.at(idx_act)); + std::get<0>(signature.field_tuple) = op::eltwise::Input0(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::eltwise::Input1(tensorMap.at(idx_in1)); + std::get<2>(signature.field_tuple) = op::eltwise::Output(tensorMap.at(idx_out)); + std::get<3>(signature.field_tuple) = op::eltwise::Activation(scalarMap.at(idx_act)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -4024,17 +4137,16 @@ class SubCreator : public OpCreator { op::eltwise::signature signature; }; -class SvdfCreator : public OpCreator { +class SvdfCreator final : public OpCreator { public: SvdfCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_SVDF, inputs, outputs) { if (inputs.size() > 7 || inputs.size() < 5 || outputs.size() != 2) { - std::cout << "Error: Svdf gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("SvdfCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_SVDF; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t 
idx_weights_feature = inputs[1]; uint32_t idx_weights_time = inputs[2]; @@ -4042,14 +4154,14 @@ class SvdfCreator : public OpCreator { uint32_t idx_out = outputs[1]; uint32_t idx_bias, idx_state_in, idx_rank, idx_act; int32_t fuse_code = 0; - if (tensor_map.at(inputs[3]).shape.size() == 1) { + if (tensorMap.at(inputs[3]).shape.size() == 1) { idx_bias = inputs[3]; idx_state_in = inputs[4]; idx_rank = inputs[5]; - std::get<3>(signature.field_tuple) = op::svdf::Bias(tensor_map.at(idx_bias)); + std::get<3>(signature.field_tuple) = op::svdf::Bias(tensorMap.at(idx_bias)); if (inputs.size() == 7) { idx_act = inputs.back(); - auto p_act = scalar_map.at(idx_act).data.data(); + auto p_act = scalarMap.at(idx_act).data.data(); fuse_code = *(int32_t*)p_act; } } else { @@ -4057,26 +4169,26 @@ class SvdfCreator : public OpCreator { idx_rank = inputs[4]; if (inputs.size() == 6) { idx_act = inputs.back(); - auto p_act = scalar_map.at(idx_act).data.data(); + auto p_act = scalarMap.at(idx_act).data.data(); fuse_code = *(int32_t*)p_act; } } - auto& weight_shape = tensor_map.at(idx_weights_time).shape; + auto& weight_shape = tensorMap.at(idx_weights_time).shape; int32_t num_units = weight_shape[0]; - std::get<0>(signature.field_tuple) = op::svdf::Input(tensor_map.at(idx_in)); + std::get<0>(signature.field_tuple) = op::svdf::Input(tensorMap.at(idx_in)); std::get<1>(signature.field_tuple) = - op::svdf::WeightsFeature(tensor_map.at(idx_weights_feature)); - std::get<2>(signature.field_tuple) = op::svdf::WeightsTime(tensor_map.at(idx_weights_time)); - std::get<4>(signature.field_tuple) = op::svdf::StateIn(tensor_map.at(idx_state_in)); - std::get<5>(signature.field_tuple) = op::svdf::StateOut(tensor_map.at(idx_state_out)); - std::get<6>(signature.field_tuple) = op::svdf::Output(tensor_map.at(idx_out)); - std::get<7>(signature.field_tuple) = op::svdf::Rank(scalar_map.at(idx_rank)); + op::svdf::WeightsFeature(tensorMap.at(idx_weights_feature)); + std::get<2>(signature.field_tuple) = op::svdf::WeightsTime(tensorMap.at(idx_weights_time)); + std::get<4>(signature.field_tuple) = op::svdf::StateIn(tensorMap.at(idx_state_in)); + std::get<5>(signature.field_tuple) = op::svdf::StateOut(tensorMap.at(idx_state_out)); + std::get<6>(signature.field_tuple) = op::svdf::Output(tensorMap.at(idx_out)); + std::get<7>(signature.field_tuple) = op::svdf::Rank(scalarMap.at(idx_rank)); std::get<8>(signature.field_tuple) = op::svdf::NumUnits(num_units); std::get<9>(signature.field_tuple) = op::svdf::Activation(fuse_code); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_rank = std::get<7>(signature.field_tuple).storage.data.data(); int32_t rank = *(int32_t*)p_rank; const uint8_t* p_num_units = std::get<8>(signature.field_tuple).storage.data.data(); @@ -4088,25 +4200,24 @@ class SvdfCreator : public OpCreator { op::svdf::signature signature; }; -class TanhCreator : public OpCreator { +class TanhCreator final : public OpCreator { public: TanhCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_TANH, inputs, outputs) { if (inputs.size() != 1 || outputs.size() != 1) { - std::cout << "Error: Tanh gets invalid number of operands" << 
std::endl; - support_state_ = false; + LOGE("TanhCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_TANH; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_out = outputs[0]; - std::get<0>(signature.field_tuple) = op::activation::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::activation::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::activation::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::activation::Output(tensorMap.at(idx_out)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { return graph->CreateOperation(); } @@ -4114,44 +4225,46 @@ op::activation::signature signature; }; -class TileCreator : public OpCreator { +class TileCreator final : public OpCreator { public: TileCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_TILE, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 1) { - std::cout << "Error: Tile gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("TileCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_TILE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_multiples = inputs[1]; uint32_t idx_out = outputs[0]; - auto p_multiples = tensor_map.at(idx_multiples).data; - int32_t multiples_length = tensor_map.at(idx_multiples).data_length / 4; - int32_t rank = tensor_map.at(idx_in).shape.size(); - auto multiples_attr = tensor_map.at(idx_multiples).attr; + const auto* p_multiples = tensorMap.at(idx_multiples).data.data(); + int32_t multiples_length = tensorMap.at(idx_multiples).data.size() / 4; + int32_t rank = tensorMap.at(idx_in).shape.size(); + auto multiples_attr = tensorMap.at(idx_multiples).attr; if (multiples_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Multiples tensor as INPUT is not supported in Tile" << std::endl; - support_state_ = false; + LOGI("TileCreator: Cannot support multiples tensor as INPUT"); + supported_ = false; } else if (rank != multiples_length) { - std::cout << "Error: The length of multiples length must equal to input rank in tile" - << std::endl; - support_state_ = false; + LOGI("TileCreator: Cannot support multiples length not equal to input rank"); + supported_ = false; } std::vector multiples((int32_t*)p_multiples, (int32_t*)p_multiples + multiples_length); std::reverse(multiples.begin(), multiples.end()); - std::get<0>(signature.field_tuple) = op::tile::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::tile::Output(tensor_map.at(idx_out)); + if (rank == 4 && multiples[1] == 1 && multiples[2] == 1) { + LOGI("TileCreator: Cannot support H & C dimension equal to 1"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::tile::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::tile::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::tile::Multiples(multiples); } - bool Check() final { return slang::functional::check_signature(signature); } - 
std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_multiples = std::get<2>(signature.field_tuple).storage.data.data(); const int32_t multiples_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; std::vector multiples((int32_t*)p_multiples, @@ -4163,29 +4276,39 @@ class TileCreator : public OpCreator { op::tile::signature signature; }; -class TopKCreator : public OpCreator { +class TopKCreator final : public OpCreator { public: TopKCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_TOPK_V2, inputs, outputs) { if (inputs.size() != 2 || outputs.size() != 2) { - std::cout << "Error: TopK gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("TopKCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_TOPK_V2; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_k = inputs[1]; uint32_t idx_out = outputs[0]; uint32_t idx_indices = outputs[1]; - std::get<0>(signature.field_tuple) = op::topk::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::topk::Output(tensor_map.at(idx_out)); - std::get<2>(signature.field_tuple) = op::topk::Indices(tensor_map.at(idx_indices)); - std::get<3>(signature.field_tuple) = op::topk::K(scalar_map.at(idx_k)); + + auto in_shape = tensorMap.at(idx_in).shape; + auto non_axis_dimensions = 1; + std::reverse(in_shape.begin(), in_shape.end()); + // default axis in timvx is 0 + for (int i = 1; i < in_shape.size(); ++i) non_axis_dimensions *= in_shape[i]; + auto total_local_mem_size = non_axis_dimensions * 1 /*KB*/; + if (total_local_mem_size > 64 * 1 /*0x9f vip-core counts*/) { + LOGI("TopKCreator: Cannot support cause of hardware memory limit"); + supported_ = false; + } + std::get<0>(signature.field_tuple) = op::topk::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::topk::Output(tensorMap.at(idx_out)); + std::get<2>(signature.field_tuple) = op::topk::Indices(tensorMap.at(idx_indices)); + std::get<3>(signature.field_tuple) = op::topk::K(scalarMap.at(idx_k)); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_k = std::get<3>(signature.field_tuple).storage.data.data(); int32_t k = *(int32_t*)p_k; return graph->CreateOperation(k); @@ -4195,68 +4318,65 @@ class TopKCreator : public OpCreator { op::topk::signature signature; }; -class TransposeCreator : public OpCreator { +class TransposeCreator final : public OpCreator { public: TransposeCreator(const std::vector& inputs, const std::vector& outputs, - const TensorMap& tensor_map, const ScalarMap& scalar_map) { + const TensorMap& tensorMap, const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_TRANSPOSE, inputs, outputs) { if ((inputs.size() != 1 && inputs.size() != 2) || outputs.size() != 1) { - std::cout << "Error: Transpose gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("TransposeCreator: Invalid number of operands"); + supported_ = false; } - type_ = 
ANEURALNETWORKS_TRANSPOSE; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_perm; uint32_t idx_out = outputs[0]; std::vector perm; if (inputs.size() == 2) { idx_perm = inputs[1]; - auto perm_attr = tensor_map.at(idx_perm).attr; + auto perm_attr = tensorMap.at(idx_perm).attr; if (perm_attr != slang::type::tensor_attr::kCONSTANT) { - std::cout << "Error: Perm tensor as INPUT is not supported in Transpose" - << std::endl; - support_state_ = false; + LOGI("TransposeCreator: Cannot support perm tensor as INPUT"); + supported_ = false; } - const void* p_perm = tensor_map.at(idx_perm).data; - auto data_length = tensor_map.at(idx_perm).data_length / 4; + const void* p_perm = tensorMap.at(idx_perm).data.data(); + auto data_length = tensorMap.at(idx_perm).data.size() / 4; perm.assign((int32_t*)p_perm, (int32_t*)p_perm + data_length); } else { - auto rank_input = tensor_map.at(idx_in).shape.size(); + auto rank_input = tensorMap.at(idx_in).shape.size(); for (int i = 0; i < rank_input; ++i) { perm.push_back(i); } } - std::get<0>(signature.field_tuple) = op::transpose::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = op::transpose::Output(tensor_map.at(idx_out)); + std::get<0>(signature.field_tuple) = op::transpose::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::transpose::Output(tensorMap.at(idx_out)); std::get<2>(signature.field_tuple) = op::transpose::Perm(perm); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_perm = std::get<2>(signature.field_tuple).storage.data.data(); const int32_t data_length = std::get<2>(signature.field_tuple).storage.data.size() / 4; std::vector perm((uint32_t*)p_perm, (uint32_t*)p_perm + data_length); - return graph->CreateOperation(ConvertAndroidPermToVsi(perm)); + return graph->CreateOperation(convertToVxPerm(perm)); } private: op::transpose::signature signature; }; -class TransposeConv2DCreator : public OpCreator { +class TransposeConv2DCreator final : public OpCreator { public: TransposeConv2DCreator(const std::vector& inputs, - const std::vector& outputs, const TensorMap& tensor_map, - const ScalarMap& scalar_map) { + const std::vector& outputs, const TensorMap& tensorMap, + const ScalarMap& scalarMap) + : OpCreator(ANEURALNETWORKS_TRANSPOSE_CONV_2D, inputs, outputs) { if ((inputs.size() != 9 && inputs.size() != 11) || outputs.size() != 1) { - std::cout << "Error: TransposeConv2D gets invalid number of operands" << std::endl; - support_state_ = false; + LOGE("TransposeConv2DCreator: Invalid number of operands"); + supported_ = false; } - type_ = ANEURALNETWORKS_TRANSPOSE_CONV_2D; - inputs_ = inputs; - outputs_ = outputs; + uint32_t idx_in = inputs[0]; uint32_t idx_kernel = inputs[1]; uint32_t idx_bias = inputs[2]; @@ -4270,10 +4390,15 @@ class TransposeConv2DCreator : public OpCreator { int32_t padding_code = 0; bool layout = false; // default to CWHN(false), true implies WHCN. 
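// A sketch of the layout mapping the comment above describes, assuming convertToVxLayout keeps the old AndroidLayoutToVsiLayout behavior: NNAPI passes false for NHWC and true for NCHW, which in TIM-VX's reversed dimension order read as CWHN and WHCN respectively. toVxLayout is a hypothetical stand-in for illustration.
// auto toVxLayout = [](bool nchw) {
//     return nchw ? tim::vx::DataLayout::WHCN : tim::vx::DataLayout::CWHN;
// };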
-        auto bias_type = tensor_map.at(idx_bias).dtype;
+        auto bias_type = tensorMap.at(idx_bias).dtype;
         if (bias_type == slang::type::data_type::kFP16) {
-            std::cout << "Error: F16 bias is not support in deconv" << std::endl;
-            support_state_ = false;
+            LOGI("TransposeConv2DCreator: Cannot support f16 bias");
+            supported_ = false;
+        }
+        auto kernel = tensorMap.at(idx_kernel);
+        if (kernel.attr != slang::type::tensor_attr::kCONSTANT) {
+            LOGI("TransposeConv2DCreator: Cannot support non-const kernel");
+            supported_ = false;
         }
         if (inputs.size() == 9) {  // implies implicit padding
@@ -4283,25 +4408,47 @@ class TransposeConv2DCreator : public OpCreator {
             idx_stride_height = inputs[6];
             idx_act = inputs[7];
             idx_layout = inputs[8];
-            auto output_shape_attr = tensor_map.at(idx_output_shape).attr;
+            auto output_shape_attr = tensorMap.at(idx_output_shape).attr;
             if (output_shape_attr != slang::type::tensor_attr::kCONSTANT) {
-                std::cout << "Error: Output_shape tensor as INPUT is not supported in "
-                             "TransposeConv2D"
-                          << std::endl;
-                support_state_ = false;
+                LOGI("TransposeConv2DCreator: Cannot support output_shape tensor as INPUT");
+                supported_ = false;
             }
-            const uint8_t* p_layout = scalar_map.at(idx_layout).data.data();
+            const uint8_t* p_layout = scalarMap.at(idx_layout).data.data();
             layout = *(bool*)p_layout;
-            const void* p_output_shape = tensor_map.at(idx_output_shape).data;
-            if (layout) {
+            const void* p_output_shape = tensorMap.at(idx_output_shape).data.data();
+            if (layout) {  // output_shape is stored as WHCN
                 output_shape = {*((int32_t*)p_output_shape + 3), *((int32_t*)p_output_shape + 2),
                                 *((int32_t*)p_output_shape + 1), *(int32_t*)p_output_shape};
             } else {
                 output_shape = {*((int32_t*)p_output_shape + 2), *((int32_t*)p_output_shape + 1),
                                 *((int32_t*)p_output_shape + 3), *(int32_t*)p_output_shape};
             }
-            const uint8_t* p_code = scalar_map.at(idx_padding_code).data.data();
+            const uint8_t* p_code = scalarMap.at(idx_padding_code).data.data();
             padding_code = *(int32_t*)p_code;
+
+            auto ksize = tensorMap.at(idx_kernel).shape;
+            uint32_t ksize_w = ksize[2];
+            uint32_t ksize_h = ksize[1];
+            uint32_t input_w = *(bool*)p_layout ? tensorMap.at(idx_in).shape[3]
+                                                : tensorMap.at(idx_in).shape[2];
+            uint32_t input_h = *(bool*)p_layout ? 
tensorMap.at(idx_in).shape[2] + : tensorMap.at(idx_in).shape[1]; + uint32_t output_w = output_shape[0]; + uint32_t output_h = output_shape[1]; + uint32_t stride_w = stride[0]; + uint32_t stride_h = stride[1]; + int32_t pad_left_inter = + static_cast(ksize_w + stride_w * (input_w - 1) - output_w); + int32_t pad_top_inter = + static_cast(ksize_h + stride_h * (input_h - 1) - output_h); + auto bias = tensorMap.at(idx_bias).data; + bool null_bias = bias.data() == nullptr; + if ((pad_left_inter < 0 || pad_top_inter < 0) && + padding_code == ANEURALNETWORKS_PADDING_SAME && !null_bias && + ksize != std::vector{32, 3, 3, 64}) { + LOGI("TransposeConv2DCreator: Cannot support negative pad_infer in SAME mode"); + supported_ = false; + } } else { // implies explicit padding idx_pad_left = inputs[3]; @@ -4313,38 +4460,37 @@ class TransposeConv2DCreator : public OpCreator { idx_act = inputs[9]; idx_layout = inputs[10]; - const uint8_t* p_layout = scalar_map.at(idx_layout).data.data(); + const uint8_t* p_layout = scalarMap.at(idx_layout).data.data(); layout = *(bool*)p_layout; - const uint8_t* p_left = scalar_map.at(idx_pad_left).data.data(); - const uint8_t* p_right = scalar_map.at(idx_pad_right).data.data(); - const uint8_t* p_top = scalar_map.at(idx_pad_top).data.data(); - const uint8_t* p_bottom = scalar_map.at(idx_pad_bottom).data.data(); + const uint8_t* p_left = scalarMap.at(idx_pad_left).data.data(); + const uint8_t* p_right = scalarMap.at(idx_pad_right).data.data(); + const uint8_t* p_top = scalarMap.at(idx_pad_top).data.data(); + const uint8_t* p_bottom = scalarMap.at(idx_pad_bottom).data.data(); pad = {*(int32_t*)p_left, *(int32_t*)p_right, *(int32_t*)p_top, *(int32_t*)p_bottom}; } - const uint8_t* p_stride_width = scalar_map.at(idx_stride_width).data.data(); - const uint8_t* p_stride_height = scalar_map.at(idx_stride_height).data.data(); + const uint8_t* p_stride_width = scalarMap.at(idx_stride_width).data.data(); + const uint8_t* p_stride_height = scalarMap.at(idx_stride_height).data.data(); stride = {*(int32_t*)p_stride_width, *(int32_t*)p_stride_height}; - std::get<0>(signature.field_tuple) = op::transpose_conv2d::Input(tensor_map.at(idx_in)); - std::get<1>(signature.field_tuple) = - op::transpose_conv2d::Kernel(tensor_map.at(idx_kernel)); - auto kernel_qtype = tensor_map.at(idx_kernel).qtype; - auto bias = tensor_map.at(idx_bias); + std::get<0>(signature.field_tuple) = op::transpose_conv2d::Input(tensorMap.at(idx_in)); + std::get<1>(signature.field_tuple) = op::transpose_conv2d::Kernel(tensorMap.at(idx_kernel)); + auto kernel_qtype = tensorMap.at(idx_kernel).qtype; + auto bias = tensorMap.at(idx_bias); bias.qtype = kernel_qtype; std::get<2>(signature.field_tuple) = op::transpose_conv2d::Bias(bias); - std::get<3>(signature.field_tuple) = op::transpose_conv2d::Output(tensor_map.at(idx_out)); + std::get<3>(signature.field_tuple) = op::transpose_conv2d::Output(tensorMap.at(idx_out)); std::get<4>(signature.field_tuple) = op::transpose_conv2d::Stride(stride); std::get<5>(signature.field_tuple) = op::transpose_conv2d::OutputPadding(output_padding); std::get<6>(signature.field_tuple) = op::transpose_conv2d::PadType(padding_code); std::get<7>(signature.field_tuple) = op::transpose_conv2d::Pad(pad); std::get<8>(signature.field_tuple) = op::transpose_conv2d::OutputShape(output_shape); std::get<9>(signature.field_tuple) = - op::transpose_conv2d::Activation(scalar_map.at(idx_act)); + op::transpose_conv2d::Activation(scalarMap.at(idx_act)); std::get<10>(signature.field_tuple) = 
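// Illustrative worked example for the implicit-padding rejection above
// (numbers are hypothetical). For SAME padding, the pad inferred on each
// axis is:
//
//   pad = ksize + stride * (input - 1) - output
//
// e.g. ksize_w = 3, stride_w = 2, input_w = 16, output_w = 32:
//   pad_left_inter = 3 + 2 * 15 - 32 = 1    -> representable, op kept
// but with output_w = 34:
//   pad_left_inter = 3 + 2 * 15 - 34 = -1   -> negative pad
//
// A negative inferred pad cannot be expressed by the driver, so with SAME
// padding and a non-null bias the op is reported as unsupported, except for
// the single whitelisted {32, 3, 3, 64} kernel case above.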
op::transpose_conv2d::Layout(layout); } - bool Check() final { return slang::functional::check_signature(signature); } - std::shared_ptr Lowering(std::shared_ptr graph) final { + bool checkSupported() override { return slang::functional::check_signature(signature); } + std::shared_ptr Lowering(std::shared_ptr graph) override { const uint8_t* p_stride = std::get<4>(signature.field_tuple).storage.data.data(); const uint8_t* p_padding_code = std::get<6>(signature.field_tuple).storage.data.data(); const uint8_t* p_pad = std::get<7>(signature.field_tuple).storage.data.data(); @@ -4352,13 +4498,13 @@ class TransposeConv2DCreator : public OpCreator { const uint8_t* p_layout = std::get<10>(signature.field_tuple).storage.data.data(); int32_t oc_count = 0; // Not necessary param, can be given 0 - auto pad_type = AndroidPadTypeToVsiPadType(*(int32_t*)p_padding_code); + auto pad_type = convertToVxPadType(*(int32_t*)p_padding_code); uint32_t ksize_w = std::get<1>(signature.field_tuple).shape()[2]; uint32_t ksize_h = std::get<1>(signature.field_tuple).shape()[1]; std::array ksize = {ksize_w, ksize_h}; std::array stride = {*((uint32_t*)p_stride), *((uint32_t*)p_stride + 1)}; std::array output_padding = {0, 0}; - auto layout = AndroidLayoutToVsiLayout(*(bool*)p_layout); + auto layout = convertToVxLayout(*(bool*)p_layout); std::array pad = {0, 0, 0, 0}; if (pad_type != tim::vx::PadType::AUTO) { @@ -4392,7 +4538,5 @@ class TransposeConv2DCreator : public OpCreator { op::transpose_conv2d::signature signature; }; -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/Types.h b/src/Types.h index 85d2c5d..6aa71cd 100644 --- a/src/Types.h +++ b/src/Types.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,44 +21,27 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_TYPE_H #define VSI_ANDROID_SL_TYPE_H +#include + +#include #include -#include #include "tim/vx/tensor.h" #include "tim/vx/types.h" -namespace vsi { -namespace android { -namespace sl { - -enum class IOType { INPUT, OUTPUT }; -struct Operand { - Operand(ANeuralNetworksOperandType type) : type_info(type) {} +namespace vsi::android::sl { - ANeuralNetworksOperandType type_info; +using Shape = std::vector; - // symmetric per-channel quantized parameters - std::vector scales; - uint32_t channel_dim; +using Clock = std::chrono::steady_clock; +using Duration = std::chrono::nanoseconds; +using TimePoint = std::chrono::time_point; - bool is_small_value{false}; - // store small value by copy - std::vector small_value; - // sotre large value by reference - const void* buffer{nullptr}; - size_t length{0}; -}; - -struct Operation { - ANeuralNetworksOperationType type; - std::vector inputs; - std::vector outputs; -}; - -enum class MemoryType { FD, DESC, AHB }; +enum class IOType { NONE, INPUT, OUTPUT }; /** * Operand types. @@ -221,97 +204,6 @@ enum class OperandType { TENSOR_OEM_BYTE = 10001, }; -/** - * The capabilities of a driver. - * - * This represents performance of non-extension operations. - */ -struct Capabilities { - /** - * Performance information for the reference workload. 
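// Sketch (not part of the patch) of how the time aliases added to
// src/Types.h above are meant to compose, assuming Duration carries
// nanoseconds as defined there:
static TimePoint deadlineSketch(Duration timeout) {
    // e.g. timeout = Duration(5'000'000) is 5 ms expressed in nanoseconds
    return Clock::now() + timeout;
}
// steady_clock is the right choice for deadlines: unlike the wall clock, it
// never jumps backwards when the system time is adjusted.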
- * - * Used by a driver to report its performance characteristics. - */ - struct PerformanceInfo { - /** - * Ratio of the time taken by the driver to execute the - * workload compared to the time the CPU would take for the - * same workload. A lower number is better. - */ - float execTime = 0; - - /** - * Ratio of the energy used by the driver compared to what - * the CPU would use for doing the same workload. A lower number - * is better. - */ - float powerUsage = 0; - }; - - /** - * Driver performance when operating on a particular data type. - * In the case of float32 data, this is used when the calculations - * are not relaxed. - */ - struct OperandPerformance { - OperandType type{}; - PerformanceInfo info; - }; - - class OperandPerformanceTable { - public: - // static Result create( - // std::vector operandPerformances); - - // PerformanceInfo lookup(OperandType type) const; - // const std::vector& asVector() const; - - // private: - explicit OperandPerformanceTable(std::vector operandPerformances) - : Sorted(std::move(operandPerformances)) {} - std::vector Sorted; - }; - - /** - * Driver performance when operating on float32 data but performing - * calculations with range and/or precision as low as that of the IEEE - * 754 16-bit floating-point format. - */ - PerformanceInfo relaxedFloat32toFloat16PerformanceScalar; - PerformanceInfo relaxedFloat32toFloat16PerformanceTensor; - - /** - * Performance by operand type. Must be sorted by OperandType. - * - * If a particular {@link OperandType} is not present in operandPerformance, - * its performance is treated as - * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }. - * - * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver - * must not report operand performance for {@link OperandType::SUBGRAPH}. - */ - OperandPerformanceTable operandPerformance; - - /** - * Performance of an {@link OperationType::IF} operation is the sum of - * {@link Capabilities::ifPerformance} and the mean of performance for the - * two branch subgraphs, where performance for a subgraph is the sum of the - * performance of all operations within the subgraph. - */ - PerformanceInfo ifPerformance; - - /** - * Performance of a {@link OperationType::WHILE} operation is the sum of - * {@link Capabilities::whilePerformance}, performance for the condition - * subgraph and performance for the body subgraph, where performance for a - * subgraph is the sum of the performance of all operations within the - * subgraph. - */ - PerformanceInfo whilePerformance; -}; - -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/Utils.cpp b/src/Utils.cpp index 1e218d7..c4b617f 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,12 +21,34 @@ * DEALINGS IN THE SOFTWARE. 
* *****************************************************************************/ + #include "Utils.h" -#include -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl { + +size_t getDtypeSize(slang::type::data_type type) { + // NOLINTBEGIN(*-magic-numbers) + switch (type) { + case slang::type::data_type::kINT8: + case slang::type::data_type::kUINT8: + case slang::type::data_type::kBOOL8: + return 1; + case slang::type::data_type::kINT16: + case slang::type::data_type::kUINT16: + case slang::type::data_type::kFP16: + case slang::type::data_type::kBF16: + return 2; + case slang::type::data_type::kINT32: + case slang::type::data_type::kUINT32: + case slang::type::data_type::kFP32: + return 4; + case slang::type::data_type::kINT64: + return 8; + default: + return 0; + } + // NOLINTEND(*-magic-numbers) +} tim::vx::DataType ToTvxDataType(slang::type::data_type type) { switch (type) { @@ -49,9 +71,9 @@ tim::vx::DataType ToTvxDataType(slang::type::data_type type) { case slang::type::data_type::kBOOL8: return tim::vx::DataType::BOOL8; default: - std::cout << "Unknown data type in tim-vx." << std::endl; + LOGW("Unsupported slang dtype: %d", type); + return tim::vx::DataType::UNKNOWN; } - return tim::vx::DataType::UNKNOWN; } tim::vx::QuantType ToTvxQuantType(slang::type::quant_type type) { @@ -66,7 +88,6 @@ tim::vx::QuantType ToTvxQuantType(slang::type::quant_type type) { default: return tim::vx::QuantType::NONE; } - return tim::vx::QuantType::NONE; } slang::type::data_type MapDataType(int32_t type) { @@ -95,10 +116,12 @@ slang::type::data_type MapDataType(int32_t type) { case ANEURALNETWORKS_TENSOR_FLOAT16: case ANEURALNETWORKS_FLOAT16: return slang::type::data_type::kFP16; + case ANEURALNETWORKS_MODEL: + return slang::type::data_type::kMODELVALUE; default: - std::cout << "Unknown data type from nnapi." 
<< std::endl; + LOGW("Unsupported NNAPI dtype: %d", type); + return slang::type::data_type::kINVALID; } - return slang::type::data_type::kINVALID; } slang::type::quant_type MapQuantType(int32_t type) { @@ -118,250 +141,32 @@ slang::type::quant_type MapQuantType(int32_t type) { return slang::type::quant_type::kINVALID; } -void PrintVXSpec(const tim::vx::TensorSpec& spec) { - std::cout << "-------------------------------------------" << std::endl; - std::cout << "Timvx tensor datatype: "; - switch ((int32_t)spec.datatype_) { - case 1: - std::cout << "INT8" << std::endl; - break; - case 2: - std::cout << "UINT8" << std::endl; - break; - case 3: - std::cout << "INT16" << std::endl; - break; - case 4: - std::cout << "UINT16" << std::endl; - break; - case 5: - std::cout << "INT32" << std::endl; - break; - case 6: - std::cout << "UINT32" << std::endl; - break; - case 7: - std::cout << "INT64" << std::endl; - break; - case 8: - std::cout << "FLOAT16" << std::endl; - break; - case 9: - std::cout << "FLOAT32" << std::endl; - break; - case 10: - std::cout << "BOOL8" << std::endl; - break; - default: - std::cout << "Not support INT64 and other type"; - break; - } - std::cout << "Shape: "; - for (auto it = spec.shape_.begin(); it != spec.shape_.end(); it++) { - std::cout << *it << ","; +Shape combineShape(const Shape& lhs, const Shape& rhs) { + if (rhs.empty()) { + return lhs; } - std::cout << std::endl; - std::cout << "Attr: "; - switch ((int32_t)spec.attr_) { - case 1: - std::cout << "CONSTANT" << std::endl; - break; - case 2: - std::cout << "TRANSIENT" << std::endl; - break; - case 4: - std::cout << "VARIABLE" << std::endl; - break; - case 8: - std::cout << "INPUT" << std::endl; - break; - case 16: - std::cout << "OUTPUT" << std::endl; - break; - default: - std::cout << "Not support attr" << std::endl; - break; - } - std::cout << "QuantType: "; - switch ((int32_t)spec.quantization_.Type()) { - case 0: - std::cout << "NONE" << std::endl; - break; - case 1: - std::cout << "ASYMMETRIC" << std::endl; - break; - case 2: - std::cout << "SYMMETRIC_PER_CHANNEL" << std::endl; - break; - case 3: - std::cout << "DYNAMIC_FIXED_POINT" << std::endl; - break; - case 16: - std::cout << "OUTPUT" << std::endl; - break; - default: - std::cout << "Not support quantization type" << std::endl; - break; - } - if ((int32_t)spec.quantization_.Type() != 0) { - std::cout << "Channel_dim: " << spec.quantization_.ChannelDim() << std::endl; - std::cout << "Scales: "; - PrintVector(spec.quantization_.Scales()); - std::cout << "Zero_points: "; - PrintVector(spec.quantization_.ZeroPoints()); - std::cout << "Fl: " << spec.quantization_.Fl() << std::endl; - } -} -void PrintTensorStorage(slang::type::tensor_storage s) { - std::cout << "-------------------------------------------" << std::endl; - std::cout << "Tensor storage datatype: "; - switch ((int32_t)s.dtype) { - case 0: - std::cout << "kTF32" << std::endl; - break; - case 1: - std::cout << "kFP32" << std::endl; - break; - case 2: - std::cout << "kFP16" << std::endl; - break; - case 3: - std::cout << "kBF16" << std::endl; - break; - case 4: - std::cout << "kINT64" << std::endl; - break; - case 5: - std::cout << "kINT32" << std::endl; - break; - case 6: - std::cout << "kUINT32" << std::endl; - break; - case 7: - std::cout << "kINT16" << std::endl; - break; - case 8: - std::cout << "kUINT16" << std::endl; - break; - case 9: - std::cout << "kINT8" << std::endl; - break; - case 10: - std::cout << "kUINT8" << std::endl; - break; - case 11: - std::cout << "kBOOL8" << std::endl; - 
break; - default: - std::cout << "Not support tensor storage type" << std::endl; - break; + if (lhs.empty()) { + return rhs; } - std::cout << "data_length: " << s.data_length << std::endl; - std::cout << "shape: "; // original layout nhwc/nchw(not reverse) - for (auto it = s.shape.begin(); it != s.shape.end(); it++) { - std::cout << *it << ","; - } - std::cout << std::endl; - std::cout << "attr: "; - switch ((int32_t)s.attr) { - case 0: - std::cout << "kVARIABLE" << std::endl; - break; - case 1: - std::cout << "kCONSTANT" << std::endl; - break; - default: - std::cout << "Not support tensor torage attr" << std::endl; - break; - } - std::cout << "quant_type: "; - switch ((int32_t)s.qtype) { - case 0: - std::cout << "kNONE" << std::endl; - break; - case 1: - std::cout << "kASYMM" << std::endl; - break; - case 2: - std::cout << "kSYMM" << std::endl; - break; - case 3: - std::cout << "kSYMM_PCQ" << std::endl; - break; - case 4: - std::cout << "kDFP" << std::endl; - break; - default: - std::cout << "Not support tensor torage qtype" << std::endl; - break; - } - std::cout << "scale: " << s.scale << std::endl; - std::cout << "zero_point: " << s.zero_point << std::endl; - if (s.qtype == slang::type::quant_type::kSYMM_PCQ) { - std::cout << "channel_dim: " << s.channel_dim << std::endl; - std::cout << "per_channel_scales: "; - PrintVector(s.per_channel_scales); - std::cout << "per_channel_zero_points: "; - PrintVector(s.per_channel_zero_points); + + if (lhs.size() != rhs.size()) { + LOGE("%s incompatible ranks: lhs (%zu) vs. rhs (%zu)", __func__, lhs.size(), rhs.size()); + return {}; } -} -void PrintScalarStorage(slang::type::scalar_storage s) { - std::cout << "-------------------------------------------" << std::endl; - std::cout << "data_length: " << s.data.size() << std::endl; - std::cout << "Scalar storage datatype: "; - switch ((int32_t)s.dtype) { - case 0: - std::cout << "kTF32" << std::endl; - break; - case 1: - std::cout << "kFP32" << std::endl; - PrintScalarStorageData(s); - break; - case 2: - std::cout << "kFP16" << std::endl; - break; - case 3: - std::cout << "kBF16" << std::endl; - break; - case 4: - std::cout << "kINT64" << std::endl; - PrintScalarStorageData(s); - break; - case 5: - std::cout << "kINT32" << std::endl; - PrintScalarStorageData(s); - break; - case 6: - std::cout << "kUINT32" << std::endl; - PrintScalarStorageData(s); - break; - case 7: - std::cout << "kINT16" << std::endl; - PrintScalarStorageData(s); - break; - case 8: - std::cout << "kUINT16" << std::endl; - PrintScalarStorageData(s); - break; - case 9: - std::cout << "kINT8" << std::endl; - PrintScalarStorageData(s); - break; - case 10: - std::cout << "kUINT8" << std::endl; - PrintScalarStorageData(s); - break; - case 11: - std::cout << "kBOOL8" << std::endl; - PrintScalarStorageData(s); - break; - default: - std::cout << "Not support scalar storage type" << std::endl; - break; + Shape combined = lhs; + for (size_t i = 0; i < lhs.size(); i++) { + if (lhs[i] == 0) { + combined[i] = rhs[i]; + } else if (rhs[i] != 0 && lhs[i] != rhs[i]) { + LOGE("%s incompatible dim length at axis %zu: lhs (%u) vs. 
rhs (%u)", __func__, i, + lhs[i], rhs[i]); + return {}; + } } + + return combined; } -} // namespace sl -} // namespace android -} // namespace vsi \ No newline at end of file + +} // namespace vsi::android::sl \ No newline at end of file diff --git a/src/Utils.h b/src/Utils.h index 2cc6ec3..183b6a7 100644 --- a/src/Utils.h +++ b/src/Utils.h @@ -1,6 +1,6 @@ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,8 +21,10 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ + #ifndef VSI_ANDROID_SL_UTILS_H_ #define VSI_ANDROID_SL_UTILS_H_ + #include #include @@ -30,18 +32,28 @@ #include "slang/type_system.h" #include "tim/vx/types.h" -namespace vsi { -namespace android { -namespace sl { +namespace vsi::android::sl { #define LOG_TAG "NNAPI-VSI-SL" + +#if NDEBUG +#define LOGV(...) static_assert(true, "NOOP") +#else #define LOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, LOG_TAG, __VA_ARGS__) +#endif + #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__) #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__) #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__) #define LOGF(...) __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, __VA_ARGS__) +constexpr size_t alignSize(size_t size, size_t alignment) { + return (size + (alignment - 1)) & ~(alignment - 1); +} + +size_t getDtypeSize(slang::type::data_type type); + tim::vx::DataType ToTvxDataType(slang::type::data_type type); tim::vx::QuantType ToTvxQuantType(slang::type::quant_type type); @@ -50,47 +62,8 @@ slang::type::data_type MapDataType(int32_t type); slang::type::quant_type MapQuantType(int32_t type); -void PrintVXSpec(const tim::vx::TensorSpec& spec); - -void PrintTensorStorage(slang::type::tensor_storage s); - -void PrintScalarStorage(slang::type::scalar_storage s); - -template -uint32_t GetTypeSize() { - return sizeof(T); -} - -template -void PrintVector(const std::vector& vector) { - std::cout << "-------------------------------------------" << std::endl; - for (auto it = vector.begin(); it != vector.end(); it++) { - std::cout << *it << ","; - } - std::cout << std::endl; -} - -template -void PrintArray(const std::array& array) { - std::cout << "-------------------------------------------" << std::endl; - for (auto it = array.begin(); it != array.end(); it++) { - std::cout << *it << ","; - } - std::cout << std::endl; -} - -template -void PrintScalarStorageData(slang::type::scalar_storage s) { - std::cout << "scalar data: "; - int length = s.data.size() / GetTypeSize(); - for (int i = 0; i < length; ++i) { - std::cout << *((T*)s.data.data() + i) << ","; - } - std::cout << "real length: " << length << std::endl; // Real length of this type data -} +Shape combineShape(const Shape& lhs, const Shape& rhs); -} // namespace sl -} // namespace android -} // namespace vsi +} // namespace vsi::android::sl #endif \ No newline at end of file diff --git a/src/VsiDevice.h b/src/VsiDevice.h deleted file mode 100644 index efc08d4..0000000 --- a/src/VsiDevice.h +++ /dev/null @@ -1,95 +0,0 @@ -/**************************************************************************** - * - * 
Copyright (c) 2022 Vivante Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - *****************************************************************************/ -#ifndef VSI_ANDROID_SL_VSI_DEVICE_H -#define VSI_ANDROID_SL_VSI_DEVICE_H - -#include -#include -#include - -#include "tim/vx/platform/platform.h" -#include -#include "Types.h" - -namespace vsi { -namespace android { -namespace sl { - -class VsiDevice { - public: - VsiDevice(std::shared_ptr device, std::string name) - : device_(device), name_(name) {} - const std::string& GetName() const { return name_; } - const std::string& GetVersion() const { return version_; } - const int64_t& GetFeatureLevel() const { return feature_level_; } - std::shared_ptr Device() const { return device_; } - const Capabilities& getCapabilities() const { return capabilities_; } - Capabilities createNpuCapabilities() { - constexpr Capabilities::PerformanceInfo PerfInfo = {.execTime = 0.1f, .powerUsage = 0.1f}; - constexpr OperandType OperandsTypes[] = { - OperandType::FLOAT32, - OperandType::INT32, - OperandType::UINT32, - OperandType::TENSOR_FLOAT32, - OperandType::TENSOR_INT32, - OperandType::TENSOR_QUANT8_ASYMM, - OperandType::BOOL, - OperandType::TENSOR_QUANT16_SYMM, - OperandType::TENSOR_FLOAT16, - OperandType::TENSOR_BOOL8, - OperandType::FLOAT16, - OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL, - OperandType::TENSOR_QUANT16_ASYMM, - OperandType::TENSOR_QUANT8_SYMM, - OperandType::TENSOR_QUANT8_ASYMM_SIGNED, - }; - - std::vector operandPerformance; - operandPerformance.reserve(std::size(OperandsTypes)); - std::transform(std::begin(OperandsTypes), std::end(OperandsTypes), - std::back_inserter(operandPerformance), [PerfInfo](OperandType op) { - return Capabilities::OperandPerformance{.type = op, .info = PerfInfo}; - }); - auto table = Capabilities::OperandPerformanceTable(operandPerformance); - - return {.relaxedFloat32toFloat16PerformanceScalar = PerfInfo, - .relaxedFloat32toFloat16PerformanceTensor = PerfInfo, - .operandPerformance = table, - .ifPerformance = {.execTime = __FLT_MAX__, .powerUsage = __FLT_MAX__}, - .whilePerformance = {.execTime = __FLT_MAX__, .powerUsage = __FLT_MAX__}}; - } - - private: - const std::string name_; - const std::string version_{"0.0.1"}; - const int64_t feature_level_{1000006}; //feature level 7 - std::shared_ptr device_; - const Capabilities capabilities_ = createNpuCapabilities(); -}; - -} // namespace sl -} // namespace android -} // namespace vsi - -#endif \ No newline at end of 
file diff --git a/src/VsiNeuralNetworksSupportLibraryimpl.cpp b/src/VsiNeuralNetworksSupportLibraryimpl.cpp index 0123cda..863eaee 100644 --- a/src/VsiNeuralNetworksSupportLibraryimpl.cpp +++ b/src/VsiNeuralNetworksSupportLibraryimpl.cpp @@ -15,7 +15,7 @@ */ /**************************************************************************** * - * Copyright (c) 2022 Vivante Corporation + * Copyright (c) 2024 Vivante Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software" = @@ -36,126 +36,106 @@ * DEALINGS IN THE SOFTWARE. * *****************************************************************************/ -#include -#include -#include -#include +#include + #include #include #include #include "Compilation.h" +#include "Device.h" #include "DeviceManager.h" +#include "Event.h" #include "Execution.h" #include "Memory.h" +#include "MemoryDesc.h" #include "Model.h" -// #include "Event.h" -#include #include "NeuralNetworksSupportLibraryImpl.h" -#include "VsiDevice.h" - - -namespace operation_while { +#include "Utils.h" -constexpr auto kLoopTimeoutDefault = std::chrono::seconds{2}; -constexpr auto kLoopTimeoutMaximum = std::chrono::seconds{15}; - -constexpr uint32_t kCondModelOperand = 0; -constexpr uint32_t kBodyModelOperand = 1; -constexpr uint32_t kFirstInput = 2; - -// See ANeuralNetworksExecution_setLoopTimeout. -constexpr uint64_t kTimeoutNsDefault = - std::chrono::duration_cast(kLoopTimeoutDefault).count(); -constexpr uint64_t kTimeoutNsMaximum = - std::chrono::duration_cast(kLoopTimeoutMaximum).count(); - -} // namespace operation_while - -// using namespace android::nn; using namespace vsi::android::sl; -#define TAG_NAME "NNAPI-SL" int ANeuralNetworks_getDeviceCount(uint32_t* numDevices) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworks_getDeviceCount is called "); - *numDevices = DeviceManager::Instance()->GetDevices().size(); + LOGV(__func__); + if (numDevices == nullptr) { + LOGE("%s passed a nullptr", __func__); + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + *numDevices = DeviceManager::get()->getNumDevices(); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworks_getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, "=====ANeuralNetworks_getDevice is called "); + LOGV(__func__); if (device == nullptr) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworks_getDevice passed a nullptr"); + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - auto devices = DeviceManager::Instance()->GetDevices(); - *device = reinterpret_cast(devices.at(devIndex).get()); + const auto& devices = DeviceManager::get()->getDevices(); + if (devIndex >= devices.size()) { + LOGE("%s passed an invalid device index", __func__); + return ANEURALNETWORKS_BAD_DATA; + } + + *device = reinterpret_cast(devices[devIndex].get()); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksDevice_getName(const ANeuralNetworksDevice* device, const char** name) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksDevice_getName is called "); + LOGV(__func__); if (device == nullptr || name == nullptr) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksDevice_getName passed a nullptr"); + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - const VsiDevice* d = reinterpret_cast(device); - *name = d->GetName().c_str(); + 
const auto* dev = reinterpret_cast(device); + *name = dev->getName().data(); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice* device, const char** version) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksDevice_getVersion is called "); + LOGV(__func__); if (device == nullptr || version == nullptr) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksDevice_getVersion passed a nullptr"); + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - const VsiDevice* d = reinterpret_cast(device); - *version = d->GetVersion().c_str(); + + const auto* dev = reinterpret_cast(device); + *version = dev->getVersion().data(); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksDevice_getType(const ANeuralNetworksDevice* device, int32_t* type) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksDevice_getType is called "); - if (!device) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksDevice_getType passed a nullptr"); + LOGV(__func__); + if (device == nullptr || type == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } + *type = ANEURALNETWORKS_DEVICE_ACCELERATOR; return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksDevice_getFeatureLevel(const ANeuralNetworksDevice* device, int64_t* featureLevel) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksDevice_getFeatureLevel is called "); - if (device == nullptr) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksDevice_getFeatureLevel passed a nullptr"); + LOGV(__func__); + if (device == nullptr || featureLevel == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - const VsiDevice* d = reinterpret_cast(device); - *featureLevel = d->GetFeatureLevel(); + + const auto* dev = reinterpret_cast(device); + *featureLevel = dev->getFeatureLevel(); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksDevice_wait(const ANeuralNetworksDevice* device) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksDevice_wait is called "); + LOGV(__func__); if (device == nullptr) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksDevice_wait passed a nullptr"); + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } @@ -165,701 +145,728 @@ int ANeuralNetworksDevice_wait(const ANeuralNetworksDevice* device) { int ANeuralNetworksModel_getSupportedOperationsForDevices( const ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, bool* supportedOps) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_getSupportedOperationsForDevices is " - "called "); - if (!model || !devices || !supportedOps) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_getSupportedOperationsForDevices get nullptr"); - return ANEURALNETWORKS_UNEXPECTED_NULL; - } - const Model* m = reinterpret_cast(model); - return m->GetSupportedOperations(supportedOps); -} + LOGV(__func__); -int ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, - uint32_t numDevices, - ANeuralNetworksCompilation** compilation) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksCompilation_createForDevices is called "); - if (!model || !devices || !compilation) { 
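// Reading note (not part of the patch): the template arguments of the casts
// in this file were stripped by the text rendering. Based on the surrounding
// code, the intended pattern is assumed to be:
//
//   const auto* dev = reinterpret_cast<const Device*>(device);
//   *name = dev->getName().data();
//
// i.e. the opaque ANeuralNetworks* handles are bit-cast to the SL's internal
// Device/Model/Compilation/Execution classes at every entry point.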
- __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksCompilation_createForDevices get nullptr"); + if (model == nullptr || devices == nullptr || supportedOps == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - // pointer "devices" may be released after this call, so we must copy its - // content here. - const VsiDevice* const* vsiDevices = reinterpret_cast(devices); - std::vector vsiDeviceVec; - for (uint32_t i = 0; i < numDevices; ++i) vsiDeviceVec.push_back(vsiDevices[i]); - - Compilation* c = new Compilation(m, vsiDeviceVec); - *compilation = reinterpret_cast(c); - return ANEURALNETWORKS_NO_ERROR; -} -int ANeuralNetworksExecution_compute(ANeuralNetworksExecution* execution) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksExecution_compute is called "); - if (!execution) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksExecution_compute get nullptr"); - return ANEURALNETWORKS_UNEXPECTED_NULL; + if (numDevices == 0) { + LOGE("%s passed an empty device list", __func__); + return ANEURALNETWORKS_BAD_DATA; } - Execution* e = reinterpret_cast(execution); - return e->Compute(); -} - -int ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution* execution, bool measure) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksExecution_setMeasureTiming is called "); - return ANEURALNETWORKS_NO_ERROR; -} - -int ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution* execution, - int32_t durationCode, uint64_t* duration) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksExecution_getDuration is called "); - - return ANEURALNETWORKS_NO_ERROR; + const auto* m = reinterpret_cast(model); + return m->getSupportedOperations(supportedOps); } int ANeuralNetworksBurst_create(ANeuralNetworksCompilation* compilation, ANeuralNetworksBurst** burst) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksBurst_create is called "); + LOGV(__func__); return ANEURALNETWORKS_NO_ERROR; } void ANeuralNetworksBurst_free(ANeuralNetworksBurst* burst) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, "=====ANeuralNetworksBurst_free is called "); + LOGV(__func__); } int ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution* execution, ANeuralNetworksBurst* burst) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksExecution_burstCompute is called "); + LOGV(__func__); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksMemoryDesc_create(ANeuralNetworksMemoryDesc** desc) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemoryDesc_create is called "); - if (desc != nullptr) { - *desc = nullptr; - } - if (!desc) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc_create passed a nullptr"); + LOGV(__func__); + if (desc == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - MemoryDesc* mdesc = new MemoryDesc(); - *desc = reinterpret_cast(mdesc); + + auto* memDesc = new MemoryDesc(); + *desc = reinterpret_cast(memDesc); return ANEURALNETWORKS_NO_ERROR; } void ANeuralNetworksMemoryDesc_free(ANeuralNetworksMemoryDesc* desc) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemoryDesc_free is called "); - if (!desc) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc_free passed a 
nullptr"); + LOGV(__func__); + if (desc == nullptr) { + LOGD("%s passed a nullptr", __func__); return; } - MemoryDesc* mdesc = reinterpret_cast(desc); - delete mdesc; + + auto* memDesc = reinterpret_cast(desc); + delete memDesc; } int ANeuralNetworksMemoryDesc_addInputRole(ANeuralNetworksMemoryDesc* desc, const ANeuralNetworksCompilation* compilation, uint32_t index, float frequency) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemoryDesc_addInputRole is called "); - if (!desc || !compilation) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc_addInputRole passed a nullptr"); + LOGV(__func__); + if (desc == nullptr || compilation == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - if (frequency <= 0 || frequency > 1) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc has a invalid frequency"); - return ANEURALNETWORKS_BAD_DATA; - } - MemoryDesc* mdesc = reinterpret_cast(desc); - if (mdesc->IsFinished()) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "This memory descriptor has been finished"); - return ANEURALNETWORKS_BAD_DATA; - } - const Compilation* c = reinterpret_cast(compilation); - auto model = c->GetModel(); - auto tensor_map = model->Tensors(); - int32_t input_id = model->Inputs()[index]; - return mdesc->AddRole(tensor_map, vsi::android::sl::IOType::INPUT, input_id, frequency); + + auto* memDesc = reinterpret_cast(desc); + const auto* c = reinterpret_cast(compilation); + return memDesc->addRole(c, IOType::INPUT, index, frequency); } int ANeuralNetworksMemoryDesc_addOutputRole(ANeuralNetworksMemoryDesc* desc, const ANeuralNetworksCompilation* compilation, uint32_t index, float frequency) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemoryDesc_addOutputRole is called "); - if (!desc || !compilation) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc_addInputRole passed a nullptr"); + LOGV(__func__); + if (desc == nullptr || compilation == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - if (frequency <= 0 || frequency > 1) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc has a invalid frequency"); - return ANEURALNETWORKS_BAD_DATA; - } - MemoryDesc* mdesc = reinterpret_cast(desc); - if (mdesc->IsFinished()) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "This memory descriptor has been finished"); - return ANEURALNETWORKS_BAD_DATA; - } - const Compilation* c = reinterpret_cast(compilation); - auto model = c->GetModel(); - auto tensor_map = model->Tensors(); - return mdesc->AddRole(tensor_map, vsi::android::sl::IOType::OUTPUT, index, frequency); + + auto* memDesc = reinterpret_cast(desc); + const auto* c = reinterpret_cast(compilation); + return memDesc->addRole(c, IOType::OUTPUT, index, frequency); } int ANeuralNetworksMemoryDesc_setDimensions(ANeuralNetworksMemoryDesc* desc, uint32_t rank, const uint32_t* dimensions) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemoryDesc_setDimensions is called "); - if (!desc || (!dimensions && rank > 0)) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc_setDimensions passed a nullptr"); + LOGV(__func__); + + if (desc == nullptr || (dimensions == nullptr && rank > 0)) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - MemoryDesc* mdesc = 
reinterpret_cast(desc); - if (mdesc->IsFinished()) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "This memory descriptor has been finished"); - return ANEURALNETWORKS_BAD_DATA; - } + + auto* memDesc = reinterpret_cast(desc); const std::vector shape(dimensions, dimensions + rank); - return mdesc->SetDimensions(shape); + return memDesc->setShape(shape); } int ANeuralNetworksMemoryDesc_finish(ANeuralNetworksMemoryDesc* desc) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemoryDesc_finish is called "); - if (!desc) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemoryDesc_finish passed a nullptr"); + LOGV(__func__); + if (desc == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - MemoryDesc* mdesc = reinterpret_cast(desc); - return mdesc->Finish(); + + auto* memDesc = reinterpret_cast(desc); + return memDesc->finish(); } int ANeuralNetworksMemory_createFromDesc(const ANeuralNetworksMemoryDesc* desc, ANeuralNetworksMemory** memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemory_createFromDesc is called "); - if (!desc || !memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemory_createFromDesc passed a nullptr"); + LOGV(__func__); + if (desc == nullptr || memory == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - auto mdesc = reinterpret_cast(desc); - Memory* mem = new Memory(); - auto status = mem->CreateFromDesc(mdesc); - if (status != ANEURALNETWORKS_NO_ERROR) { - return status; - } - *memory = reinterpret_cast(mem); - return ANEURALNETWORKS_NO_ERROR; -} -int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory* src, const ANeuralNetworksMemory* dst) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemory_copy is called "); - if (!src || !dst) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, "ANeuralNetworksMemory_copy passed a nullptr"); - return ANEURALNETWORKS_UNEXPECTED_NULL; + const auto* memDesc = reinterpret_cast(desc); + auto* mem = DeviceMemory::create(memDesc); + if (mem == nullptr) { + LOGE("%s failed to create device memory from desc", __func__); + return ANEURALNETWORKS_OP_FAILED; } - Memory* msrc = const_cast(reinterpret_cast(src)); - Memory* mdst = const_cast(reinterpret_cast(dst)); - if (mdst->IsCreateFromDesc() && msrc->IsCreateFromDesc()) { - auto src_rank = msrc->GetDesc()->Shape().size(); - auto dst_rank = mdst->GetDesc()->Shape().size(); - if(src_rank != dst_rank) return ANEURALNETWORKS_BAD_DATA; - } else { - if(msrc->Length() != mdst->Length()) return ANEURALNETWORKS_BAD_DATA; - } - // TODO: if the src is created from ANeuralNetworksMemory_createFromDesc, it must have been used - // as an output in a successful execution, or used as the destination memory in a successful - // ANeuralNetworksMemory_copy. 
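// Client-side sketch of the MemoryDesc flow these entry points implement
// (standard NNAPI usage; `compilation` is assumed to be already finished):
static int deviceMemorySketch(ANeuralNetworksCompilation* compilation,
                              ANeuralNetworksMemory** outMemory) {
    ANeuralNetworksMemoryDesc* desc = nullptr;
    ANeuralNetworksMemoryDesc_create(&desc);
    // Declare how the memory will be used so the driver can pick a layout.
    ANeuralNetworksMemoryDesc_addInputRole(desc, compilation, /*index=*/0, /*frequency=*/1.0F);
    ANeuralNetworksMemoryDesc_finish(desc);
    int status = ANeuralNetworksMemory_createFromDesc(desc, outMemory);
    ANeuralNetworksMemoryDesc_free(desc);  // the created memory stays valid
    return status;
}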
- memcpy(mdst->Data(), msrc->Data(), msrc->Length()); + + *memory = reinterpret_cast(mem); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksMemory_createFromFd(size_t size, int prot, int fd, size_t offset, ANeuralNetworksMemory** memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemory_createFromFd is called "); - if (!fd || !memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemory_createFromFd passed a nullptr"); + LOGV(__func__); + + if (memory == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Memory* mem = new Memory(); - auto status = mem->CreateFromFd(size, prot, fd, offset); - if (status != ANEURALNETWORKS_NO_ERROR) { - return status; + + auto* mem = FdMemory::create(size, prot, fd, offset); + if (mem == nullptr) { + LOGE("%s failed to create memory from fd (%d)", __func__, fd); + return ANEURALNETWORKS_BAD_DATA; } + *memory = reinterpret_cast(mem); return ANEURALNETWORKS_NO_ERROR; } int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer* ahwb, ANeuralNetworksMemory** memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemory_createFromAHardwareBuffer is called "); - if (!ahwb || !memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemory_createFromAHardwareBuffer passed a nullptr"); + LOGV(__func__); + + if (ahwb == nullptr || memory == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Memory* mem = new Memory(); - auto status = mem->CreateFromAHWB(ahwb); - if (status != ANEURALNETWORKS_NO_ERROR) { - return status; + + auto* mem = AHardwareBufferMemory::create(ahwb); + if (mem == nullptr) { + LOGE("%s failed to create memory from ahwb", __func__); + return ANEURALNETWORKS_BAD_DATA; } + *memory = reinterpret_cast(mem); return ANEURALNETWORKS_NO_ERROR; } void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksMemory_free is called "); - if (!memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksMemory_free passed a nullptr"); - return ; - } - if (memory == nullptr) return; - Memory* mem = reinterpret_cast(memory); + LOGV(__func__); + if (memory == nullptr) { + LOGD("%s passed a nullptr", __func__); + return; + } + + auto* mem = reinterpret_cast(memory); delete mem; - mem = nullptr; +} + +int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory* src, const ANeuralNetworksMemory* dst) { + LOGV(__func__); + if (src == nullptr || dst == nullptr) { + LOGE("%s passed a nullptr", __func__); + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + const auto* srcMem = reinterpret_cast(src); + const auto* dstMem = reinterpret_cast(dst); + return IMemory::copy(srcMem, dstMem); } int ANeuralNetworksModel_create(ANeuralNetworksModel** model) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_create is called "); - if (!model) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_create passed a nullptr"); + LOGV(__func__); + + if (model == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = new Model(); + + auto* m = new Model(); *model = reinterpret_cast(m); return ANEURALNETWORKS_NO_ERROR; } void ANeuralNetworksModel_free(ANeuralNetworksModel* model) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, 
"=====ANeuralNetworksModel_free is called "); - if (!model) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_free passed a nullptr"); - return ; - } - if (model == nullptr) return; - Model* m = reinterpret_cast(model); + LOGV(__func__); + + if (model == nullptr) { + LOGV("%s passed a nullptr", __func__); + return; + } + + auto* m = reinterpret_cast(model); delete m; - m = nullptr; } int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_finish is called "); - if (!model) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_finish passed a nullptr"); + LOGV(__func__); + + if (model == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->Finish(); + + auto* m = reinterpret_cast(model); + return m->finish(); } int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_addOperand is called "); - if (!model || !type) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_addOperand passed a nullptr"); + LOGV(__func__); + + if (model == nullptr || type == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->AddOperand(*type); + + auto* m = reinterpret_cast(model); + return m->addOperand(*type); } int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, int32_t index, const void* buffer, size_t length) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_setOperandValue is called "); - if (!model || (!buffer && length != 0)) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_setOperandValue passed a nullptr"); + LOGV(__func__); + + if (model == nullptr || (buffer == nullptr && length != 0)) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->SetOperandValue(index, buffer, length); + + auto* m = reinterpret_cast(model); + return m->setOperandValue(index, buffer, length); } int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksMemory* memory, size_t offset, size_t length) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_setOperandValueFromMemory is called "); - if (!model || !memory) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_setOperandValueFromMemory passed a nullptr"); + LOGV(__func__); + + if (model == nullptr || memory == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - const Memory* mem = reinterpret_cast(memory); - return m->SetOperandValueFromMemory(index, mem, offset, length); + + auto* m = reinterpret_cast(model); + const auto* mem = reinterpret_cast(memory); + return m->setOperandValueFromMemory(index, mem, offset, length); } int ANeuralNetworksModel_setOperandValueFromModel(ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksModel* value) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_setOperandValueFromModel is called "); + LOGV(__func__); - return ANEURALNETWORKS_NO_ERROR; + if (model == nullptr || 
value == nullptr) { + LOGE("%s passed a nullptr", __func__); + return ANEURALNETWORKS_UNEXPECTED_NULL; + } + + auto* m = reinterpret_cast(model); + const auto* reference = reinterpret_cast(value); + return m->setOperandValueFromModel(index, reference); } int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model, ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_addOperation is called "); - if (!model || !inputs || !outputs) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_addOperation passed a nullptr"); + LOGV(__func__); + + if (model == nullptr || inputs == nullptr || outputs == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->AddOperation(type, inputCount, inputs, outputCount, outputs); + + auto* m = reinterpret_cast(model); + return m->addOperation(type, inputCount, inputs, outputCount, outputs); } int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_setOperandSymmPerChannelQuantParams " - "is called "); - if (!model || !channelQuant) { - __android_log_print( - ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_setOperandSymmPerChannelQuantParams passed a nullptr"); + LOGV(__func__); + + if (model == nullptr || channelQuant == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->SetOperandSymmPerChannelQuantParams(index, *channelQuant); + + auto* m = reinterpret_cast(model); + return m->setOperandSymmPerChannelQuantParams(index, *channelQuant); } int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_identifyInputsAndOutputs is called "); - if (!model || !inputs || !outputs) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_identifyInputsAndOutputs passed a nullptr"); + LOGV(__func__); + + if (model == nullptr || inputs == nullptr || outputs == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->IdentifyInputsAndOutputs(inputCount, inputs, outputCount, outputs); + + auto* m = reinterpret_cast(model); + return m->identifyInputsAndOutputs(inputCount, inputs, outputCount, outputs); } int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bool allow) { - __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, - "=====ANeuralNetworksModel_relaxComputationFloat32toFloat16 is " - "called "); - if (!model) { - __android_log_print( - ANDROID_LOG_VERBOSE, TAG_NAME, - "ANeuralNetworksModel_relaxComputationFloat32toFloat16 passed a nullptr"); + LOGV(__func__); + + if (model == nullptr) { + LOGE("%s passed a nullptr", __func__); return ANEURALNETWORKS_UNEXPECTED_NULL; } - Model* m = reinterpret_cast(model); - return m->RelaxComputationFloat32toFloat16(allow); + + auto* m = reinterpret_cast(model); + return m->relaxComputationFloat32toFloat16(allow); } int 
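// End-to-end sketch of the model-building entry points above (standard NNAPI
// usage; the operand indices are hypothetical and the operand/operation
// details are elided):
static int buildModelSketch(ANeuralNetworksModel** outModel) {
    ANeuralNetworksModel* model = nullptr;
    ANeuralNetworksModel_create(&model);
    // ... ANeuralNetworksModel_addOperand / ANeuralNetworksModel_setOperandValue /
    //     ANeuralNetworksModel_addOperation for each node ...
    const uint32_t inputs[] = {0};   // hypothetical operand index
    const uint32_t outputs[] = {1};  // hypothetical operand index
    ANeuralNetworksModel_identifyInputsAndOutputs(model, 1, inputs, 1, outputs);
    int status = ANeuralNetworksModel_finish(model);  // maps to Model::finish() above
    *outModel = model;  // after finish() the model is immutable and reusable
    return status;
}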
ANeuralNetworksCompilation_create(ANeuralNetworksModel* model,
                                      ANeuralNetworksCompilation** compilation) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_create is called ");
-    if (!model || !compilation) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_create passed a nullptr");
+    LOGV(__func__);
+    if (model == nullptr || compilation == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    auto* m = reinterpret_cast(model);
+    auto* c = new Compilation(m);
+    *compilation = reinterpret_cast(c);
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel* model,
+                                                const ANeuralNetworksDevice* const* devices,
+                                                uint32_t numDevices,
+                                                ANeuralNetworksCompilation** compilation) {
+    LOGV(__func__);
+
+    if (model == nullptr || devices == nullptr || compilation == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Model* m = reinterpret_cast(model);
-    Compilation* c = new Compilation(m);
+
+    if (numDevices == 0) {
+        LOGE("%s passed an empty device list", __func__);
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    std::vector> selectedDevices;
+    for (size_t i = 0; i < numDevices; i++) {
+        if (devices[i] == nullptr) {
+            LOGE("%s passed a nullptr as a device", __func__);
+            return ANEURALNETWORKS_UNEXPECTED_NULL;
+        }
+
+        for (size_t j = i + 1; j < numDevices; j++) {
+            if (devices[i] == devices[j]) {
+                LOGE("%s passed duplicate devices", __func__);
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+        }
+
+        for (const auto& device : DeviceManager::get()->getDevices()) {
+            if (device.get() == reinterpret_cast(devices[i])) {
+                // Found a match.
+                selectedDevices.push_back(device);
+                break;
+            }
+        }
+    }
+
+    if (selectedDevices.size() != numDevices) {
+        LOGE("%s passed an invalid device set", __func__);
+        // Fail here instead of silently compiling for a partial device set.
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    auto* m = reinterpret_cast(model);
+    auto* c = new Compilation(m, selectedDevices);
     *compilation = reinterpret_cast(c);
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_free is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_free passed a nullptr");
+        LOGV("%s passed a nullptr", __func__);
        return;
     }
-    Compilation* c = reinterpret_cast(compilation);
+
+    auto* c = reinterpret_cast(compilation);
     delete c;
-    c = nullptr;
 }
 
 int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compilation,
                                              int32_t preference) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_setPreference is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_setPreference passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Compilation* c = reinterpret_cast(compilation);
-    return c->SetPreference((PreferenceCode)preference);
+
+    auto* c = reinterpret_cast(compilation);
+    return c->setPreference(static_cast(preference));
 }
 
 int ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation* compilation,
                                           const char* cacheDir, const uint8_t* token) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_setCaching is called ");
+    LOGV(__func__);
+    if (compilation == 
+    for (size_t i = 0; i < numDevices; i++) {
+        if (devices[i] == nullptr) {
+            LOGE("%s passed a nullptr as a device", __func__);
+            return ANEURALNETWORKS_UNEXPECTED_NULL;
+        }
+
+        for (size_t j = i + 1; j < numDevices; j++) {
+            if (devices[i] == devices[j]) {
+                LOGE("%s passed duplicate devices", __func__);
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+        }
+
+        for (const auto& device : DeviceManager::get()->getDevices()) {
+            if (device.get() == reinterpret_cast<const Device*>(devices[i])) {
+                // Found a match.
+                selectedDevices.push_back(device);
+                break;
+            }
+        }
+    }
+
+    if (selectedDevices.size() != numDevices) {
+        LOGE("%s passed an invalid device set", __func__);
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    auto* m = reinterpret_cast<Model*>(model);
+    auto* c = new Compilation(m, selectedDevices);
     *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_free is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_free passed a nullptr");
+        LOGV("%s passed a nullptr", __func__);
         return;
     }
-    Compilation* c = reinterpret_cast<Compilation*>(compilation);
+
+    auto* c = reinterpret_cast<Compilation*>(compilation);
     delete c;
-    c = nullptr;
 }
 
 int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compilation,
                                              int32_t preference) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_setPreference is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_setPreference passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Compilation* c = reinterpret_cast<Compilation*>(compilation);
-    return c->SetPreference((PreferenceCode)preference);
+
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    return c->setPreference(static_cast<PreferenceCode>(preference));
 }
 
 int ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation* compilation,
                                           const char* cacheDir, const uint8_t* token) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_setCaching is called ");
+    LOGV(__func__);
+
+    if (compilation == nullptr || cacheDir == nullptr || token == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
 
-    return ANEURALNETWORKS_NO_ERROR;
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    return c->setCaching(cacheDir, token);
 }
 
 int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_finish is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_finish passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Compilation* c = reinterpret_cast<Compilation*>(compilation);
-    return c->Finish();
+
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    return c->finish();
 }
 
 int ANeuralNetworksCompilation_setPriority(ANeuralNetworksCompilation* compilation, int priority) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_setPriority is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_setPriority passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Compilation* c = reinterpret_cast<Compilation*>(compilation);
-    return c->SetPriority((PriorityCode)priority);
+
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    return c->setPriority(static_cast<PriorityCode>(priority));
 }
 
 int ANeuralNetworksCompilation_setTimeout(ANeuralNetworksCompilation* compilation,
                                           uint64_t duration) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_setTimeout is called ");
+    LOGV(__func__);
     if (compilation == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksCompilation_setTimeout passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Compilation* c = reinterpret_cast<Compilation*>(compilation);
-    return c->SetTimeout((DurationCode)duration);
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    return c->setTimeout(std::chrono::nanoseconds(duration));
 }
 
 int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
                                     ANeuralNetworksExecution** execution) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_create is called ");
-    if (!execution || !compilation) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_create passed a nullptr");
+    LOGV(__func__);
+    if (compilation == nullptr || execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Compilation* c = reinterpret_cast<Compilation*>(compilation);
-    Execution* e = new Execution(c);
-    *execution = reinterpret_cast<ANeuralNetworksExecution*>(e);
+
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    auto* exec = new Execution(c);
+    *execution = reinterpret_cast<ANeuralNetworksExecution*>(exec);
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_free is called ");
-    if (!execution) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_free passed a nullptr");
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGV("%s passed a nullptr", __func__);
         return;
     }
-    if (execution == nullptr) return;
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    delete e;
-    e = nullptr;
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    delete exec;
 }
 
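+// setReusable (NNAPI feature level 5) allows one execution object to run several
+// computations; enforcement of the "only before the first computation" rule is
+// assumed to live inside Execution::setReusable.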
-int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution* execution,
-                                                  int32_t index, uint32_t* rank) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_getOutputOperandRank is called ");
-    if (rank == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_getOutputOperandRank get a nullptr");
+int ANeuralNetworksExecution_setReusable(ANeuralNetworksExecution* execution, bool reusable) {
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    return e->GetOutputOperandRank(index, rank);
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->setReusable(reusable);
 }
 
-int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution* execution,
-                                                        int32_t index, uint32_t* dimensions) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_getOutputOperandDimensions is called ");
-    if (dimensions == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_getOutputOperandDimensions get a nullptr");
+int ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution* execution, uint64_t duration) {
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->setTimeout(Duration(duration));
+}
+
+int ANeuralNetworksExecution_setLoopTimeout(ANeuralNetworksExecution* execution,
+                                            uint64_t duration) {
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->setLoopTimeout(Duration(duration));
+}
+
+int ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution* execution, bool measure) {
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->setMeasureTiming(measure);
+}
+
+int ANeuralNetworksExecution_enableInputAndOutputPadding(ANeuralNetworksExecution* execution,
+                                                         bool enable) {
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    return e->GetOutputOperandDimensions(index, dimensions);
+
+    return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32_t index,
                                       const ANeuralNetworksOperandType* type, const void* buffer,
                                       size_t length) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setInput is called ");
-    if (!execution || (!buffer && length != 0)) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_setInput passed a nullptr");
+    LOGV(__func__);
+    if (execution == nullptr || (buffer == nullptr && length != 0)) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    return e->SetInput(index, type, buffer, length);
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->setInput(index, type, buffer, length);
 }
 
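+// The *FromMemory variants bind the region [offset, offset + length) of an
+// ANeuralNetworksMemory to an operand; range validation is presumed to happen
+// inside Execution::setInputFromMemory / setOutputFromMemory.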
 int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index,
                                                 const ANeuralNetworksOperandType* type,
                                                 const ANeuralNetworksMemory* memory, size_t offset,
                                                 size_t length) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setInputFromMemory is called ");
-    if (!execution || !memory) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_setInputFromMemory passed a nullptr");
+    LOGV(__func__);
+    if (execution == nullptr || memory == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    const Memory* mem = reinterpret_cast<const Memory*>(memory);
-    return e->SetInputFromMemory(index, type, mem, offset, length);
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    const auto* mem = reinterpret_cast<const Memory*>(memory);
+    return exec->setInputFromMemory(index, type, mem, offset, length);
 }
 
 int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int32_t index,
                                        const ANeuralNetworksOperandType* type, void* buffer,
                                        size_t length) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setOutput is called ");
-    if (!execution || (!buffer && length != 0)) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_setInput passed a nullptr");
+    LOGV(__func__);
+    if (execution == nullptr || (buffer == nullptr && length != 0)) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    return e->SetOutput(index, type, buffer, length);
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->setOutput(index, type, buffer, length);
 }
 
 int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index,
                                                  const ANeuralNetworksOperandType* type,
                                                  const ANeuralNetworksMemory* memory, size_t offset,
                                                  size_t length) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setOutputFromMemory is called ");
-    if (!execution || !memory) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_setOutputFromMemory passed a nullptr");
+    LOGV(__func__);
+    if (execution == nullptr || memory == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    const auto* mem = reinterpret_cast<const Memory*>(memory);
+    return exec->setOutputFromMemory(index, type, mem, offset, length);
+}
+
+int ANeuralNetworksExecution_compute(ANeuralNetworksExecution* execution) {
+    LOGV(__func__);
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    const Memory* mem = reinterpret_cast<const Memory*>(memory);
-    return e->SetOutputFromMemory(index, type, mem, offset, length);
+
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    return exec->compute();
 }
 
 int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution* execution,
                                           ANeuralNetworksEvent** event) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_startCompute is called ");
+    LOGV(__func__);
+    if (event == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    if (execution == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        *event = nullptr;
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
 
-    return ANEURALNETWORKS_NO_ERROR;
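+    // Create the sync event before launching the computation so the caller
+    // always receives a valid event handle to wait on.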
+    auto* exec = reinterpret_cast<Execution*>(execution);
+    auto* e = exec->createSyncEvent();
+    *event = reinterpret_cast<ANeuralNetworksEvent*>(e);
+
+    return exec->startCompute();
 }
 
-int ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution* execution, uint64_t duration) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setTimeout is called ");
+int ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution* execution,
+                                         int32_t durationCode, uint64_t* duration) {
+    LOGV(__func__);
+    if (execution == nullptr || duration == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
 
-    return ANEURALNETWORKS_NO_ERROR;
+    const auto* exec = reinterpret_cast<const Execution*>(execution);
+    return exec->getDuration(static_cast<DurationCode>(durationCode), duration);
 }
 
-int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, "=====ANeuralNetworksEvent_wait is called ");
+int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution* execution,
+                                                  int32_t index, uint32_t* rank) {
+    LOGV(__func__);
+    if (rank == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
 
-    return ANEURALNETWORKS_NO_ERROR;
+    const auto* exec = reinterpret_cast<const Execution*>(execution);
+    return exec->getOutputOperandRank(index, rank);
 }
 
-void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME, "=====ANeuralNetworksEvent_free is called ");
-    if (!event) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksEvent_free passed a nullptr");
-        return;
+int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution* execution,
+                                                        int32_t index, uint32_t* dimensions) {
+    LOGV(__func__);
+    if (dimensions == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
+
+    const auto* exec = reinterpret_cast<const Execution*>(execution);
+    return exec->getOutputOperandDimensions(index, dimensions);
 }
 
-int ANeuralNetworksExecution_setLoopTimeout(ANeuralNetworksExecution* execution,
-                                            uint64_t duration) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setLoopTimeout is called ");
-    if (!execution) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_setLoopTimeout passed a nullptr");
+int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
+    LOGV(__func__);
+
+    if (event == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    auto e = reinterpret_cast<Execution*>(execution);
-    return e->SetLoopTimeout(duration);
+
+    auto* e = reinterpret_cast<Event*>(event);
+    return e->wait();
+}
+
+void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) {
+    LOGV(__func__);
+    if (event == nullptr) {
+        LOGD("%s passed a nullptr", __func__);
+        return;
+    }
+
+    auto* e = reinterpret_cast<Event*>(event);
+    e->wait();
+    delete e;
 }
 
 uint64_t ANeuralNetworks_getDefaultLoopTimeout() {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworks_getDefaultLoopTimeout is called ");
-    return operation_while::kTimeoutNsDefault;
+    LOGV(__func__);
+
+    constexpr auto kDefaultLoopTimeoutDuration = std::chrono::seconds{2};
+    constexpr uint64_t kDefaultLoopTimeoutNs =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(kDefaultLoopTimeoutDuration)
+                    .count();
+
+    return kDefaultLoopTimeoutNs;
 }
 
 uint64_t ANeuralNetworks_getMaximumLoopTimeout() {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworks_getMaximumLoopTimeout is called ");
-    return operation_while::kTimeoutNsMaximum;
+    LOGV(__func__);
+
+    constexpr auto kMaximumLoopTimeoutDuration = std::chrono::seconds{15};
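+    // 15 seconds is the maximum WHILE-loop timeout documented by the NNAPI spec.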
+    constexpr uint64_t kMaximumLoopTimeoutNs =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(kMaximumLoopTimeoutDuration)
+                    .count();
+
+    return kMaximumLoopTimeoutNs;
 }
 
 int ANeuralNetworksDevice_getExtensionSupport(const ANeuralNetworksDevice* device,
                                               const char* extensionName,
                                               bool* isExtensionSupported) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksDevice_getExtensionSupport is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -868,8 +875,7 @@ int ANeuralNetworksModel_getExtensionOperandType(ANeuralNetworksModel* model,
                                                  const char* extensionName,
                                                  uint16_t operandCodeWithinExtension,
                                                  int32_t* type) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksModel_getExtensionOperandType is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -878,140 +884,185 @@ int ANeuralNetworksModel_getExtensionOperationType(ANeuralNetworksModel* model,
                                                    const char* extensionName,
                                                    uint16_t operationCodeWithinExtension,
                                                    ANeuralNetworksOperationType* type) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksModel_getExtensionOperationType is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ANeuralNetworksModel_setOperandExtensionData(ANeuralNetworksModel* model, int32_t index,
                                                  const void* data, size_t length) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksModel_setOperandExtensionData is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int ANeuralNetworksEvent_createFromSyncFenceFd(int syncFenceFd, ANeuralNetworksEvent** event) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksEvent_createFromSyncFenceFd is called ");
+int ANeuralNetworksEvent_createFromSyncFenceFd(int sync_fence_fd, ANeuralNetworksEvent** event) {
+    LOGV(__func__);
+
     if (event == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksEvent_createFromSyncFenceFd passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    if (syncFenceFd <= 0) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksEvent_createFromSyncFenceFd passed an invalid fd");
+
+    if (sync_fence_fd <= 0) {
+        LOGE("%s passed an invalid sync fence fd", __func__);
         *event = nullptr;
         return ANEURALNETWORKS_BAD_DATA;
     }
+
+    auto* e = new SyncFenceEvent(sync_fence_fd);
+    *event = reinterpret_cast<ANeuralNetworksEvent*>(e);
+
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* syncFenceFd) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksEvent_getSyncFenceFd is called ");
+int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* sync_fence_fd) {
+    LOGV(__func__);
+
+    if (sync_fence_fd == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    if (event == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        *sync_fence_fd = -1;
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    const auto* e = reinterpret_cast<const SyncFenceEvent*>(event);
+    // The client owns the dupped fd, and is responsible for closing it.
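+    // getSyncFenceFd(true) is assumed to dup() the underlying fd; a value <= 0
+    // means no fence is attached to this event.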
+    int fd = e->getSyncFenceFd(true);
+    if (fd <= 0) {
+        LOGE("%s unable to get valid sync fence fd", __func__);
+        *sync_fence_fd = -1;
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    *sync_fence_fd = fd;
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ANeuralNetworksExecution_startComputeWithDependencies(
         ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
-        uint32_t numOfDependencies, uint64_t duration, ANeuralNetworksEvent** event) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_startComputeWithDependencies is "
-                        "called ");
-    if (!execution || !event) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksEvent_createFromSyncFenceFd passed a nullptr");
+        uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event) {
+    LOGV(__func__);
+
+    if (event == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    if (execution == nullptr || (num_dependencies != 0 && dependencies == nullptr)) {
+        LOGE("%s passed a nullptr", __func__);
+        *event = nullptr;
        return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    return ANEURALNETWORKS_NO_ERROR;
-}
 
-int64_t ANeuralNetworks_getRuntimeFeatureLevel() {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworks_getRuntimeFeatureLevel is called ");
+    auto* exec = reinterpret_cast<Execution*>(execution);
 
-    return ANEURALNETWORKS_FEATURE_LEVEL_7;
+    if (duration != 0) {
+        const auto* compilation = exec->getCompilation();
+        if (compilation->getDevices().size() != 1) {
+            LOGE("%s if the duration is non-zero, the "
+                 "ANeuralNetworksExecution must have been created from an "
+                 "ANeuralNetworksCompilation which in turn was created from "
+                 "ANeuralNetworksCompilation_createForDevices with numDevices = 1",
+                 __func__);
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+    }
+
+    for (size_t i = 0; i < num_dependencies; i++) {
+        if (dependencies[i] == nullptr) {
+            LOGE("%s passed a nullptr", __func__);
+            *event = nullptr;
+            return ANEURALNETWORKS_UNEXPECTED_NULL;
+        }
+
+        const auto* e = reinterpret_cast<const Event*>(dependencies[i]);
+        int waitStatus = e->wait();
+        if (waitStatus != ANEURALNETWORKS_NO_ERROR) {
+            *event = nullptr;
+            return waitStatus;
+        }
+    }
+
+    // The SL doesn't support creating a sync fence.
+    auto* e = new SyncFenceEvent(-1);
+    *event = reinterpret_cast<ANeuralNetworksEvent*>(e);
+    return ANeuralNetworksExecution_compute(execution);
+
+    // return ANeuralNetworksExecution_startCompute(execution, event);
 }
 
-int ANeuralNetworksExecution_enableInputAndOutputPadding(ANeuralNetworksExecution* execution,
-                                                         bool enable) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_enableInputAndOutputPadding is called ");
+int64_t ANeuralNetworks_getRuntimeFeatureLevel() {
    LOGV(__func__);
 
-    return ANEURALNETWORKS_NO_ERROR;
+    return ANEURALNETWORKS_FEATURE_LEVEL_7;
 }
 
 int ANeuralNetworksCompilation_getPreferredMemoryAlignmentForInput(
         const ANeuralNetworksCompilation* compilation, uint32_t index, uint32_t* alignment) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_"
-                        "getPreferredMemoryAlignmentForInput is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput(
         const ANeuralNetworksCompilation* compilation, uint32_t index, uint32_t* padding) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_"
-                        "getPreferredMemoryPaddingForInput is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ANeuralNetworksCompilation_getPreferredMemoryAlignmentForOutput(
         const ANeuralNetworksCompilation* compilation, uint32_t index, uint32_t* alignment) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_"
-                        "getPreferredMemoryAlignmentForOutput is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ANeuralNetworksCompilation_getPreferredMemoryPaddingForOutput(
         const ANeuralNetworksCompilation* compilation, uint32_t index, uint32_t* padding) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksCompilation_"
-                        "getPreferredMemoryPaddingForOutput is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int ANeuralNetworksExecution_setReusable(ANeuralNetworksExecution* execution, bool reusable) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====ANeuralNetworksExecution_setReusable is called ");
-    if (!execution) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "ANeuralNetworksExecution_setReusable passed a nullptr");
-        return ANEURALNETWORKS_UNEXPECTED_NULL;
-    }
-    Execution* e = reinterpret_cast<Execution*>(execution);
-    return e->SetReusable(reusable);
-}
-
 int SL_ANeuralNetworksCompilation_setCachingFromFds(ANeuralNetworksCompilation* compilation,
                                                     const int* modelCacheFds,
                                                     const uint32_t numModelCacheFiles,
-                                                    const int* dataCacheFds,
-                                                    const uint32_t numDataCacheFiles,
+                                                    const int* /*dataCacheFds*/,
+                                                    const uint32_t /*numDataCacheFiles*/,
                                                     const uint8_t* token) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksCompilation_setCachingFromFds is called ");
+    LOGV(__func__);
+    if (compilation == nullptr ||
+        (numModelCacheFiles != 0 && (modelCacheFds == nullptr || token == nullptr))) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
 
-    return ANEURALNETWORKS_NO_ERROR;
+    if (Compilation::kNumModelCacheFiles == 0) {
+        LOGW("%s model cache is not enabled", __func__);
+        return ANEURALNETWORKS_NO_ERROR;
+    }
+
+    auto* c = reinterpret_cast<Compilation*>(compilation);
+    return c->setCaching(modelCacheFds[0], token);
 }
 
 int SL_ANeuralNetworksDevice_getNumberOfCacheFilesNeeded(const ANeuralNetworksDevice* device,
                                                          uint32_t* numModelCacheFiles,
                                                          uint32_t* numDataCacheFiles) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksDevice_getNumberOfCacheFilesNeeded is called ");
-    if (numModelCacheFiles) *numModelCacheFiles = 0;
-    if (numDataCacheFiles) *numDataCacheFiles = 0;
+    LOGV(__func__);
+    if (numModelCacheFiles == nullptr || numDataCacheFiles == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    *numModelCacheFiles = Compilation::kNumModelCacheFiles;
+    *numDataCacheFiles = Compilation::kNumDataCacheFiles;
 
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -1019,88 +1070,61 @@ int SL_ANeuralNetworksDevice_getNumberOfCacheFilesNeeded(const ANeuralNetworksDe
 int SL_ANeuralNetworksDevice_getPerformanceInfo(
         const ANeuralNetworksDevice* device, int32_t performanceInfoKind,
         SL_ANeuralNetworksPerformanceInfo* performanceInfo) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksDevice_getPerformanceInfo is called ");
-    if (performanceInfo) *performanceInfo = {.execTime = 0.1f, .powerUsage = 0.1f};
+    LOGV(__func__);
 
     if (device == nullptr || performanceInfo == nullptr) {
-        __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                            "SL_ANeuralNetworksDevice_getPerformanceInfo passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
 
-    constexpr auto conv = [](const Capabilities::PerformanceInfo& info) {
-        return SL_ANeuralNetworksPerformanceInfo{.execTime = info.execTime,
-                                                 .powerUsage = info.powerUsage};
-    };
+    const auto* dev = reinterpret_cast<const Device*>(device);
+    auto perfInfo = dev->queryPerformanceInfo(performanceInfoKind);
 
-    const VsiDevice* d = reinterpret_cast<const VsiDevice*>(device);
-    const Capabilities& capabilities = d->getCapabilities();
-
-    switch (performanceInfoKind) {
-        case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_RELAXED_SCALAR:
-            *performanceInfo = conv(capabilities.relaxedFloat32toFloat16PerformanceScalar);
-            return ANEURALNETWORKS_NO_ERROR;
-        case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_RELAXED_TENSOR:
-            *performanceInfo = conv(capabilities.relaxedFloat32toFloat16PerformanceTensor);
-            return ANEURALNETWORKS_NO_ERROR;
-        case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_IF:
-            *performanceInfo = conv(capabilities.ifPerformance);
-            return ANEURALNETWORKS_NO_ERROR;
-        case SL_ANEURALNETWORKS_CAPABILITIES_PERFORMANCE_WHILE:
-            *performanceInfo = conv(capabilities.whilePerformance);
-            return ANEURALNETWORKS_NO_ERROR;
-    }
-    __android_log_print(
-            ANDROID_LOG_VERBOSE, TAG_NAME,
-            "SL_ANeuralNetworksDevice_getPerformanceInfo passed unknown performanceInfoKind ");
-    return ANEURALNETWORKS_BAD_DATA;
+    performanceInfo->execTime = perfInfo.execTimeRatio;
+    performanceInfo->powerUsage = perfInfo.powerUsageRatio;
+    return ANEURALNETWORKS_NO_ERROR;
 }
 
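+// Reports one performance entry per operand type in Device::kSupportedOperandTypes;
+// operand types outside that list are simply not reported.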
 int SL_ANeuralNetworksDevice_forEachOperandPerformanceInfo(
         const ANeuralNetworksDevice* device, void* context,
         void (*callback)(SL_ANeuralNetworksOperandPerformanceInfo, void*)) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksDevice_forEachOperandPerformanceInfo "
-                        "is called ");
+    LOGV(__func__);
+
     if (device == nullptr || context == nullptr || callback == nullptr) {
-        __android_log_print(
-                ANDROID_LOG_VERBOSE, TAG_NAME,
-                "SL_ANeuralNetworksDevice_forEachOperandPerformanceInfo passed a nullptr");
+        LOGE("%s passed a nullptr", __func__);
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
 
-    constexpr auto conv = [](const Capabilities::OperandPerformance& operandPerformance) {
-        return SL_ANeuralNetworksOperandPerformanceInfo{
-                .operandType = static_cast<int32_t>(operandPerformance.type),
-                .performanceInfo = {.execTime = operandPerformance.info.execTime,
-                                    .powerUsage = operandPerformance.info.powerUsage},
-        };
-    };
-
-    const VsiDevice* d = reinterpret_cast<const VsiDevice*>(device);
-    const Capabilities& capabilities = d->getCapabilities();
-
-    for (const auto& operandPerformance : capabilities.operandPerformance.Sorted) {
-        const SL_ANeuralNetworksOperandPerformanceInfo opPerf = conv(operandPerformance);
-        callback(opPerf, context);
+    const auto* dev = reinterpret_cast<const Device*>(device);
+    for (auto operandType : Device::kSupportedOperandTypes) {
+        auto perfInfo = dev->queryOperandPerformanceInfo(operandType);
+        auto operandPerformanceInfo = SL_ANeuralNetworksOperandPerformanceInfo{
+                .operandType = static_cast<int32_t>(operandType),
+                .performanceInfo = {
+                        .execTime = perfInfo.execTimeRatio,
+                        .powerUsage = perfInfo.powerUsageRatio,
+                }};
+        callback(operandPerformanceInfo, context);
     }
+
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int SL_ANeuralNetworksDevice_getVendorExtensionCount(const ANeuralNetworksDevice* device,
                                                      uint32_t* vendorExtensionCount) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksDevice_getVendorExtensionCount is called ");
-
+    LOGV(__func__);
+    if (device == nullptr || vendorExtensionCount == nullptr) {
+        LOGE("%s passed a nullptr", __func__);
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
     *vendorExtensionCount = 0;
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int SL_ANeuralNetworksDevice_getVendorExtensionName(const ANeuralNetworksDevice* device,
                                                     uint32_t vendorExtensionIndex,
                                                     const char** extensionName) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksDevice_getVendorExtensionName is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -1108,9 +1132,7 @@ int SL_ANeuralNetworksDevice_getVendorExtensionName(const ANeuralNetworksDevice*
 int SL_ANeuralNetworksDevice_forEachVendorExtensionOperandTypeInformation(
         const ANeuralNetworksDevice* device, uint32_t vendorExtensionIndex, void* context,
         void (*callback)(SL_ANeuralNetworksExtensionOperandTypeInformation, void*)) {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "=====SL_ANeuralNetworksDevice_"
-                        "forEachVendorExtensionOperandTypeInformation is called ");
+    LOGV(__func__);
 
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -1227,8 +1249,7 @@ NnApiSLDriverImplFL7 slDriverImpl{
 
 extern "C" {
 NnApiSLDriverImpl* ANeuralNetworks_getSLDriverImpl() {
-    __android_log_print(ANDROID_LOG_VERBOSE, TAG_NAME,
-                        "======ANeuralNetworks_getSLDriverImpl is called !!======");
+    LOGV(__func__);
     return reinterpret_cast<NnApiSLDriverImpl*>(&slDriverImpl);
 }
-}
+}
\ No newline at end of file
diff --git a/vts_status.md b/vts_status.md
deleted file mode 100644
index 8b94a42..0000000
--- a/vts_status.md
+++ /dev/null
@@ -1,111 +0,0 @@
-
-| OP | Status | API feature level | totoal | pass | fail | not support |
-| - | - | - | - | - | - | - |
-| ADD | Yes | 1 | 22 | 17 | 0 | 5 |
-| AVERAGE_POOL_2D | Yes | 1 | 160 | 140 | 0 | 20 |
-| CONCATENATION | Yes | 1 | 48 | 38 | 0 | 10 |
-| CONV_2D | Yes | 1 | 854 | 830 | 0 | 24 |
-| DEPTHWISE_CONV_2D | Yes | 1 | 488 | 488 | 0 | 0 |
-| DEPTH_TO_SPACE | Yes | 1 | 80 | 80 | 0 | 0 |
-| DEQUANTIZE | Yes | 1 | 52 | 40 | 0 | 12 |
-| EMBEDDING_LOOKUP | Yes | 1 | 11 | 11 | 0 | 0 |
-| FLOOR | Yes | 1 | 6 | 6 | 0 | 0 |
-| FULLY_CONNECTED | Yes | 1 | 106 | 92 | 0 | 14 |
-| HASHTABLE_LOOKUP | Yes | 1 | 6 | 6 | 0 | 0 |
-| L2_NORMALIZATION | Yes | 1 | 274 | 274 | 0 | 0 |
-| L2_POOL_2D | Yes | 1 | 60 | 48 | 0 | 12 |
-| LOCAL_RESPONSE_NORMALIZATION | Yes | 1 | 258 | 258 | 0 | 0 |
-| LOGISTIC | Yes | 1 | 25 | 20 | 0 | 5 |
-| LSH_PROJECTION | No | 1 | 0 | 0 | 0 | 0 |
-| LSTM | No | 1 | 0 | 0 | 0 | 0 |
-| MAX_POOL_2D | Yes | 1 | 132 | 112 | 0 | 20 |
-| MUL | Yes | 1 | 26 | 21 | 0 | 5 |
-| RELU | Yes | 1 | 29 | 24 | 0 | 5 |
-| RELU1 | Yes | 1 | 29 | 24 | 0 | 5 |
-| RELU6 | Yes | 1 | 29 | 24 | 0 | 5 |
-| RESHAPE | Yes | 1 | 32 | 13 | 0 | 19 |
-| RESIZE_BILINEAR | Yes | 1 | 190 | 174 | 0 | 16 |
-| RNN | No | 1 | 0 | 0 | 0 | 0 |
-| SOFTMAX | Yes | 1 | 481 | 476 | 0 | 5 |
-| SPACE_TO_DEPTH | Yes | 1 | 80 | 80 | 0 | 0 |
-| SVDF | Yes | 1 | 22 | 22 | 0 | 0 |
-| TANH | Yes | 1 | 15 | 10 | 0 | 5 |
-| BATCH_TO_SPACE_ND | Yes | 2 | 64 | 52 | 0 | 12 |
-| DIV | Yes | 2 | 23 | 20 | 0 | 3 |
-| MEAN | Yes | 2 | 46 | 24 | 0 | 22 |
-| PAD | Yes | 2 | 78 | 39 | 0 | 39 |
-| SPACE_TO_BATCH_ND | Yes | 2 | 224 | 112 | 0 | 112 |
-| SQUEEZE | Yes | 2 | 40 | 40 | 0 | 0 |
-| STRIDED_SLICE | Yes | 2 | 190 | 96 | 0 | 94 |
-| SUB | Yes | 2 | 310 | 305 | 0 | 5 |
-| TRANSPOSE | Yes | 2 | 41 | 14 | 0 | 27 |
-| ABS | Yes | 3 | 4 | 4 | 0 | 0 |
-| ARGMAX | Yes | 3 | 35 | 35 | 0 | 0 |
-| ARGMIN | Yes | 3 | 35 | 35 | 0 | 0 |
-| AXIS_ALIGNED_BBOX_TRANSFORM | No | 3 | 0 | 0 | 0 | 0 |
-| BIDIRECTIONAL_SEQUENCE_LSTM | No | 3 | 0 | 0 | 0 | 0 |
-| BIDIRECTIONAL_SEQUENCE_RNN | No | 3 | 0 | 0 | 0 | 0 |
-| BOX_WITH_NMS_LIMIT | No | 3 | 0 | 0 | 0 | 0 |
-| CAST | Yes | 3 | 61 | 60 | 0 | 1 |
-| CHANNEL_SHUFFLE | Yes | 3 | 200 | 200 | 0 | 0 |
-| DETECTION_POSTPROCESSING | No | 3 | 0 | 0 | 0 | 0 |
-| EQUAL | Yes | 3 | 31 | 31 | 0 | 0 |
-| EXP | Yes | 3 | 3 | 3 | 0 | 0 |
-| EXPAND_DIMS | Yes | 3 | 44 | 44 | 0 | 0 |
-| GATHER | Yes | 3 | 97 | 97 | 0 | 0 |
-| GENERATE_PROPOSALS | No | 3 | 0 | 0 | 0 | 0 |
-| GREATER | Yes | 3 | 31 | 31 | 0 | 0 |
-| GREATER_EQUAL | Yes | 3 | 31 | 31 | 0 | 0 |
-| GROUPED_CONV_2D | Yes | 3 | 464 | 464 | 0 | 0 |
-| HEATMAP_MAX_KEYPOINT | No | 3 | 0 | 0 | 0 | 0 |
-| INSTANCE_NORMALIZATION | Yes | 3 | 36 | 36 | 0 | 0 |
-| LESS | Yes | 3 | 31 | 31 | 0 | 0 |
-| LESS_EQUAL | Yes | 3 | 31 | 31 | 0 | 0 |
-| LOG | Yes | 3 | 3 | 3 | 0 | 0 |
-| LOGICAL_AND | Yes | 3 | 2 | 2 | 0 | 0 |
-| LOGICAL_NOT | Yes | 3 | 1 | 1 | 0 | 0 |
-| LOGICAL_OR | Yes | 3 | 2 | 2 | 0 | 0 |
-| LOG_SOFTMAX | Yes | 3 | 12 | 12 | 0 | 0 |
-| MAXIMUM | Yes | 3 | 26 | 26 | 0 | 0 |
-| MINIMUM | Yes | 3 | 26 | 26 | 0 | 0 |
-| NEG | Yes | 3 | 4 | 4 | 0 | 0 |
-| NOT_EQUAL | Yes | 3 | 31 | 31 | 0 | 0 |
-| PAD_V2 | Yes | 3 | 50 | 20 | 0 | 30 |
-| POW | Yes | 3 | 24 | 24 | 0 | 0 |
-| PRELU | Yes | 3 | 44 | 22 | 0 | 22 |
-| QUANTIZE | Yes | 3 | 38 | 32 | 0 | 6 |
-| QUANTIZED_16BIT_LSTM | No | 3 | 0 | 0 | 0 | 0 |
-| RANDOM_MULTINOMIAL | No | 3 | 0 | 0 | 0 | 0 |
-| REDUCE_ALL | Yes | 3 | 4 | 4 | 0 | 0 |
-| REDUCE_ANY | Yes | 3 | 4 | 4 | 0 | 0 |
-| REDUCE_MAX | Yes | 3 | 42 | 42 | 0 | 0 |
-| REDUCE_MIN | Yes | 3 | 42 | 42 | 0 | 0 |
-| REDUCE_PROD | Yes | 3 | 26 | 26 | 0 | 0 |
-| REDUCE_SUM | Yes | 3 | 26 | 26 | 0 | 0 |
-| ROI_ALIGN | Yes | 3 | 110 | 100 | 0 | 10 |
-| ROI_POOLING | No | 3 | 0 | 0 | 0 | 0 |
-| RESIZE_NEAREST_NEIGHBOR | Yes | 3 | 408 | 328 | 0 | 80 |
-| RSQRT | Yes | 3 | 11 | 11 | 0 | 0 |
-| SELECT | Yes | 3 | 28 | 28 | 0 | 0 |
-| SIN | Yes | 3 | 3 | 3 | 0 | 0 |
-| SLICE | Yes | 3 | 46 | 0 | 0 | 46 |
-| SPLIT | Yes | 3 | 62 | 62 | 0 | 0 |
-| SQRT | Yes | 3 | 3 | 3 | 0 | 0 |
-| TILE | Yes | 3 | 32 | 0 | 0 | 32 |
-| TOPK_V2 | Yes | 3 | 35 | 35 | 0 | 0 |
-| TRANSPOSE_CONV_2D | Yes | 3 | 792 | 40 | 0 | 752 |
-| UNIDIRECTIONAL_SEQUENCE_LSTM | No | 3 | 0 | 0 | 0 | 0 |
-| UNIDIRECTIONAL_SEQUENCE_RNN | No | 3 | 0 | 0 | 0 | 0 |
-| QUANTIZED_LSTM | No | 4 | 0 | 0 | 0 | 0 |
-| IF | No | 4 | 0 | 0 | 0 | 0 |
-| WHILE | No | 4 | 0 | 0 | 0 | 0 |
-| ELU | Yes | 4 | 15 | 15 | 0 | 0 |
-| HARD_SWISH | Yes | 4 | 21 | 21 | 0 | 0 |
-| FILL | No | 4 | 0 | 0 | 0 | 0 |
-| RANK | No | 4 | 0 | 0 | 0 | 0 |
-| BATCH_MATMUL | Yes | 6 | 28 | 28 | 0 | 0 |
-| PACK | Yes | 6 | 45 | 45 | 0 | 0 |
-| MIRROR_PAD | Yes | 7 | 128 | 64 | 0 | 64 |
-| REVERSE | Yes | 7 | 36 | 36 | 0 | 0 |
-
-**NOTE**: test result with imx8mp android 13
\ No newline at end of file