From 2be4572f0f3ddbcee2a261e66594fb6d5cf546f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bar=C4=B1=C5=9F=20Zeren?= <76053179+StepTurtle@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:28:56 +0300 Subject: [PATCH] fix(tensorrt_common): add enqueueV3 support (#9018) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add enqueueV3 support Signed-off-by: Barış Zeren * chore: update multiplier Signed-off-by: Barış Zeren * fix: wrong multiplier Signed-off-by: Barış Zeren * fix: macros Signed-off-by: Barış Zeren --------- Signed-off-by: Barış Zeren Co-authored-by: Amadeusz Szymko --- .../tensorrt_common/tensorrt_common.hpp | 9 +++++ .../tensorrt_common/src/tensorrt_common.cpp | 35 ++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp index 6691c1fb9e97d..9e11f5f220492 100644 --- a/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp +++ b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp @@ -184,12 +184,21 @@ class TrtCommon // NOLINT */ void setup(); +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500 + void setupBindings(std::vector<void *> & bindings); +#endif + bool isInitialized(); nvinfer1::Dims getBindingDimensions(const int32_t index) const; int32_t getNbBindings(); bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const; +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500 + bool enqueueV3(cudaStream_t stream); +#endif +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH < 10000 bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed); +#endif /** * @brief output per-layer information diff --git a/common/tensorrt_common/src/tensorrt_common.cpp 
b/common/tensorrt_common/src/tensorrt_common.cpp index 74fd331b40ae7..9024207d9fe50 100644 --- a/common/tensorrt_common/src/tensorrt_common.cpp +++ b/common/tensorrt_common/src/tensorrt_common.cpp @@ -225,6 +225,16 @@ void TrtCommon::setup() is_initialized_ = true; } +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500 +void TrtCommon::setupBindings(std::vector<void *> & bindings) +{ + for (int32_t i = 0, e = engine_->getNbIOTensors(); i < e; i++) { + auto const name = engine_->getIOTensorName(i); + context_->setTensorAddress(name, bindings.at(i)); + } +} +#endif + bool TrtCommon::loadEngine(const std::string & engine_file_path) { std::ifstream engine_file(engine_file_path); @@ -303,8 +313,7 @@ void TrtCommon::printNetworkInfo(const std::string & onnx_file_path) total_gflops += gflops; total_params += num_weights; std::cout << "L" << i << " [conv " << k_dims.d[0] << "x" << k_dims.d[1] << " (" << groups - << ") " - << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x" + << ") " << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x" << dim_in.d[1] << " -> " << dim_out.d[3] << "x" << dim_out.d[2] << "x" << dim_out.d[1]; std::cout << " weights:" << num_weights; @@ -369,8 +378,7 @@ bool TrtCommon::buildEngineFromOnnx( if (num_available_dla > 0) { std::cout << "###" << num_available_dla << " DLAs are supported! ###" << std::endl; } else { - std::cout << "###Warning : " - << "No DLA is supported! ###" << std::endl; + std::cout << "###Warning : " << "No DLA is supported! 
###" << std::endl; } config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA); config->setDLACore(build_config_->dla_core_id); @@ -567,6 +575,24 @@ bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims & return context_->setBindingDimensions(index, dimensions); } +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500 +bool TrtCommon::enqueueV3(cudaStream_t stream) +{ + if (build_config_->profile_per_layer) { + auto inference_start = std::chrono::high_resolution_clock::now(); + + bool ret = context_->enqueueV3(stream); + + auto inference_end = std::chrono::high_resolution_clock::now(); + host_profiler_.reportLayerTime( + "inference", + std::chrono::duration<float, std::milli>(inference_end - inference_start).count()); + return ret; + } + return context_->enqueueV3(stream); +} +#endif +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH < 10000 bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed) { if (build_config_->profile_per_layer) { @@ -583,6 +609,7 @@ bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * i return context_->enqueueV2(bindings, stream, input_consumed); } } +#endif void TrtCommon::printProfiling() {