Skip to content

Commit

Permalink
fix(tensorrt_common): add enqueueV3 support (autowarefoundation#9018)
Browse files Browse the repository at this point in the history
* fix: add enqueueV3 support

Signed-off-by: Barış Zeren <baris@leodrive.ai>

* chore: update multiplier

Signed-off-by: Barış Zeren <baris@leodrive.ai>

* fix: wrong multiplier

Signed-off-by: Barış Zeren <baris@leodrive.ai>

* fix: macros

Signed-off-by: Barış Zeren <baris@leodrive.ai>

---------

Signed-off-by: Barış Zeren <baris@leodrive.ai>
Co-authored-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>
  • Loading branch information
StepTurtle and amadeuszsz authored Oct 16, 2024
1 parent 23f4c86 commit 2be4572
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,21 @@ class TrtCommon // NOLINT
*/
void setup();

#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500
void setupBindings(std::vector<void *> & bindings);
#endif

bool isInitialized();

nvinfer1::Dims getBindingDimensions(const int32_t index) const;
int32_t getNbBindings();
bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500
bool enqueueV3(cudaStream_t stream);
#endif
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH < 10000
bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);
#endif

/**
* @brief output per-layer information
Expand Down
35 changes: 31 additions & 4 deletions common/tensorrt_common/src/tensorrt_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ void TrtCommon::setup()
is_initialized_ = true;
}

// NOTE(review): the original guard used NV_TENSOR_PATCH, which TensorRT never
// defines (NvInferVersion.h defines NV_TENSORRT_PATCH). Undefined identifiers
// evaluate to 0 inside #if, so the typo was silently tolerated; fixed here.
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
/**
 * @brief Register the device buffer address for every engine I/O tensor.
 *
 * Associates bindings[i] with the i-th I/O tensor of the engine, in the
 * engine's I/O tensor order. Required before enqueueV3, which takes no
 * binding array.
 *
 * @param bindings device buffer pointers, one per engine I/O tensor, in
 *        engine I/O order; must contain at least getNbIOTensors() entries
 *        (bindings.at(i) throws std::out_of_range otherwise — intentional
 *        fail-fast on a mis-sized binding list)
 */
void TrtCommon::setupBindings(std::vector<void *> & bindings)
{
  for (int32_t i = 0, e = engine_->getNbIOTensors(); i < e; i++) {
    auto const name = engine_->getIOTensorName(i);
    context_->setTensorAddress(name, bindings.at(i));
  }
}
#endif

bool TrtCommon::loadEngine(const std::string & engine_file_path)
{
std::ifstream engine_file(engine_file_path);
Expand Down Expand Up @@ -303,8 +313,7 @@ void TrtCommon::printNetworkInfo(const std::string & onnx_file_path)
total_gflops += gflops;
total_params += num_weights;
std::cout << "L" << i << " [conv " << k_dims.d[0] << "x" << k_dims.d[1] << " (" << groups
<< ") "
<< "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
<< ") " << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
<< dim_in.d[1] << " -> " << dim_out.d[3] << "x" << dim_out.d[2] << "x"
<< dim_out.d[1];
std::cout << " weights:" << num_weights;
Expand Down Expand Up @@ -369,8 +378,7 @@ bool TrtCommon::buildEngineFromOnnx(
if (num_available_dla > 0) {
std::cout << "###" << num_available_dla << " DLAs are supported! ###" << std::endl;
} else {
std::cout << "###Warning : "
<< "No DLA is supported! ###" << std::endl;
std::cout << "###Warning : " << "No DLA is supported! ###" << std::endl;
}
config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
config->setDLACore(build_config_->dla_core_id);
Expand Down Expand Up @@ -567,6 +575,24 @@ bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims &
return context_->setBindingDimensions(index, dimensions);
}

// NOTE(review): guard macro corrected from NV_TENSOR_PATCH (undefined, reads
// as 0 in #if) to NV_TENSORRT_PATCH, the macro TensorRT actually provides.
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
/**
 * @brief Launch inference with the TensorRT >= 8.5 enqueueV3 API.
 *
 * Tensor addresses must already be registered via setupBindings(); unlike
 * enqueueV2 this overload takes no binding array. When per-layer profiling is
 * enabled, the whole-inference wall time is also reported to host_profiler_.
 *
 * @param stream CUDA stream the inference is enqueued on
 * @return result of IExecutionContext::enqueueV3 (false on failure)
 */
bool TrtCommon::enqueueV3(cudaStream_t stream)
{
  // Fast path: no profiling requested, just enqueue.
  if (!build_config_->profile_per_layer) {
    return context_->enqueueV3(stream);
  }

  const auto inference_start = std::chrono::high_resolution_clock::now();
  const bool ret = context_->enqueueV3(stream);
  const auto inference_end = std::chrono::high_resolution_clock::now();
  host_profiler_.reportLayerTime(
    "inference",
    std::chrono::duration<float, std::milli>(inference_end - inference_start).count());
  return ret;
}
#endif
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH < 10000
bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
{
if (build_config_->profile_per_layer) {
Expand All @@ -583,6 +609,7 @@ bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * i
return context_->enqueueV2(bindings, stream, input_consumed);
}
}
#endif

void TrtCommon::printProfiling()
{
Expand Down

0 comments on commit 2be4572

Please sign in to comment.