From 31da4eeec2759c364671b82b1fb31bc1ec219c6a Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Mon, 5 Feb 2024 15:50:22 -0800
Subject: [PATCH 1/6] Add response statistics reporting and custom delays

---
 src/square.cc | 254 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 218 insertions(+), 36 deletions(-)

diff --git a/src/square.cc b/src/square.cc
index b65a823..58ea5cb 100644
--- a/src/square.cc
+++ b/src/square.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -24,6 +24,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include <algorithm>
 #include <chrono>
 #include <thread>
 #include <vector>
@@ -89,6 +90,133 @@ namespace triton { namespace backend { namespace square {
   } \
   } while (false)
 
+//
+// ModelParameters
+//
+// Helper class for parsing and storing model config parameters, and for
+// performing required operations on the stored parameters.
+//
+class ModelParameters {
+ public:
+  enum DelayType { Infer, Output };
+  enum InferResultType { Success, Fail, Empty };
+
+  ModelParameters()
+      : custom_infer_delay_ns_(0), custom_output_delay_ns_(0),
+        custom_fail_count_(0), custom_empty_count_(0)
+  {
+  }
+  ModelParameters(common::TritonJson::Value& model_config_);
+
+  void Sleep(DelayType delay_type) const;
+  InferResultType InferResult(size_t current_index, size_t element_count) const;
+
+ private:
+  void ReadParameter(
+      common::TritonJson::Value& parameters_json, const std::string& key,
+      size_t* value) const;
+  void Sleep(size_t delay_ns) const;
+  bool IsNumber(const std::string& str) const;
+
+  size_t custom_infer_delay_ns_;
+  size_t custom_output_delay_ns_;
+  size_t custom_fail_count_;
+  size_t custom_empty_count_;
+};
+
+ModelParameters::ModelParameters(common::TritonJson::Value& model_config_)
+    : ModelParameters()
+{
+  // Parse and store model config parameters. Any non-well-formed parameter
+  // will be left at its default value.
+  common::TritonJson::Value parameters_json;
+  if (model_config_.Find("parameters", &parameters_json)) {
+    ReadParameter(
+        parameters_json, "CUSTOM_INFER_DELAY_NS", &custom_infer_delay_ns_);
+    ReadParameter(
+        parameters_json, "CUSTOM_OUTPUT_DELAY_NS", &custom_output_delay_ns_);
+    ReadParameter(parameters_json, "CUSTOM_FAIL_COUNT", &custom_fail_count_);
+    ReadParameter(parameters_json, "CUSTOM_EMPTY_COUNT", &custom_empty_count_);
+  }
+}
+
+void
+ModelParameters::Sleep(DelayType delay_type) const
+{
+  // Sleep on the requested delay type.
+  if (delay_type == DelayType::Infer) {
+    Sleep(custom_infer_delay_ns_);
+  } else if (delay_type == DelayType::Output) {
+    Sleep(custom_output_delay_ns_);
+  }
+}
+
+ModelParameters::InferResultType
+ModelParameters::InferResult(size_t current_index, size_t element_count) const
+{
+  // Let N be the element count, F the fail count and E the empty count.
+  // Return empty for indices in [N - E, N) and fail for indices in
+  // [N - E - F, N - E). With proper N, F and E values, the infer result will
+  // first return success, then fail, and then empty.
+  if (current_index + custom_fail_count_ + custom_empty_count_ <
+      element_count) {
+    // [0, N - E - F)
+    return ModelParameters::InferResultType::Success;
+  }
+  if (current_index + custom_empty_count_ < element_count) {
+    // [N - E - F, N - E)
+    return ModelParameters::InferResultType::Fail;
+  }
+  // [N - E, N)
+  return ModelParameters::InferResultType::Empty;
+}
+
+void
+ModelParameters::ReadParameter(
+    common::TritonJson::Value& parameters_json, const std::string& key,
+    size_t* value) const
+{
+  common::TritonJson::Value value_json;
+  if (parameters_json.Find(key.c_str(), &value_json)) {
+    std::string value_str;
+    if (value_json.MemberAsString("string_value", &value_str) != nullptr) {
+      LOG_MESSAGE(
+          TRITONSERVER_LOG_INFO,
+          (std::string("string_value cannot be parsed from ") + key +
+           " parameter")
+              .c_str());
+      return;
+    }
+    if (!IsNumber(value_str)) {
+      LOG_MESSAGE(
+          TRITONSERVER_LOG_INFO, (value_str + " string_value from " + key +
+                                  " parameter is not a number")
+                                     .c_str());
+      return;
+    }
+    *value = std::stoi(value_str);
+  }
+}
+
+void
+ModelParameters::Sleep(size_t delay_ns) const
+{
+  if (delay_ns > 0) {
+    LOG_MESSAGE(
+        TRITONSERVER_LOG_INFO,
+        (std::string("add delay ") + std::to_string(delay_ns) + " ns").c_str());
+    std::this_thread::sleep_for(std::chrono::nanoseconds(delay_ns));
+  }
+}
+
+bool
+ModelParameters::IsNumber(const std::string& str) const
+{
+  return std::find_if(str.begin(), str.end(), [](unsigned char c) {
+           return !std::isdigit(c);
+         }) == str.end();
+}
+
 //
 // ModelState
 //
@@ -107,6 +235,9 @@ class ModelState {
   // Validate that model configuration is supported by this backend.
   TRITONSERVER_Error* ValidateModelConfig();
 
+  // Get model parameters.
+  const ModelParameters& get_model_parameters() { return model_parameters_; }
+
  private:
   ModelState(
       TRITONBACKEND_Model* triton_model,
@@ -114,6 +245,7 @@ class ModelState {
 
   TRITONBACKEND_Model* triton_model_;
   common::TritonJson::Value model_config_;
+  ModelParameters model_parameters_;
 };
 
 TRITONSERVER_Error*
@@ -146,7 +278,8 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
 ModelState::ModelState(
     TRITONBACKEND_Model* triton_model, common::TritonJson::Value&& model_config)
-    : triton_model_(triton_model), model_config_(std::move(model_config))
+    : triton_model_(triton_model), model_config_(std::move(model_config)),
+      model_parameters_(model_config_)
 {
 }
 
@@ -380,47 +513,96 @@ ModelInstanceState::RequestThread(
       TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
       factory(factory_ptr);
 
-  // Copy IN->OUT, and send a response.
+  // Copy IN -> OUT, and send a response.
   const std::vector<int64_t> output_shape(dims_count, 1);
-  for (size_t e = 0; e < element_count; ++e) {
-    // Create the response with a single OUT output.
-    TRITONBACKEND_Response* response;
-    RESPOND_FACTORY_AND_RETURN_IF_ERROR(
-        factory.get(),
-        TRITONBACKEND_ResponseNewFromFactory(&response, factory.get()));
+  for (size_t e = 0; e < element_count; e++) {
+    // Timestamp at start of the response.
+    uint64_t response_start_ns;
+    SET_TIMESTAMP(response_start_ns);
+
+    // Simulate compute delay, if provided.
+    model_state_->get_model_parameters().Sleep(
+        ModelParameters::DelayType::Infer);
+
+    // Result type of the simulated inference.
+    ModelParameters::InferResultType result_type =
+        model_state_->get_model_parameters().InferResult(e, element_count);
+
+    // Populate 'compute_output_start_ns' and 'response' if not empty result.
+    uint64_t compute_output_start_ns = 0;
+    TRITONBACKEND_Response* response = nullptr;
+    if (result_type != ModelParameters::InferResultType::Empty) {
+      // Timestamp at start of outputting compute tensors.
+      SET_TIMESTAMP(compute_output_start_ns);
+
+      // Create the response with a single OUT output.
+      RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+          factory.get(),
+          TRITONBACKEND_ResponseNewFromFactory(&response, factory.get()));
 
-    TRITONBACKEND_Output* output;
-    RESPOND_FACTORY_AND_RETURN_IF_ERROR(
-        factory.get(), TRITONBACKEND_ResponseOutput(
-                           response, &output, "OUT", TRITONSERVER_TYPE_INT32,
-                           output_shape.data(), dims_count));
-
-    // Get the output buffer. We request a buffer in CPU memory but we
-    // have to handle any returned type. If we get back a buffer in
-    // GPU memory we just fail the request.
-    void* output_buffer;
-    TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
-    int64_t output_memory_type_id = 0;
-    RESPOND_FACTORY_AND_RETURN_IF_ERROR(
-        factory.get(), TRITONBACKEND_OutputBuffer(
-                           output, &output_buffer, sizeof(int32_t),
-                           &output_memory_type, &output_memory_type_id));
-    if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
+      // Get response output container.
+      TRITONBACKEND_Output* output;
       RESPOND_FACTORY_AND_RETURN_IF_ERROR(
-          factory.get(), TRITONSERVER_ErrorNew(
-                             TRITONSERVER_ERROR_INTERNAL,
-                             "failed to create output buffer in CPU memory"));
+          factory.get(), TRITONBACKEND_ResponseOutput(
+                             response, &output, "OUT", TRITONSERVER_TYPE_INT32,
+                             output_shape.data(), dims_count));
+
+      // Get the output buffer. We request a buffer in CPU memory but we
+      // have to handle any returned type. If we get back a buffer in
+      // GPU memory we just fail the request.
+      void* output_buffer;
+      TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
+      int64_t output_memory_type_id = 0;
+      RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+          factory.get(), TRITONBACKEND_OutputBuffer(
+                             output, &output_buffer, sizeof(int32_t),
+                             &output_memory_type, &output_memory_type_id));
+      if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
+        RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+            factory.get(), TRITONSERVER_ErrorNew(
+                               TRITONSERVER_ERROR_INTERNAL,
+                               "failed to create output buffer in CPU memory"));
+      }
+
+      // Copy IN -> OUT
+      *(reinterpret_cast<int32_t*>(output_buffer)) = element_count;
+
+      // Simulate output delay, if provided.
+      model_state_->get_model_parameters().Sleep(
+          ModelParameters::DelayType::Output);
     }
 
-    // Copy IN -> OUT
-    *(reinterpret_cast<int32_t*>(output_buffer)) = element_count;
+    // Timestamp at end of the response.
+    uint64_t response_end_ns;
+    SET_TIMESTAMP(response_end_ns);
 
-    // Send the response.
-    LOG_IF_ERROR(
-        TRITONBACKEND_ResponseSend(
-            response, 0 /* flags */, nullptr /* success */),
-        "failed sending response");
+    // Set error for simulated failure.
+    TRITONSERVER_Error* error = nullptr;
+    if (result_type == ModelParameters::InferResultType::Fail) {
+      error = TRITONSERVER_ErrorNew(
+          TRITONSERVER_ERROR_UNKNOWN, "simulated failure");
+    }
+
+    // Send response if not empty.
+    if (result_type != ModelParameters::InferResultType::Empty) {
+      LOG_IF_ERROR(
+          TRITONBACKEND_ResponseSend(response, 0 /* flags */, error),
+          "failed sending response");
+    }
+
+    // Report response statistics.
+    RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+        factory.get(),
+        TRITONBACKEND_ModelInstanceReportResponseStatistics(
+            TritonModelInstance(), factory.get(), response_start_ns,
+            compute_output_start_ns, response_end_ns, 0 /* flags */, error));
+
+    // Delete error, if any.
+ if (error != nullptr) { + TRITONSERVER_ErrorDelete(error); + } + // Additional logs for debugging. LOG_MESSAGE( TRITONSERVER_LOG_INFO, (std::string("sent response ") + std::to_string(e + 1) + " of " + From 80195d1744ef3f0a5b8aa23e40417b80e06b600a Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:04:13 -0800 Subject: [PATCH 2/6] enum capital case --- src/square.cc | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/square.cc b/src/square.cc index 58ea5cb..5c498b4 100644 --- a/src/square.cc +++ b/src/square.cc @@ -98,8 +98,8 @@ namespace triton { namespace backend { namespace square { // class ModelParameters { public: - enum DelayType { Infer, Output }; - enum InferResultType { Success, Fail, Empty }; + enum DelayType { INFER, OUTPUT }; + enum InferResultType { SUCCESS, FAIL, EMPTY }; ModelParameters() : custom_infer_delay_ns_(0), custom_output_delay_ns_(0), @@ -144,9 +144,9 @@ void ModelParameters::Sleep(DelayType delay_type) const { // Sleep on the requested delay type. - if (delay_type == DelayType::Infer) { + if (delay_type == DelayType::INFER) { Sleep(custom_infer_delay_ns_); - } else if (delay_type == DelayType::Output) { + } else if (delay_type == DelayType::OUTPUT) { Sleep(custom_output_delay_ns_); } } @@ -161,14 +161,14 @@ ModelParameters::InferResult(size_t current_index, size_t element_count) const if (current_index + custom_fail_count_ + custom_empty_count_ < element_count) { // [0, N - E - F) - return ModelParameters::InferResultType::Success; + return ModelParameters::InferResultType::SUCCESS; } if (current_index + custom_empty_count_ < element_count) { // [N - E - F, N - E) - return ModelParameters::InferResultType::Fail; + return ModelParameters::InferResultType::FAIL; } // [N - E, N) - return ModelParameters::InferResultType::Empty; + return ModelParameters::InferResultType::EMPTY; } void @@ -522,7 +522,7 @@ ModelInstanceState::RequestThread( // Simulate compute delay, if provided. model_state_->get_model_parameters().Sleep( - ModelParameters::DelayType::Infer); + ModelParameters::DelayType::INFER); // Result type of the simulated inference. ModelParameters::InferResultType result_type = @@ -531,7 +531,7 @@ ModelInstanceState::RequestThread( // Populate 'compute_output_start_ns' and 'response' if not empty result. uint64_t compute_output_start_ns = 0; TRITONBACKEND_Response* response = nullptr; - if (result_type != ModelParameters::InferResultType::Empty) { + if (result_type != ModelParameters::InferResultType::EMPTY) { // Timestamp at start of outputting compute tensors. SET_TIMESTAMP(compute_output_start_ns); @@ -569,7 +569,7 @@ ModelInstanceState::RequestThread( // Simulate output delay, if provided. model_state_->get_model_parameters().Sleep( - ModelParameters::DelayType::Output); + ModelParameters::DelayType::OUTPUT); } // Timestamp at end of the response. @@ -578,13 +578,13 @@ ModelInstanceState::RequestThread( // Set error for simulated failure. TRITONSERVER_Error* error = nullptr; - if (result_type == ModelParameters::InferResultType::Fail) { + if (result_type == ModelParameters::InferResultType::FAIL) { error = TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNKNOWN, "simulated failure"); } // Send response if not empty. 
- if (result_type != ModelParameters::InferResultType::Empty) { + if (result_type != ModelParameters::InferResultType::EMPTY) { LOG_IF_ERROR( TRITONBACKEND_ResponseSend(response, 0 /* flags */, error), "failed sending response"); From 53048e62e8bc6ce598653feb7f8b9230d20bcab5 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:49:31 -0800 Subject: [PATCH 3/6] Move API parameters into a struct --- src/square.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/square.cc b/src/square.cc index 5c498b4..59d9ce6 100644 --- a/src/square.cc +++ b/src/square.cc @@ -591,11 +591,18 @@ ModelInstanceState::RequestThread( } // Report response statistics. + TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics = + new TRITONBACKEND_ModelInstanceResponseStatistics(); + response_statistics->model_instance = TritonModelInstance(); + response_statistics->response_factory = factory.get(); + response_statistics->response_start = response_start_ns; + response_statistics->compute_output_start = compute_output_start_ns; + response_statistics->response_end = response_end_ns; + response_statistics->error = error; RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceReportResponseStatistics( - TritonModelInstance(), factory.get(), response_start_ns, - compute_output_start_ns, response_end_ns, 0 /* flags */, error)); + factory.get(), TRITONBACKEND_ModelInstanceReportResponseStatistics( + response_statistics)); + delete response_statistics; // Delete error, if any. if (error != nullptr) { From 035d1008bdff38281486d24278670e54e9e98a3c Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:37:31 -0800 Subject: [PATCH 4/6] Make API parameters struct opaque --- src/square.cc | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/src/square.cc b/src/square.cc index 59d9ce6..c67c3ca 100644 --- a/src/square.cc +++ b/src/square.cc @@ -591,18 +591,39 @@ ModelInstanceState::RequestThread( } // Report response statistics. 
-  TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics =
-      new TRITONBACKEND_ModelInstanceResponseStatistics();
-  response_statistics->model_instance = TritonModelInstance();
-  response_statistics->response_factory = factory.get();
-  response_statistics->response_start = response_start_ns;
-  response_statistics->compute_output_start = compute_output_start_ns;
-  response_statistics->response_end = response_end_ns;
-  response_statistics->error = error;
+  TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics;
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(),
+      TRITONBACKEND_ModelInstanceResponseStatisticsNew(&response_statistics));
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(),
+      TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance(
+          response_statistics, TritonModelInstance()));
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(),
+      TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory(
+          response_statistics, factory.get()));
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(),
+      TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart(
+          response_statistics, response_start_ns));
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(),
+      TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart(
+          response_statistics, compute_output_start_ns));
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(),
+      TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd(
+          response_statistics, response_end_ns));
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(), TRITONBACKEND_ModelInstanceResponseStatisticsSetError(
+                         response_statistics, error));
   RESPOND_FACTORY_AND_RETURN_IF_ERROR(
       factory.get(), TRITONBACKEND_ModelInstanceReportResponseStatistics(
                          response_statistics));
-  delete response_statistics;
+  RESPOND_FACTORY_AND_RETURN_IF_ERROR(
+      factory.get(), TRITONBACKEND_ModelInstanceResponseStatisticsDelete(
                          response_statistics));

  // Delete error, if any.
  if (error != nullptr) {

From 54779566c6582f29eef47c7d42c0eb3baf40e2fc Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Thu, 15 Feb 2024 18:14:09 -0800
Subject: [PATCH 5/6] Add comment on where fail and empty count are from

---
 src/square.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/square.cc b/src/square.cc
index c67c3ca..a867ae0 100644
--- a/src/square.cc
+++ b/src/square.cc
@@ -157,7 +157,8 @@ ModelParameters::InferResult(size_t current_index, size_t element_count) const
   // Let N be the element count, F the fail count and E the empty count.
   // Return empty for indices in [N - E, N) and fail for indices in
   // [N - E - F, N - E). With proper N, F and E values, the infer result will
-  // first return success, then fail, and then empty.
+  // first return success, then fail, and then empty. See the constructor for
+  // how the fail and empty counts are read from the model config parameters.
if (current_index + custom_fail_count_ + custom_empty_count_ < element_count) { // [0, N - E - F) From 968431ab614356383e89e33b51f8153e92d899f0 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:43:23 -0800 Subject: [PATCH 6/6] Group response statistics api calls --- src/square.cc | 82 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/src/square.cc b/src/square.cc index a867ae0..c0c8ab2 100644 --- a/src/square.cc +++ b/src/square.cc @@ -396,6 +396,11 @@ class ModelInstanceState { void RequestThread( TRITONBACKEND_ResponseFactory* factory_ptr, const size_t element_count, uint32_t dims_count); + void ReportResponseStatistics( + TRITONBACKEND_ModelInstance* model_instance, + TRITONBACKEND_ResponseFactory* factory_ptr, + const uint64_t response_start_ns, const uint64_t compute_output_start_ns, + const uint64_t response_end_ns, TRITONSERVER_Error* error) const; ModelState* model_state_; TRITONBACKEND_ModelInstance* triton_model_instance_; @@ -592,39 +597,9 @@ ModelInstanceState::RequestThread( } // Report response statistics. - TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics; - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceResponseStatisticsNew(&response_statistics)); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance( - response_statistics, TritonModelInstance())); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory( - response_statistics, factory.get())); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart( - response_statistics, response_start_ns)); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart( - response_statistics, compute_output_start_ns)); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), - TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd( - response_statistics, response_end_ns)); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), TRITONBACKEND_ModelInstanceResponseStatisticsSetError( - response_statistics, error)); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), TRITONBACKEND_ModelInstanceReportResponseStatistics( - response_statistics)); - RESPOND_FACTORY_AND_RETURN_IF_ERROR( - factory.get(), TRITONBACKEND_ModelInstanceResponseStatisticsDelete( - response_statistics)); + ReportResponseStatistics( + TritonModelInstance(), factory.get(), response_start_ns, + compute_output_start_ns, response_end_ns, error); // Delete error, if any. 
if (error != nullptr) { @@ -660,6 +635,47 @@ ModelInstanceState::RequestThread( inflight_thread_count_--; } +void +ModelInstanceState::ReportResponseStatistics( + TRITONBACKEND_ModelInstance* model_instance, + TRITONBACKEND_ResponseFactory* factory_ptr, + const uint64_t response_start_ns, const uint64_t compute_output_start_ns, + const uint64_t response_end_ns, TRITONSERVER_Error* error) const +{ + TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics; + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceResponseStatisticsNew(&response_statistics)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance( + response_statistics, model_instance)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory( + response_statistics, factory_ptr)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart( + response_statistics, response_start_ns)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart( + response_statistics, compute_output_start_ns)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd( + response_statistics, response_end_ns)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, TRITONBACKEND_ModelInstanceResponseStatisticsSetError( + response_statistics, error)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceReportResponseStatistics(response_statistics)); + RESPOND_FACTORY_AND_RETURN_IF_ERROR( + factory_ptr, + TRITONBACKEND_ModelInstanceResponseStatisticsDelete(response_statistics)); +} + ///////////// extern "C" {
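Note on usage (not part of the patches): the four CUSTOM_* keys parsed by
ModelParameters::ReadParameter in patch 1 are plain string-valued entries in
the parameters section of the model's config.pbtxt. The fragment below is an
illustrative sketch only; the keys come from the patches, but the numeric
values are invented. ReadParameter accepts only strings of digits (IsNumber
rejects anything else, leaving the default of 0):

  parameters: { key: "CUSTOM_INFER_DELAY_NS" value: { string_value: "1000000" } }
  parameters: { key: "CUSTOM_OUTPUT_DELAY_NS" value: { string_value: "500000" } }
  parameters: { key: "CUSTOM_FAIL_COUNT" value: { string_value: "1" } }
  parameters: { key: "CUSTOM_EMPTY_COUNT" value: { string_value: "1" } }

With this configuration and an IN value of 4 (so element_count N = 4, fail
count F = 1, empty count E = 1), each of the four simulated inferences is
preceded by a ~1 ms delay and each non-empty output adds a further ~0.5 ms.
The backend then sends two successful responses and one error response
("simulated failure"), and omits the final response entirely, while response
statistics are still reported for all four.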