From 377248e8db874439e9a83be1c41e6f8e0201bd0e Mon Sep 17 00:00:00 2001
From: kee hyun an
Date: Tue, 5 Nov 2024 09:31:38 +0900
Subject: [PATCH] chore: setting for test

---
 core/runtime/TRTEngine.cpp                                | 4 ----
 core/runtime/execute_engine.cpp                           | 4 ----
 core/runtime/register_jit_hooks.cpp                       | 2 +-
 .../dynamo/runtime/_PythonTorchTensorRTModule.py          | 5 ++++-
 py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py  | 5 ++++-
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
index ba78c30a90..adc21bd496 100644
--- a/core/runtime/TRTEngine.cpp
+++ b/core/runtime/TRTEngine.cpp
@@ -296,10 +296,6 @@ int64_t TRTEngine::get_automatic_device_memory_budget() {
   return cuda_engine->getWeightStreamingAutomaticBudget();
 }
 
-void TRTEngine::set_pre_allocated_outputs(bool enable) {
-  use_pre_allocated_outputs = enable;
-}
-
 std::string TRTEngine::to_str() const {
   // clang-format off
   std::stringstream ss;
diff --git a/core/runtime/execute_engine.cpp b/core/runtime/execute_engine.cpp
index f7ba509494..682d56ab3d 100644
--- a/core/runtime/execute_engine.cpp
+++ b/core/runtime/execute_engine.cpp
@@ -5,7 +5,6 @@
 #include "torch/csrc/jit/runtime/custom_operator.h"
 #include "torch/torch.h"
 
-#include <ATen/record_function.h>
 #include "core/runtime/TRTEngineProfiler.h"
 #include "core/runtime/runtime.h"
 #include "core/util/prelude.h"
@@ -200,7 +199,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
 
   { // Input Setup
     std::unique_ptr<torch::autograd::profiler::RecordProfile> input_profiler_guard;
-    RECORD_FUNCTION("process input", std::vector<c10::IValue>());
     if (compiled_engine->profile_execution) {
       input_profiler_guard =
           std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->input_profile_path);
@@ -282,7+280,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
 
   { // Output Setup
     std::unique_ptr<torch::autograd::profiler::RecordProfile> output_profiler_guard;
-    RECORD_FUNCTION("process output", std::vector<c10::IValue>());
     if (compiled_engine->profile_execution) {
       output_profiler_guard =
           std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
@@ -331,7 +328,6 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
   std::unique_lock<std::mutex> lock(compiled_engine->mu);
 
   { // Engine Execution (execute on engine stream)
-    RECORD_FUNCTION("Trt runtime", std::vector<c10::IValue>());
     c10::cuda::CUDAStreamGuard stream_guard(compiled_engine->engine_stream);
 
     std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;
diff --git a/core/runtime/register_jit_hooks.cpp b/core/runtime/register_jit_hooks.cpp
index 2918cee367..a09b99dbfc 100644
--- a/core/runtime/register_jit_hooks.cpp
+++ b/core/runtime/register_jit_hooks.cpp
@@ -86,7 +86,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
         .def("dump_engine_layer_info_to_file", &TRTEngine::dump_engine_layer_info_to_file)
         .def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
         .def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
-        .def("set_pre_allocated_outputs", &TRTEngine::set_pre_allocated_outputs)
+        .def_readwrite("use_pre_allocated_outputs", &TRTEngine::use_pre_allocated_outputs)
         .def_property(
             "device_memory_budget",
             &TRTEngine::get_device_memory_budget,
diff --git a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
index 17a38c716d..afb67d1165 100644
--- a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
+++ b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
@@ -109,7 +109,7 @@ def __init__(
         self.target_platform = Platform.current_platform()
         self.cudagraphs_enabled = False
         self.pre_allocated_outputs: List[torch.Tensor] = []
-        self.use_pre_allocated_outputs = False
+        self.use_pre_allocated_outputs = True
 
         if self.serialized_engine is not None and not self.settings.lazy_engine_init:
             self.setup_engine()
@@ -248,6 +248,9 @@ def create_output_tensors(self) -> List[torch.Tensor]:
             outputs.append(output)
         return outputs
 
+    def set_output_opt(self, enable: bool) -> None:
+        self.use_pre_allocated_outputs = enable
+
     def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
         # Ensure inputs are available in all scopes and cast symbolic integers to Tensors
         contiguous_inputs: List[torch.Tensor] = [
diff --git a/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py
index 99f863f1da..b3ec3258f0 100644
--- a/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py
+++ b/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py
@@ -207,7 +207,7 @@ def setup_engine(self) -> None:
         if self.engine is not None:
             return
         self.engine = torch.classes.tensorrt.Engine(self._pack_engine_info())
-        self.engine.set_pre_allocated_outputs(True)
+        self.set_output_opt(True)
 
     def encode_metadata(self, metadata: Any) -> str:
         metadata = copy.deepcopy(metadata)
@@ -272,6 +272,9 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
         self.input_binding_names = state[2]
         self.output_binding_names = state[3]
 
+    def set_output_opt(self, enable: bool) -> None:
+        self.engine.use_pre_allocated_outputs = enable
+
     def forward(self, *inputs: Any) -> torch.Tensor | Tuple[torch.Tensor, ...]:
         """Implementation of the forward pass for a TensorRT engine
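For reference, a minimal usage sketch of the set_output_opt() API this patch
introduces. It assumes a model compiled through the dynamo frontend, whose
returned GraphModule holds the TorchTensorRTModule / PythonTorchTensorRTModule
children patched above; the toy model and compile arguments are illustrative,
not part of the patch:

    import torch
    import torch_tensorrt

    # Illustrative toy model; any eval-mode CUDA module works here.
    model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval().cuda()
    inputs = [torch.randn(1, 3, 224, 224, device="cuda")]

    # Compiling with the dynamo frontend returns a GraphModule whose TRT
    # submodules are (Python)TorchTensorRTModule instances.
    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)

    # set_output_opt() toggles the pre-allocated-outputs path. This patch
    # flips the runtime default to True, so passing False here restores
    # per-call output allocation.
    for _, submodule in trt_gm.named_children():
        if hasattr(submodule, "set_output_opt"):
            submodule.set_output_opt(False)

    out = trt_gm(*inputs)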