
Commit

max batch size
IlyasMoutawwakil committed Dec 11, 2024
1 parent 18db6fd · commit a388001
Showing 2 changed files with 12 additions and 13 deletions.
16 changes: 8 additions & 8 deletions optimum_benchmark/backends/tensorrt_llm/backend.py
@@ -128,20 +128,20 @@ def trtllm_kwargs(self):
         if self.config.gpus_per_node is not None:
             kwargs["gpus_per_node"] = self.config.gpus_per_node
 
-        if self.config.use_cuda_graph is not None:
-            kwargs["use_cuda_graph"] = self.config.use_cuda_graph
+        if self.config.max_batch_size is not None:
+            kwargs["max_batch_size"] = self.config.max_batch_size
 
-        if self.config.optimization_level is not None:
-            kwargs["optimization_level"] = self.config.optimization_level
+        if self.config.max_new_tokens is not None:
+            kwargs["max_new_tokens"] = self.config.max_new_tokens
 
         if self.config.max_prompt_length is not None:
             kwargs["max_prompt_length"] = self.config.max_prompt_length
 
-        if self.config.max_new_tokens is not None:
-            kwargs["max_new_tokens"] = self.config.max_new_tokens
+        if self.config.optimization_level is not None:
+            kwargs["optimization_level"] = self.config.optimization_level
 
-        if self.config.max_beam_width is not None:
-            kwargs["max_beam_width"] = self.config.max_beam_width
+        if self.config.use_cuda_graph is not None:
+            kwargs["use_cuda_graph"] = self.config.use_cuda_graph
 
         return kwargs

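For context on the hunk above: `trtllm_kwargs` builds a plain dict of engine arguments, forwarding only the config fields the user explicitly set, so TensorRT-LLM's own defaults apply to everything left at `None`. Below is a minimal self-contained sketch of that pattern; `SketchConfig` and `build_kwargs` are illustrative stand-ins for this page, not the library's actual `TRTLLMConfig` or backend method.

```python
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class SketchConfig:
    # Illustrative stand-in for TRTLLMConfig: every field is optional
    # and defaults to None, meaning "not set by the user".
    gpus_per_node: Optional[int] = None
    max_batch_size: Optional[int] = None
    max_new_tokens: Optional[int] = None
    max_prompt_length: Optional[int] = None
    optimization_level: Optional[int] = None
    use_cuda_graph: Optional[bool] = None


def build_kwargs(config: SketchConfig) -> Dict[str, Any]:
    # Forward only the explicitly set fields, in the same alphabetical
    # order this commit establishes in trtllm_kwargs.
    kwargs: Dict[str, Any] = {}
    for name in (
        "gpus_per_node",
        "max_batch_size",
        "max_new_tokens",
        "max_prompt_length",
        "optimization_level",
        "use_cuda_graph",
    ):
        value = getattr(config, name)
        if value is not None:
            kwargs[name] = value
    return kwargs


# Unset fields are simply absent from the result.
assert build_kwargs(SketchConfig(max_batch_size=8)) == {"max_batch_size": 8}
```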
9 changes: 4 additions & 5 deletions optimum_benchmark/backends/tensorrt_llm/config.py
@@ -22,12 +22,11 @@ class TRTLLMConfig(BackendConfig):
     use_fp8: Optional[bool] = None
     world_size: Optional[int] = None
     gpus_per_node: Optional[int] = None
-    use_cuda_graph: Optional[bool] = None
-    optimization_level: Optional[int] = None
-    max_prompt_length: Optional[int] = None
-    max_new_tokens: Optional[int] = None
+    max_batch_size: Optional[int] = None
     max_beam_width: Optional[int] = None
+    max_new_tokens: Optional[int] = None
+    max_prompt_length: Optional[int] = None
+    optimization_level: Optional[int] = None
+    use_cuda_graph: Optional[bool] = None
 
     def __post_init__(self) -> None:
         super().__post_init__()
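One note on the hunk above: since every reordered field is a keyword argument defaulting to `None`, alphabetizing the dataclass declaration is behavior-neutral for anyone constructing the config by keyword (dataclass field order only matters for positional construction). A quick check, reusing the illustrative `SketchConfig` and `build_kwargs` from the sketch above rather than the real `TRTLLMConfig`:

```python
# Keyword construction does not depend on field declaration order,
# and the resulting kwargs dict still comes out alphabetized.
config = SketchConfig(use_cuda_graph=True, max_batch_size=4)
print(build_kwargs(config))  # {'max_batch_size': 4, 'use_cuda_graph': True}
```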
