Skip to content

Commit

Permalink
Experimenting with larger batch size
Browse files Browse the repository at this point in the history
  • Loading branch information
Aleks committed Apr 1, 2024
1 parent dab42df commit 5aa8f3a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/wordcab_transcribe/engines/tensorrt_llm/trt_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_session(self, engine_dir, runtime_mapping, debug_mode=False):

# TODO: Make dynamic max_batch_size and max_beam_width
decoder_model_config = ModelConfig(
- max_batch_size=64,
+ max_batch_size=24,
max_beam_width=1,
num_heads=self.decoder_config["num_heads"],
num_kv_heads=self.decoder_config["num_heads"],
Expand Down
2 changes: 1 addition & 1 deletion src/wordcab_transcribe/services/transcribe_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def __call__(
],
source_lang: str,
model_index: int,
- batch_size: int = 64,
+ batch_size: int = 24,
num_beams: int = 1,
suppress_blank: bool = False,
vocab: Union[List[str], None] = None,
Expand Down

0 comments on commit 5aa8f3a

Please sign in to comment.