Experimenting with larger batch size (this commit lowers it from 64 to 24)

Wordcab · Apr 1, 2024 · 5aa8f3a
1 parent dab42df
commit 5aa8f3a
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 2 deletions.
diff --git a/src/wordcab_transcribe/engines/tensorrt_llm/trt_model.py b/src/wordcab_transcribe/engines/tensorrt_llm/trt_model.py
@@ -93,7 +93,7 @@ def get_session(self, engine_dir, runtime_mapping, debug_mode=False):
 
         # TODO: Make dynamic max_batch_size and max_beam_width
         decoder_model_config = ModelConfig(
-            max_batch_size=64,
+            max_batch_size=24,
             max_beam_width=1,
             num_heads=self.decoder_config["num_heads"],
             num_kv_heads=self.decoder_config["num_heads"],

diff --git a/src/wordcab_transcribe/services/transcribe_service.py b/src/wordcab_transcribe/services/transcribe_service.py
@@ -126,7 +126,7 @@ def __call__(
         ],
         source_lang: str,
         model_index: int,
-        batch_size: int = 64,
+        batch_size: int = 24,
         num_beams: int = 1,
         suppress_blank: bool = False,
         vocab: Union[List[str], None] = None,