Fix profile run for multi LoRA (#549)
Fixes an issue with multi LoRA during `profile_run`.
kdamaszk authored Nov 26, 2024
1 parent 5eb8b1f commit 0f513bd
Showing 1 changed file with 3 additions and 3 deletions.
vllm/worker/hpu_model_runner.py (3 additions, 3 deletions)

@@ -1266,9 +1266,9 @@ def create_dummy_seq_group_metadata(self,
     def profile_run(self) -> None:
         num_layers = self.model_config.get_num_layers(self.parallel_config)
         kv_caches = [None] * num_layers
-        max_batch_size, max_seq_len = self.bucketing_ctx.get_max_prompt_shape()
-        max_seq_len = min(max_seq_len,
-                          self.max_num_batched_tokens // max_batch_size)
+        _, max_seq_len = self.bucketing_ctx.get_max_prompt_shape()
+        max_batch_size = min(self.max_num_seqs,
+                             self.max_num_batched_tokens // max_seq_len)
 
         self.warmup_scenario(max_batch_size, max_seq_len, True, kv_caches,
                              False, True)
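For context, here is a minimal sketch (not from the repository) of how the dummy profiling shape changes with this commit, using assumed values for `max_num_seqs`, `max_num_batched_tokens`, and the bucket shape returned by `get_max_prompt_shape()`. Before the fix, the bucket's maximum batch size was kept and the sequence length was shrunk to fit the token budget, which could leave the batch size above `max_num_seqs`; after the fix, the bucket's maximum sequence length is kept and the batch size is derived from the token budget and capped at `max_num_seqs`, which is presumably what keeps the dummy multi-LoRA profiling requests within supported limits.

```python
# Illustrative sketch only -- the numbers below are assumptions, not values
# taken from the repository.
max_num_seqs = 128                      # scheduler limit on concurrent sequences
max_num_batched_tokens = 4096           # token budget per batch
bucket_batch, bucket_seq = 256, 1024    # assumed result of get_max_prompt_shape()

# Before the fix: keep the bucket batch size and shrink the sequence length
# to fit the token budget. The batch size (256) can exceed max_num_seqs (128).
old_batch = bucket_batch
old_seq = min(bucket_seq, max_num_batched_tokens // old_batch)    # min(1024, 16) = 16

# After the fix: keep the bucket sequence length and derive the batch size
# from the token budget, capped at max_num_seqs.
new_seq = bucket_seq
new_batch = min(max_num_seqs, max_num_batched_tokens // new_seq)  # min(128, 4) = 4

print((old_batch, old_seq))  # (256, 16)
print((new_batch, new_seq))  # (4, 1024)
```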
