From 0ad9b5953e6f2ae86b77b148acc22f59d9e5305b Mon Sep 17 00:00:00 2001 From: Konrad Zawora <kzawora@habana.ai> Date: Tue, 10 Dec 2024 13:32:36 +0100 Subject: [PATCH] Enable padding aware scheduling by default on HPU (#606) Change the default of EngineArgs.use_padding_aware_scheduling from False to current_platform.is_hpu(), so padding-aware scheduling is enabled by default on HPU and stays disabled by default on other devices. Update the CLI help text to match. --- vllm/engine/arg_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 5a64741f3b709..8fd96aad25357 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -119,7 +119,7 @@ class EngineArgs: enable_prefix_caching: Optional[bool] = None disable_sliding_window: bool = False use_v2_block_manager: bool = True - use_padding_aware_scheduling: bool = False + use_padding_aware_scheduling: bool = current_platform.is_hpu() swap_space: float = 4 # GiB cpu_offload_gb: float = 0 # GiB gpu_memory_utilization: float = 0.90 @@ -454,7 +454,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: action='store_true', help=('Use padding-aware scheduling. If True, the scheduler ' 'will consider padded tokens in prefill. ' - 'By default this is set to False. ')) + 'By default this is set to False on non-HPU devices. ')) parser.add_argument( '--num-lookahead-slots', type=int,