Skip to content

Commit

Permalink
Enable padding aware scheduling by default on HPU (#606)
Browse files Browse the repository at this point in the history
The title says it all, really.
  • Loading branch information
kzawora-intel authored Dec 10, 2024
1 parent 239739c commit 0ad9b59
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class EngineArgs:
enable_prefix_caching: Optional[bool] = None
disable_sliding_window: bool = False
use_v2_block_manager: bool = True
-use_padding_aware_scheduling: bool = False
+use_padding_aware_scheduling: bool = current_platform.is_hpu()
swap_space: float = 4 # GiB
cpu_offload_gb: float = 0 # GiB
gpu_memory_utilization: float = 0.90
Expand Down Expand Up @@ -454,7 +454,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action='store_true',
help=('Use padding-aware scheduling. If True, the scheduler '
'will consider padded tokens in prefill. '
-'By default this is set to False. '))
+'By default this is set to False on non-HPU devices. '))
parser.add_argument(
'--num-lookahead-slots',
type=int,
Expand Down

0 comments on commit 0ad9b59

Please sign in to comment.