From 3b928de0225828d3125d09430d42ed85d97dbaff Mon Sep 17 00:00:00 2001
From: Michal Adamczyk
Date: Thu, 14 Nov 2024 11:03:36 +0100
Subject: [PATCH] Revert "do not use softmax fast mode in FusedSDPA (#26)"

This reverts commit 4e911b4f358767b3c57ea7d182dd8393f6e7d3e7.
---
 vllm_hpu_extension/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_hpu_extension/ops.py b/vllm_hpu_extension/ops.py
index c024af6b..97ef7923 100644
--- a/vllm_hpu_extension/ops.py
+++ b/vllm_hpu_extension/ops.py
@@ -223,7 +223,7 @@ def prompt_attention(
         if query_heads != kv_heads:
             key = repeat_kv(key, int(query_heads // kv_heads))
             value = repeat_kv(value, int(query_heads // kv_heads))
-        softmax_mode = 'None'
+        softmax_mode = 'fast'
         recompute_mode = True
         attn_weights = FusedSDPA.apply(query, key, value, None, 0.0, True,
                                        scale, softmax_mode, recompute_mode,