From 107a9a3ae2c68696cc5c57ff81d1a346289bcae6 Mon Sep 17 00:00:00 2001
From: Nir David <124874956+nirda7@users.noreply.github.com>
Date: Mon, 27 Jan 2025 11:30:15 +0200
Subject: [PATCH] [SW-216666] - Add fp8 to the HPU supported quantization list
 (#739)

This is required for running already-quantized models on HPU using the fp8
quantization method (rather than "inc").
---
 docs/source/features/quantization/supported_hardware.md | 2 +-
 vllm/platforms/hpu.py                                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/features/quantization/supported_hardware.md b/docs/source/features/quantization/supported_hardware.md
index c375d044dd64b..336004525a4e0 100644
--- a/docs/source/features/quantization/supported_hardware.md
+++ b/docs/source/features/quantization/supported_hardware.md
@@ -76,7 +76,7 @@ The table below shows the compatibility of various quantization implementations
    - ✅︎
    - ✅︎
    - ✗
-   - ✗
+   - ✅︎
    - ✗
    - ✗
    - ✗
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index 69c445766b824..eb0b2b4ec3ee7 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -21,7 +21,7 @@ class HpuPlatform(Platform):
     dispatch_key: str = "HPU"
     ray_device_key: str = "HPU"
     device_control_env_var: str = "HABANA_VISIBLE_MODULES"
-    supported_quantization: list[str] = ["inc"]
+    supported_quantization: list[str] = ["fp8", "inc"]
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
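
For reviewers, a minimal usage sketch of what this change enables: with "fp8"
in `HpuPlatform.supported_quantization`, an already-quantized FP8 checkpoint
can be loaded on Gaudi via `quantization="fp8"` instead of "inc". This assumes
a Gaudi host with the vLLM HPU backend installed; the model path and prompt
below are placeholders, not part of the patch.

```python
from vllm import LLM, SamplingParams

# Load an FP8-quantized checkpoint on HPU. Before this patch, passing
# quantization="fp8" was rejected because the HPU platform only listed "inc".
llm = LLM(
    model="path/to/fp8-quantized-model",  # placeholder checkpoint path
    quantization="fp8",                   # now accepted by HpuPlatform
)

params = SamplingParams(temperature=0.0, max_tokens=32)
outputs = llm.generate(["Hello, Gaudi!"], params)
print(outputs[0].outputs[0].text)
```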