From 107a9a3ae2c68696cc5c57ff81d1a346289bcae6 Mon Sep 17 00:00:00 2001
From: Nir David <124874956+nirda7@users.noreply.github.com>
Date: Mon, 27 Jan 2025 11:30:15 +0200
Subject: [PATCH] [SW-216666] - Add fp8 to the HPU supported quantization list
 (#739)

This is required for running already-quantized models on HPU using the fp8
quantization method (rather than "inc").
---
 docs/source/features/quantization/supported_hardware.md | 2 +-
 vllm/platforms/hpu.py                                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/features/quantization/supported_hardware.md b/docs/source/features/quantization/supported_hardware.md
index c375d044dd64b..336004525a4e0 100644
--- a/docs/source/features/quantization/supported_hardware.md
+++ b/docs/source/features/quantization/supported_hardware.md
@@ -76,7 +76,7 @@ The table below shows the compatibility of various quantization implementations
    - ✅︎
    - ✅︎
    - ✗
-   - ✗
+   - ✅︎
    - ✗
    - ✗
    - ✗
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index 69c445766b824..eb0b2b4ec3ee7 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -21,7 +21,7 @@ class HpuPlatform(Platform):
     dispatch_key: str = "HPU"
     ray_device_key: str = "HPU"
     device_control_env_var: str = "HABANA_VISIBLE_MODULES"
-    supported_quantization: list[str] = ["inc"]
+    supported_quantization: list[str] = ["fp8", "inc"]
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
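
For reviewers, a minimal usage sketch of what this change enables: with "fp8"
in `HpuPlatform.supported_quantization`, an already-quantized FP8 checkpoint
can be loaded on Gaudi via `quantization="fp8"` instead of "inc". This assumes
a Gaudi host with the vLLM HPU backend installed; the model path and prompt
below are placeholders, not part of the patch.

```python
from vllm import LLM, SamplingParams

# Load an FP8-quantized checkpoint on HPU. Before this patch, passing
# quantization="fp8" was rejected because the HPU platform only listed "inc".
llm = LLM(
    model="path/to/fp8-quantized-model",  # placeholder checkpoint path
    quantization="fp8",                   # now accepted by HpuPlatform
)

params = SamplingParams(temperature=0.0, max_tokens=32)
outputs = llm.generate(["Hello, Gaudi!"], params)
print(outputs[0].outputs[0].text)
```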