From d751006b385e06230ac0ed267a03831366cd9245 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Sun, 24 Nov 2024 07:42:43 -0800
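Subject: [PATCH] Rename triton_fused_moe -> fused_moe_triton

Also rename the fused_moe package to fused_moe_grok, and update the
import paths that referenced either of the old package names.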

---
 python/sglang/srt/layers/fused_moe/__init__.py              | 1 -
 python/sglang/srt/layers/fused_moe_grok/__init__.py         | 1 +
 ...N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json | 0
 ...N=8192,device_name=AMD_Instinct_MI300X,dtype=float8.json | 0
 .../srt/layers/{fused_moe => fused_moe_grok}/fused_moe.py   | 0
 .../srt/layers/{fused_moe => fused_moe_grok}/layer.py       | 6 +++---
 .../{triton_fused_moe => fused_moe_triton}/__init__.py      | 6 +++---
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 ...,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 0
 .../E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 .../E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json      | 0
 .../E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 .../E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json      | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json     | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 .../E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 .../E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json      | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 ...,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 0
 ...00,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 ...00,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 ...,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0
 .../E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 ...,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 0
 ...00,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json      | 0
 .../E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json      | 0
 .../E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 .../E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 .../E=8,N=14336,device_name=AMD_Instinct_MI300X.json        | 0
 ...36,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json | 0
 .../E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json       | 0
 .../E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 .../E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 .../E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 ...48,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 .../configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json | 0
 .../E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json       | 0
 .../E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 ...84,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 .../configs/E=8,N=3584,device_name=NVIDIA_L40S.json         | 0
 .../E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 ...96,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 .../configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json | 0
 .../E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json       | 0
 ...68,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json       | 0
 ...92,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0
 .../{triton_fused_moe => fused_moe_triton}/configs/README   | 0
 .../{triton_fused_moe => fused_moe_triton}/fused_moe.py     | 2 +-
 .../layers/{triton_fused_moe => fused_moe_triton}/layer.py  | 4 ++--
 python/sglang/srt/layers/quantization/__init__.py           | 2 +-
 python/sglang/srt/models/dbrx.py                            | 2 +-
 python/sglang/srt/models/deepseek.py                        | 2 +-
 python/sglang/srt/models/deepseek_v2.py                     | 2 +-
 python/sglang/srt/models/grok.py                            | 2 +-
 python/sglang/srt/models/mixtral.py                         | 2 +-
 python/sglang/srt/models/olmoe.py                           | 2 +-
 python/sglang/srt/models/qwen2_moe.py                       | 2 +-
 python/sglang/srt/models/xverse_moe.py                      | 2 +-
 76 files changed, 19 insertions(+), 19 deletions(-)
 delete mode 100644 python/sglang/srt/layers/fused_moe/__init__.py
 create mode 100644 python/sglang/srt/layers/fused_moe_grok/__init__.py
 rename python/sglang/srt/layers/{fused_moe => fused_moe_grok}/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json (100%)
 rename python/sglang/srt/layers/{fused_moe => fused_moe_grok}/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=float8.json (100%)
 rename python/sglang/srt/layers/{fused_moe => fused_moe_grok}/fused_moe.py (100%)
 rename python/sglang/srt/layers/{fused_moe => fused_moe_grok}/layer.py (99%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/__init__.py (80%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=3584,device_name=NVIDIA_L40S.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/configs/README (100%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/fused_moe.py (99%)
 rename python/sglang/srt/layers/{triton_fused_moe => fused_moe_triton}/layer.py (99%)

diff --git a/python/sglang/srt/layers/fused_moe/__init__.py b/python/sglang/srt/layers/fused_moe/__init__.py
deleted file mode 100644
index 5f7691c09..000000000
--- a/python/sglang/srt/layers/fused_moe/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from sglang.srt.layers.fused_moe.layer import FusedMoE, FusedMoEMethodBase
diff --git a/python/sglang/srt/layers/fused_moe_grok/__init__.py b/python/sglang/srt/layers/fused_moe_grok/__init__.py
new file mode 100644
index 000000000..c915c960d
--- /dev/null
+++ b/python/sglang/srt/layers/fused_moe_grok/__init__.py
@@ -0,0 +1 @@
+from sglang.srt.layers.fused_moe_grok.layer import FusedMoE, FusedMoEMethodBase
diff --git a/python/sglang/srt/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json b/python/sglang/srt/layers/fused_moe_grok/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json
similarity index 100%
rename from python/sglang/srt/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json
rename to python/sglang/srt/layers/fused_moe_grok/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json
diff --git a/python/sglang/srt/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=float8.json b/python/sglang/srt/layers/fused_moe_grok/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=float8.json
similarity index 100%
rename from python/sglang/srt/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=float8.json
rename to python/sglang/srt/layers/fused_moe_grok/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=float8.json
diff --git a/python/sglang/srt/layers/fused_moe/fused_moe.py b/python/sglang/srt/layers/fused_moe_grok/fused_moe.py
similarity index 100%
rename from python/sglang/srt/layers/fused_moe/fused_moe.py
rename to python/sglang/srt/layers/fused_moe_grok/fused_moe.py
diff --git a/python/sglang/srt/layers/fused_moe/layer.py b/python/sglang/srt/layers/fused_moe_grok/layer.py
similarity index 99%
rename from python/sglang/srt/layers/fused_moe/layer.py
rename to python/sglang/srt/layers/fused_moe_grok/layer.py
index df91ba117..89cc33d11 100644
--- a/python/sglang/srt/layers/fused_moe/layer.py
+++ b/python/sglang/srt/layers/fused_moe_grok/layer.py
@@ -20,7 +20,7 @@
 from vllm.model_executor.layers.quantization.fp8 import Fp8Config
 from vllm.model_executor.utils import set_weight_attrs
 
-from sglang.srt.layers.fused_moe.fused_moe import padding_size
+from sglang.srt.layers.fused_moe_grok.fused_moe import padding_size
 from sglang.srt.utils import is_hip
 
 logger = init_logger(__name__)
@@ -123,7 +123,7 @@ def forward_cuda(
         num_expert_group: Optional[int],
         topk_group: Optional[int],
     ) -> torch.Tensor:
-        from sglang.srt.layers.fused_moe.fused_moe import fused_moe
+        from sglang.srt.layers.fused_moe_grok.fused_moe import fused_moe
 
         return fused_moe(
             x,
@@ -609,7 +609,7 @@ def apply(
         topk_group: Optional[int] = None,
     ) -> torch.Tensor:
 
-        from sglang.srt.layers.fused_moe.fused_moe import fused_moe
+        from sglang.srt.layers.fused_moe_grok.fused_moe import fused_moe
 
         return fused_moe(
             x,
diff --git a/python/sglang/srt/layers/triton_fused_moe/__init__.py b/python/sglang/srt/layers/fused_moe_triton/__init__.py
similarity index 80%
rename from python/sglang/srt/layers/triton_fused_moe/__init__.py
rename to python/sglang/srt/layers/fused_moe_triton/__init__.py
index b2eb11835..b895b9e48 100644
--- a/python/sglang/srt/layers/triton_fused_moe/__init__.py
+++ b/python/sglang/srt/layers/fused_moe_triton/__init__.py
@@ -1,14 +1,14 @@
 from contextlib import contextmanager
 from typing import Any, Dict, Optional
 
-import sglang.srt.layers.triton_fused_moe.fused_moe  # noqa
-from sglang.srt.layers.triton_fused_moe.fused_moe import (
+import sglang.srt.layers.fused_moe_triton.fused_moe  # noqa
+from sglang.srt.layers.fused_moe_triton.fused_moe import (
     fused_experts,
     fused_topk,
     get_config_file_name,
     grouped_topk,
 )
-from sglang.srt.layers.triton_fused_moe.layer import (
+from sglang.srt.layers.fused_moe_triton.layer import (
     FusedMoE,
     FusedMoEMethodBase,
     FusedMoeWeightScaleSupported,
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
rename to python/sglang/srt/layers/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json
diff --git a/python/sglang/srt/layers/triton_fused_moe/configs/README b/python/sglang/srt/layers/fused_moe_triton/configs/README
similarity index 100%
rename from python/sglang/srt/layers/triton_fused_moe/configs/README
rename to python/sglang/srt/layers/fused_moe_triton/configs/README
diff --git a/python/sglang/srt/layers/triton_fused_moe/fused_moe.py b/python/sglang/srt/layers/fused_moe_triton/fused_moe.py
similarity index 99%
rename from python/sglang/srt/layers/triton_fused_moe/fused_moe.py
rename to python/sglang/srt/layers/fused_moe_triton/fused_moe.py
index 8a289a5c6..4f92512b2 100644
--- a/python/sglang/srt/layers/triton_fused_moe/fused_moe.py
+++ b/python/sglang/srt/layers/fused_moe_triton/fused_moe.py
@@ -376,7 +376,7 @@ def try_get_optimal_moe_config(
     M: int,
     is_marlin: bool = False,
 ):
-    from sglang.srt.layers.triton_fused_moe import get_config
+    from sglang.srt.layers.fused_moe_triton import get_config
 
     override_config = get_config()
     if override_config:
diff --git a/python/sglang/srt/layers/triton_fused_moe/layer.py b/python/sglang/srt/layers/fused_moe_triton/layer.py
similarity index 99%
rename from python/sglang/srt/layers/triton_fused_moe/layer.py
rename to python/sglang/srt/layers/fused_moe_triton/layer.py
index 93a6e5506..d9503fe20 100644
--- a/python/sglang/srt/layers/triton_fused_moe/layer.py
+++ b/python/sglang/srt/layers/fused_moe_triton/layer.py
@@ -20,7 +20,7 @@
 from sglang.srt.utils import set_weight_attrs
 
 if torch.cuda.is_available() or torch.hip.is_available():
-    from sglang.srt.layers.triton_fused_moe.fused_moe import fused_experts
+    from sglang.srt.layers.fused_moe_triton.fused_moe import fused_experts
 else:
     fused_experts = None  # type: ignore
 
@@ -514,7 +514,7 @@ def select_experts(
         num_expert_group: Optional[int] = None,
         custom_routing_function: Optional[Callable] = None,
     ):
-        from sglang.srt.layers.triton_fused_moe.fused_moe import (
+        from sglang.srt.layers.fused_moe_triton.fused_moe import (
             fused_topk,
             grouped_topk,
         )
diff --git a/python/sglang/srt/layers/quantization/__init__.py b/python/sglang/srt/layers/quantization/__init__.py
index 584ae0d89..78d9f99b5 100644
--- a/python/sglang/srt/layers/quantization/__init__.py
+++ b/python/sglang/srt/layers/quantization/__init__.py
@@ -68,7 +68,7 @@ def fp8_get_quant_method(self, layer, prefix):
         is_layer_skipped,
     )
 
-    from sglang.srt.layers.triton_fused_moe.layer import FusedMoE
+    from sglang.srt.layers.fused_moe_triton.layer import FusedMoE
 
     if isinstance(layer, LinearBase):
         if is_layer_skipped(prefix, self.ignored_layers):
diff --git a/python/sglang/srt/models/dbrx.py b/python/sglang/srt/models/dbrx.py
index cfbf21c70..b8dad0248 100644
--- a/python/sglang/srt/models/dbrx.py
+++ b/python/sglang/srt/models/dbrx.py
@@ -28,6 +28,7 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.transformers_utils.configs.dbrx import DbrxConfig
 
+from sglang.srt.layers.fused_moe_triton import fused_moe
 from sglang.srt.layers.linear import (
     QKVParallelLinear,
     ReplicatedLinear,
@@ -36,7 +37,6 @@
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import fused_moe
 from sglang.srt.layers.vocab_parallel_embedding import (
     DEFAULT_VOCAB_PADDING_SIZE,
     ParallelLMHead,
diff --git a/python/sglang/srt/models/deepseek.py b/python/sglang/srt/models/deepseek.py
index e8e163dfc..cdebafa2f 100644
--- a/python/sglang/srt/models/deepseek.py
+++ b/python/sglang/srt/models/deepseek.py
@@ -30,6 +30,7 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
 from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import fused_moe
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
@@ -40,7 +41,6 @@
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import fused_moe
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 73ab9c059..85467c12c 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -31,6 +31,7 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
 from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import FusedMoE
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     ColumnParallelLinear,
@@ -41,7 +42,6 @@
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import FusedMoE
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py
index 40f50785a..f8326c72d 100644
--- a/python/sglang/srt/models/grok.py
+++ b/python/sglang/srt/models/grok.py
@@ -31,7 +31,7 @@
 from vllm.model_executor.model_loader.loader import DefaultModelLoader
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
-from sglang.srt.layers.fused_moe import FusedMoE
+from sglang.srt.layers.fused_moe_grok import FusedMoE
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     QKVParallelLinear,
diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py
index 46a6b6ac7..98d5ab332 100644
--- a/python/sglang/srt/models/mixtral.py
+++ b/python/sglang/srt/models/mixtral.py
@@ -25,6 +25,7 @@
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
+from sglang.srt.layers.fused_moe_triton import FusedMoE
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     QKVParallelLinear,
@@ -35,7 +36,6 @@
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.layers.torchao_utils import apply_torchao_config_
-from sglang.srt.layers.triton_fused_moe import FusedMoE
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
diff --git a/python/sglang/srt/models/olmoe.py b/python/sglang/srt/models/olmoe.py
index 984638d5b..407eb98cb 100644
--- a/python/sglang/srt/models/olmoe.py
+++ b/python/sglang/srt/models/olmoe.py
@@ -38,11 +38,11 @@
 from vllm.utils import print_warning_once
 
 from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import FusedMoE
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import FusedMoE
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py
index d363ec6a0..febd6d748 100644
--- a/python/sglang/srt/models/qwen2_moe.py
+++ b/python/sglang/srt/models/qwen2_moe.py
@@ -30,6 +30,7 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
 from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import FusedMoE
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
@@ -41,7 +42,6 @@
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.layers.torchao_utils import apply_torchao_config_
-from sglang.srt.layers.triton_fused_moe import FusedMoE
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
diff --git a/python/sglang/srt/models/xverse_moe.py b/python/sglang/srt/models/xverse_moe.py
index 8cdd4c570..c6458f7f5 100644
--- a/python/sglang/srt/models/xverse_moe.py
+++ b/python/sglang/srt/models/xverse_moe.py
@@ -34,10 +34,10 @@
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
+from sglang.srt.layers.fused_moe_triton import fused_moe
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import fused_moe
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,