Skip to content

Commit

Permalink
AMDGPU: MC support for v_cvt_scalef32_pk32_f32_[fp|bf]6 of gfx950
Browse files Browse the repository at this point in the history
Co-authored-by: Pravin Jagtap <[email protected]>
  • Loading branch information
pravinjagtap authored and arsenm committed Nov 25, 2024
1 parent 9de73b2 commit 57317b1
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 5 deletions.
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -402,11 +402,17 @@ def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts",
"Has fp4 conversion scale instructions"
>;

// Subtarget feature "fp6bf6-cvt-scale-insts": sets the subtarget field
// HasFP6BF6ConversionScaleInsts to "true" on targets that provide the fp6 and
// bf6 conversion scale instructions.
def FeatureFP6BF6ConversionScaleInsts : SubtargetFeature<"fp6bf6-cvt-scale-insts",
"HasFP6BF6ConversionScaleInsts",
"true",
"Has fp6 and bf6 conversion scale instructions"
>;

// Umbrella feature for the instructions added with GFX950. The trailing list
// names the features that are implied whenever "gfx950-insts" is enabled; it
// includes the fp6/bf6 conversion scale feature alongside the fp8, bf8 and fp4
// ones.
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
  "GFX950Insts",
  "true",
  "Additional instructions for GFX950+",
  [FeaturePermlane16Swap,
   FeaturePermlane32Swap,
   FeatureFP8ConversionScaleInsts,
   FeatureBF8ConversionScaleInsts,
   FeatureFP4ConversionScaleInsts,
   FeatureFP6BF6ConversionScaleInsts]
>;

def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
Expand Down Expand Up @@ -1552,7 +1558,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureBitOp3Insts,
FeatureFP8ConversionScaleInsts,
FeatureBF8ConversionScaleInsts,
FeatureFP4ConversionScaleInsts
FeatureFP4ConversionScaleInsts,
FeatureFP6BF6ConversionScaleInsts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2435,6 +2442,9 @@ def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInst
def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;

// Predicate for the fp6/bf6 conversion scale instructions: evaluates
// Subtarget->hasFP6BF6ConversionScaleInsts() in the generated C++ and requires
// FeatureFP6BF6ConversionScaleInsts on the assembler side.
def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>;

def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class AMDGPUSubtarget {
bool HasFP8ConversionScaleInsts = false;
bool HasBF8ConversionScaleInsts = false;
bool HasFP4ConversionScaleInsts = false;
bool HasFP6BF6ConversionScaleInsts = false;
bool EnableRealTrue16Insts = false;
bool HasBF16ConversionInsts = false;
bool HasMadMixInsts = false;
Expand Down Expand Up @@ -184,6 +185,8 @@ class AMDGPUSubtarget {

/// \returns the value of the HasFP4ConversionScaleInsts subtarget flag.
bool hasFP4ConversionScaleInsts() const {
  return HasFP4ConversionScaleInsts;
}

/// \returns the value of the HasFP6BF6ConversionScaleInsts subtarget flag.
bool hasFP6BF6ConversionScaleInsts() const {
  return HasFP6BF6ConversionScaleInsts;
}

/// \returns true when HasMadMacF32Insts is set, or otherwise when isGCN()
/// reports false.
bool hasMadMacF32Insts() const {
  // Check the flag first so isGCN() is only consulted when the flag is clear,
  // matching the short-circuit order of `Flag || !isGCN()`.
  if (HasMadMacF32Insts)
    return true;
  return !isGCN();
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
case OPWV232: return VReg_64RegClassID;
case OPW96: return VReg_96RegClassID;
case OPW128: return VReg_128RegClassID;
case OPW192: return VReg_192RegClassID;
case OPW160: return VReg_160RegClassID;
case OPW256: return VReg_256RegClassID;
case OPW288: return VReg_288RegClassID;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ class AMDGPUDisassembler : public MCDisassembler {
OPW96,
OPW128,
OPW160,
OPW192,
OPW256,
OPW288,
OPW320,
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1696,7 +1696,8 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
VOPDstOperand_t16Lo128),
VOPDstOperand<VGPR_32>);
RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand<VReg_1024>,
!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
Expand Down Expand Up @@ -1752,7 +1753,8 @@ class getSOPSrcForVT<ValueType VT> {
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
RegisterOperand ret =
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
!cond(!eq(VT.Size, 192) : RegisterOperand<VReg_192>,
!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
Expand Down Expand Up @@ -1785,6 +1787,7 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
!eq(VT, v2i16) : VSrc_v2b16,
!eq(VT, v4f16) : AVSrc_64,
!eq(VT, v4bf16) : AVSrc_64,
!eq(VT.Size, 192) : VRegSrc_192,
!eq(VT.Size, 128) : VRegSrc_128,
!eq(VT.Size, 96) : VRegSrc_96,
!eq(VT.Size, 64) : VSrc_b64,
Expand Down Expand Up @@ -2828,6 +2831,9 @@ def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
// Profiles whose type list is [32-element vector, v6i32, f32, untyped]; by the
// naming used for the neighbouring profiles this reads as result type, then
// source types (presumably dst, src0, src1, src2 -- confirm against
// VOPProfile). Only VOP_V32F32_V6I32_F32 is referenced by the fp6/bf6
// conversion definitions visible in this change.
def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,7 @@ def VRegSrc_32 : SrcReg9<VGPR_32, "OPW32">;
def VRegSrc_64 : SrcReg9<VReg_64, "OPW64">;
def VRegSrc_96 : SrcReg9<VReg_96, "OPW96">;
def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
// Source operand over the VReg_192 register class. The "OPW192" string
// presumably names the disassembler's OpWidthTy enumerator of the same
// spelling -- confirm against SrcReg9.
def VRegSrc_192: SrcReg9<VReg_192, "OPW192">;
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;

Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,19 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f
let HasOMod = 0;
}

// VOP3 profile wrapper around an arbitrary VOPProfile P that switches off every
// optional operand and encoding variant set below: source modifiers (including
// the integer modifiers on src0/src1), omod, op_sel, clamp, and all DPP forms.
// NOTE(review): the "F864" suffix does not obviously match the pk32 fp6/bf6
// instructions that use this profile in this change -- confirm the intended
// name.
class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
let HasModifiers = 0;
let HasSrc0IntMods = 0;
let HasSrc1IntMods = 0;
let HasOMod = 0;
let HasOpSel = 0;
let HasClamp = 0;
// No DPP encodings of any flavour are generated for this profile.
let HasExtDPP = 0;
let HasExt32BitDPP = 0;
let HasExtVOP3DPP = 0;
let HasExt64BitDPP = 0;
}

let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
Expand All @@ -950,6 +963,11 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
}

// VOP3 definitions of the packed-32 fp6/bf6 -> f32 scaled conversions. Both are
// gated on HasFP6BF6ConversionScaleInsts, are marked as not raising FP
// exceptions, and share the V32F32 <- (V6I32, F32) profile with modifiers,
// clamp, omod, op_sel and DPP disabled.
let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
}

let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
Expand Down Expand Up @@ -1894,3 +1912,7 @@ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
}
// gfx9 real encodings for the packed-32 fp6/bf6 -> f32 scaled conversions:
// opcode 0x256 for the fp6 form and 0x257 for the bf6 form, both additionally
// predicated on HasFP6BF6ConversionScaleInsts.
let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, "v_cvt_scalef32_pk32_f32_bf6">;
}
10 changes: 9 additions & 1 deletion llvm/test/MC/AMDGPU/gfx950_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -884,4 +884,12 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0]

// SGPR first source with an inline-constant second source and op_sel:[1,1,0].
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6
24 changes: 24 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,27 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 mul:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 mul:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp div:2
6 changes: 6 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -611,3 +611,9 @@

# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00

# GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00

# GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00

0 comments on commit 57317b1

Please sign in to comment.