Skip to content

Commit

Permalink
AMDGPU: Add MC support for gfx950 V_BITOP3_B32/B16
Browse files Browse the repository at this point in the history
Co-authored-by: Pravin Jagtap <[email protected]>
  • Loading branch information
pravinjagtap authored and arsenm committed Nov 25, 2024
1 parent e97fb22 commit 174f311
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 9 deletions.
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
"VMEM instructions of the same type write VGPR results in order"
>;

// Subtarget feature "bitop3-insts": sets the HasBitOp3Insts subtarget field to
// "true" for targets that have the v_bitop3_b32/v_bitop3_b16 instructions.
def FeatureBitOp3Insts : SubtargetFeature<"bitop3-insts",
"HasBitOp3Insts",
"true",
"Has v_bitop3_b32/v_bitop3_b16 instructions"
>;

def FeaturePrngInst : SubtargetFeature<"prng-inst",
"HasPrngInst",
"true",
Expand Down Expand Up @@ -1524,7 +1530,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeaturePrngInst,
FeatureBF16ConversionInsts
FeatureBF16ConversionInsts,
FeatureBitOp3Insts
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2392,6 +2399,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;

// Predicate for the bitop3 instructions: checks Subtarget->hasBitOp3Insts()
// and, for the assembler, requires FeatureBitOp3Insts.
def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
AssemblerPredicate<(all_of FeatureBitOp3Insts)>;

def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyWaitVAVDst,
ImmTyWaitVMVSrc,
ImmTyByteSel,
ImmTyBitOp3,
};

// Immediate operand kind.
Expand Down Expand Up @@ -410,6 +411,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
// True when this operand is an ImmTyBitOp3 immediate whose value fits in an
// unsigned 8-bit integer.
bool isBitOp3() const {
  if (!isImmTy(ImmTyBitOp3))
    return false;
  return isUInt<8>(getImm());
}

bool isRegOrImm() const {
return isReg() || isImm();
Expand Down Expand Up @@ -1138,6 +1140,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
case ImmTyByteSel: OS << "ByteSel" ; break;
case ImmTyBitOp3: OS << "BitOp3"; break;
}
// clang-format on
}
Expand Down Expand Up @@ -1913,6 +1916,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
ParseStatus parseEndpgm(OperandVector &Operands);

ParseStatus parseVOPD(OperandVector &Operands);

/// Parses an integer introduced by the "bitop3" prefix as an ImmTyBitOp3
/// operand.
ParseStatus parseBitOp3(OperandVector &Operands);
/// Creates an ImmTyBitOp3 immediate operand with value 0.
AMDGPUOperand::Ptr defaultBitOp3() const;
};

} // end anonymous namespace
Expand Down Expand Up @@ -8841,6 +8847,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
Inst.addOperand(Inst.getOperand(0));
}

int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
if (BitOp3Idx != -1) {
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
}

// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
// instruction, and then figure out where to actually put the modifiers

Expand Down Expand Up @@ -9748,6 +9759,20 @@ ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// BITOP3
//===----------------------------------------------------------------------===//

/// Parse the bitop3 modifier by delegating to parseIntWithPrefix with the
/// "bitop3" prefix and the ImmTyBitOp3 immediate type; the resulting status is
/// returned to the caller unchanged.
ParseStatus AMDGPUAsmParser::parseBitOp3(OperandVector &Operands) {
  return parseIntWithPrefix("bitop3", Operands, AMDGPUOperand::ImmTyBitOp3);
}

/// Build an ImmTyBitOp3 immediate operand holding 0, with a default-constructed
/// source location.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBitOp3() const {
  const auto Ty = AMDGPUOperand::ImmTyBitOp3;
  return AMDGPUOperand::CreateImm(this, /*Val=*/0, SMLoc(), Ty);
}

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasSALUFloatInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;
bool HasBitOp3Insts = false;
bool HasPrngInst = false;
bool HasPermlane16Swap = false;
bool HasPermlane32Swap = false;
Expand Down Expand Up @@ -1321,6 +1322,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the target has instructions with xf32 format support.
bool hasXF32Insts() const { return HasXF32Insts; }

/// \returns the value of the HasBitOp3Insts subtarget flag.
bool hasBitOp3Insts() const {
  return HasBitOp3Insts;
}

bool hasPermlane16Swap() const { return HasPermlane16Swap; }
bool hasPermlane32Swap() const { return HasPermlane32Swap; }

Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,4 +1714,18 @@ void AMDGPUInstPrinter::printNamedInt(const MCInst *MI, unsigned OpNo,
O << ' ' << Prefix << ':' << (PrintInHex ? formatHex(V) : formatDec(V));
}

/// Print the bitop3 operand of \p MI at index \p OpNo to \p O.
///
/// Nothing is printed when the (8-bit) value is zero. Otherwise " bitop3:" is
/// emitted followed by the value: in decimal for values up to 10, and in hex
/// for anything larger. \p STI is not used here.
void AMDGPUInstPrinter::printBitOp3(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI,
                                    raw_ostream &O) {
  // The operand is deliberately narrowed to 8 bits before any test.
  const uint8_t Value = MI->getOperand(OpNo).getImm();
  if (Value == 0)
    return;

  O << " bitop3:";
  if (Value > 10) {
    O << formatHex(static_cast<uint64_t>(Value));
    return;
  }
  O << formatDec(Value);
}

#include "AMDGPUGenAsmWriter.inc"
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ class AMDGPUInstPrinter : public MCInstPrinter {
const MCSubtargetInfo &STI, raw_ostream &O,
StringRef Prefix, bool PrintInHex, bool AlwaysPrint);

void printBitOp3(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);

public:
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef Asm, StringRef Default = "");
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,9 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
let Validator = "isUInt<2>";
}

// i8 custom operand named "BitOp3", plus a DefaultOperand wrapper around it
// whose default value is 0.
def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
def bitop3_0 : DefaultOperand<BitOp3, 0>;

class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_KIMM"#vt.Size;
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,24 @@ class PermlaneVarPat<SDPatternOperator permlane,
VGPR_32:$src1, VGPR_32:$vdst_in)
>;

// VOP3 profile used by the bitop3 instructions. Clamp, omod and source
// modifiers are turned off, and a bitop3_0:$bitop3 operand is added to both
// the regular and the op_sel input lists and asm strings.
class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f> {
let HasClamp = 0;
let HasOMod = 0;
let HasModifiers = 0;

// Regular 64-bit encoding inputs, followed by the bitop3 operand.
let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
0 /* HasIntClamp */, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins bitop3_0:$bitop3));

// Op_sel variant: bitop3 comes before op_sel in the operand list.
let InsVOP3OpSel = !con(getInsVOP3Base<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 0, 1, 1, 0,
Src0Mod, Src1Mod, Src2Mod, 0>.ret,
(ins bitop3_0:$bitop3, op_sel0:$op_sel));

// $bitop3 follows $src2 directly, with no separator in the asm string.
let Asm64 = "$vdst, $src0, $src1, $src2$bitop3";
// Insert $bitop3 immediately in front of $op_sel in the generated op_sel asm string.
let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret);
}

let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
Expand Down Expand Up @@ -908,6 +926,16 @@ let SubtargetPredicate = isGFX12Plus in {

} // End SubtargetPredicate = isGFX12Plus

// Pseudo definitions of v_bitop3_b16 and v_bitop3_b32, guarded by the
// HasBitOp3Insts predicate. Both are marked rematerializable.
let SubtargetPredicate = HasBitOp3Insts in {
let isReMaterializable = 1 in {
// 16-bit form: True16 profile with the VOP3_OPSEL feature set.
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
VOP3_OPSEL>>;
// 32-bit form: plain profile with the VOP3_REGULAR feature set.
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
}
} // End SubtargetPredicate = HasBitOp3Insts

class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
(AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
Expand Down Expand Up @@ -1606,6 +1634,23 @@ multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
}
}

// Real GFX9 encoding for a bitop3 pseudo named NAME#"_e64". The 8-bit bitop3
// immediate is split across three separate fields of the instruction word,
// and Inst{14-11} are filled from source-modifier bits only when the
// profile has op_sel.
multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0> {
  defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
  let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
    def _gfx9 : VOP3_Real<ps, SIEncodingFamily.GFX9>,
                VOP3e_vi <op, ps.Pfl> {
      let AsmString = AsmName # ps.AsmOperands;

      // bitop3{7-6} -> Inst{60-59}, bitop3{5-3} -> Inst{10-8},
      // bitop3{2-0} -> Inst{63-61}.
      bits<8> bitop3;
      let Inst{60-59} = bitop3{7-6};
      let Inst{10-8} = bitop3{5-3};
      let Inst{63-61} = bitop3{2-0};

      // Bit 2 of each source's modifiers and bit 3 of src0_modifiers, or 0
      // when the profile has no op_sel.
      let Inst{11} = !if(ps.Pfl.HasOpSel, src0_modifiers{2}, 0);
      let Inst{12} = !if(ps.Pfl.HasOpSel, src1_modifiers{2}, 0);
      let Inst{13} = !if(ps.Pfl.HasOpSel, src2_modifiers{2}, 0);
      let Inst{14} = !if(ps.Pfl.HasOpSel, src0_modifiers{3}, 0);
    }
  }
}
} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"

defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
Expand Down Expand Up @@ -1748,3 +1793,6 @@ defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;

// GFX9 real encodings of the bitop3 instructions, with opcodes 0x233 (b16)
// and 0x234 (b32).
defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
66 changes: 58 additions & 8 deletions llvm/test/MC/AMDGPU/gfx950_asm_vop3.s
Original file line number Diff line number Diff line change
@@ -1,26 +1,76 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX906-ERR %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX940-ERR %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding < %s | FileCheck --check-prefix=GFX950 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s

v_cvt_pk_bf16_f32 v5, v1, v2
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x02,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, v255, v255
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd2,0xff,0xff,0x03,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, v1, s2
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x00,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, m0, 0.5
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd2,0x7c,0xe0,0x01,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, -1, exec_hi
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd2,0xc1,0xfe,0x00,0x00]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, v1, v2, s3
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, v1, v2, s3 bitop3:161
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b16 v5, v1, v2, s3
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00]
// GFX12-ERR: error: instruction not supported on this GPU

v_bitop3_b16 v5, v1, v2, s3 bitop3:161
// GFX906-ERR: error: instruction not supported on this GPU
// GFX940-ERR: error: instruction not supported on this GPU
// GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30]
// GFX12-ERR: error: instruction not supported on this GPU
18 changes: 18 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,21 @@

# GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08]
0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08

# GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00]
0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00

# GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30]
0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30

# GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1]
0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1

# GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab]
0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab

# GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00]
0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00

# GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30]
0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30

0 comments on commit 174f311

Please sign in to comment.