Skip to content

Commit

Permalink
AMDGPU: Handle vcmpx+permlane gfx950 hazard
Browse files Browse the repository at this point in the history
Confusingly, this is a different hazard from the one on gfx10,
which is guarded by a subtarget feature.
  • Loading branch information
arsenm committed Nov 23, 2024
1 parent 639d7c6 commit d3de922
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 4 deletions.
34 changes: 30 additions & 4 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) {
Opcode == AMDGPU::V_PERMLANE64_B32 ||
Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 ||
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
}

static bool isLdsDma(const MachineInstr &MI) {
Expand Down Expand Up @@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));

if (ST.hasGFX950Insts() && isPermlane(*MI))
return std::max(WaitStates, checkPermlaneHazards(MI));

return WaitStates;
}

Expand Down Expand Up @@ -1200,16 +1207,21 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixRequiredExportPriority(MI);
}

/// Returns true if \p MI is a vector compare that modifies EXEC.
///
/// An instruction qualifies as a vector compare here when it is VOPC, or when
/// it is flagged as a compare and is VOP3 or SDWA. It must additionally
/// modify the EXEC register according to \p TRI.
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
                              const MachineInstr &MI) {
  const bool IsVectorCompare =
      TII.isVOPC(MI) ||
      (MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)));
  if (!IsVectorCompare)
    return false;

  return MI.modifiesRegister(AMDGPU::EXEC, &TRI);
}

bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
return false;

const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
return (TII->isVOPC(MI) ||
((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
MI.modifiesRegister(AMDGPU::EXEC, TRI);
return isVCmpXWritesExec(*TII, *TRI, MI);
};

auto IsExpiredFn = [](const MachineInstr &MI, int) {
Expand Down Expand Up @@ -2529,6 +2541,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
return WaitStatesNeeded;
}

/// Computes the wait states still required before a permlane instruction
/// that may follow a vector compare writing EXEC.
///
/// This must only be used on subtargets without the separate
/// hasVcmpxPermlaneHazard() workaround; that is asserted below. The result is
/// 4 minus whatever getWaitStatesSince() reports for the most recent
/// EXEC-writing vector compare, searching with a limit of 4.
/// NOTE(review): the result is presumably non-positive when no hazard is
/// pending, and callers are expected to clamp it -- confirm against
/// getWaitStatesSince().
///
/// \p MI is not inspected here.
int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
  assert(!ST.hasVcmpxPermlaneHazard() &&
         "this is a different vcmpx+permlane hazard");
  const SIRegisterInfo &RegInfo = *ST.getRegisterInfo();
  const SIInstrInfo &InstrInfo = *ST.getInstrInfo();

  // Wait states required between the EXEC-writing compare and the permlane.
  constexpr int RequiredWaitStates = 4;

  auto WritesExecViaVCmpX = [&InstrInfo,
                             &RegInfo](const MachineInstr &Candidate) {
    return isVCmpXWritesExec(InstrInfo, RegInfo, Candidate);
  };

  const int WaitStatesElapsed =
      getWaitStatesSince(WritesExecViaVCmpX, RequiredWaitStates);
  return RequiredWaitStates - WaitStatesElapsed;
}

static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkMFMAPadding(MachineInstr *MI);
int checkMAIVALUHazards(MachineInstr *MI);
int checkMAILdStHazards(MachineInstr *MI);
int checkPermlaneHazards(MachineInstr *MI);

public:
GCNHazardRecognizer(const MachineFunction &MF);
Expand Down
144 changes: 144 additions & 0 deletions llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s

---
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vopc_write_exec_permlane16_swap_vop1
body: |
bb.0:
liveins: $vgpr0, $vgpr1
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1
# GCN: V_CMPX_EQ_I32_e64
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vop3_write_exec_permlane16_swap_vop1
body: |
bb.0:
liveins: $vgpr0, $vgpr1
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vopc_write_exec_permlane16_swap_vop3
body: |
bb.0:
liveins: $vgpr0, $vgpr1
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3
# GCN: V_CMPX_EQ_I32_e64
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vop3_write_exec_permlane16_swap_vop3
body: |
bb.0:
liveins: $vgpr0, $vgpr1
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vopc_write_exec_permlane32_swap_vop1
body: |
bb.0:
liveins: $vgpr0, $vgpr1
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1
# GCN: V_CMPX_EQ_I32_e64
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vop3_write_exec_permlane32_swap_vop1
body: |
bb.0:
liveins: $vgpr0, $vgpr1
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vopc_write_exec_permlane32_swap_vop3
body: |
bb.0:
liveins: $vgpr0, $vgpr1
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3
# GCN: V_CMPX_EQ_I32_e64
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_PERMLANE
name: vcmpx_vop3_write_exec_permlane32_swap_vop3
body: |
bb.0:
liveins: $vgpr0, $vgpr1
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: V_PERMLANE
name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
body: |
bb.0:
liveins: $vgpr0, $vgpr1
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$vgpr4 = V_MOV_B32_e32 0, implicit $exec
$vgpr5 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
...

---
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: V_MOV_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_PERMLANE
name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
body: |
bb.0:
liveins: $vgpr0, $vgpr1
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$vgpr4 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
...

0 comments on commit d3de922

Please sign in to comment.