Skip to content

Commit

Permalink
AMDGPU: Handle gfx950 change in mfma_f64_16x16x4 + valu hazard (#117262)
Browse files (browse the repository at this point in the history)
On gfx950, increase the wait states required between an mfma_f64_16x16x4 VGPR write and a dependent VALU read from 11 to 19.
  • Loading branch information
arsenm authored Nov 23, 2024
1 parent 33c2b20 commit b078b88
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2603,6 +2603,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
const int GFX950_DMFMA16x16WriteVgprVALUReadWaitStates = 19;
const int DotWriteSameDotReadSrcAB = 3;
const int DotWriteDifferentVALURead = 3;
const int DMFMABetweenVALUWriteVMEMRead = 2;
Expand Down Expand Up @@ -2663,9 +2664,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
break;
case 8:
case 16:
NeedWaitStates = IsMemOrExport
? DMFMA16x16WriteVgprMemExpReadWaitStates
: DMFMA16x16WriteVgprVALUReadWaitStates;
NeedWaitStates =
IsMemOrExport
? DMFMA16x16WriteVgprMemExpReadWaitStates
: (ST.hasGFX950Insts()
? GFX950_DMFMA16x16WriteVgprVALUReadWaitStates
: DMFMA16x16WriteVgprVALUReadWaitStates);
break;
default:
llvm_unreachable("unexpected dgemm");
Expand Down
28 changes: 21 additions & 7 deletions llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s

# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read
# GCN: V_MOV_B32
Expand Down Expand Up @@ -803,8 +804,12 @@ body: |
...
# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_write_vgpr_valu_read
body: |
Expand Down Expand Up @@ -867,8 +872,13 @@ body: |
...
# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2

# GCN-NEXT: V_DOT
name: dmfma16x16_write_vgpr_dot_read
body: |
Expand Down Expand Up @@ -1505,8 +1515,12 @@ body: |
...
# GCN-LABEL: name: dmfma16x16_write_agpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
name: dmfma16x16_write_agpr_valu_read
body: |
Expand Down

0 comments on commit b078b88

Please sign in to comment.