Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DCE] Ensure reserved simplifiable registers are live across call boundaries. #244

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,24 @@ bool DeadMachineInstructionElimImpl::runImpl(MachineFunction &MF,
return AnyChanges;
}

/// Collect the reserved physical registers that \p MRI reports as
/// simplifiable.
///
/// \param MRI register info that supplies the reserved-register set and is
///        queried via canSimplifyPhysReg() for each reserved register.
/// \returns every reserved register for which MRI->canSimplifyPhysReg()
///          returned true, in the order set_bits() yields them.
SmallVector<MCPhysReg, 16>
getSimplifiableReservedRegs(const MachineRegisterInfo *MRI) {
  // Bind by const reference instead of copying the whole BitVector: the set
  // is only iterated here, never modified. (If the accessor returned by value
  // the temporary's lifetime would be extended, so this is safe either way.)
  const BitVector &ReservedRegs = MRI->getReservedRegs();

  SmallVector<MCPhysReg, 16> SimplifiableReservedRegs;
  for (MCPhysReg PhysReg : ReservedRegs.set_bits())
    if (MRI->canSimplifyPhysReg(PhysReg))
      SimplifiableReservedRegs.push_back(PhysReg);
  return SimplifiableReservedRegs;
}

bool DeadMachineInstructionElimImpl::eliminateDeadMI(MachineFunction &MF) {
bool AnyChanges = false;

SmallVector<MCPhysReg, 16> SimplifiableReservedRegs =
getSimplifiableReservedRegs(MRI);

// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
Expand All @@ -165,10 +180,8 @@ bool DeadMachineInstructionElimImpl::eliminateDeadMI(MachineFunction &MF) {

// Reserved registers are considered always live, so consider them as
// live-outs for MBB. Inside MBB, dead assignments can still be detected.
for (MCPhysReg PhysReg : MRI->getReservedRegs().set_bits()) {
if (MRI->canSimplifyPhysReg(PhysReg)) {
LivePhysRegs.addReg(PhysReg);
}
for (MCPhysReg PhysReg : SimplifiableReservedRegs) {
LivePhysRegs.addReg(PhysReg);
}

// Now scan the instructions and delete dead ones, tracking physreg
Expand All @@ -187,6 +200,14 @@ bool DeadMachineInstructionElimImpl::eliminateDeadMI(MachineFunction &MF) {
}

LivePhysRegs.stepBackward(MI);

// If the instruction is a call, conservatively assume that it reads
// reserved registers.
if (MI.isCall()) {
for (MCPhysReg PhysReg : SimplifiableReservedRegs) {
LivePhysRegs.addReg(PhysReg);
}
}
}
}

Expand Down
120 changes: 106 additions & 14 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/dead-mi-elim.mir
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ body: |
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm1:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY2]], [[COPY3]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crsat = COPY [[COPY1]]
; CHECK-NEXT: $crupssign = COPY [[COPY]]
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm2:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY2]], [[COPY3]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY2]], [[COPY3]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crupssign = MOV_scalar_imm10_pseudo 2
%0:er = MOV_RLC_imm10_pseudo 0
%1:er = COPY $r0
Expand Down Expand Up @@ -129,7 +129,6 @@ body: |
PseudoJ_jump_imm %bb.1

bb.1:
; predecessors: %bb.0
%8:ercr = COPY $crsat
$crsat = MOV_scalar_imm10_pseudo 3
$crsat = COPY %8:ercr
Expand Down Expand Up @@ -224,7 +223,7 @@ body: |
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm8:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY2]], [[COPY3]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crsat = COPY [[COPY1]]
; CHECK-NEXT: $crupssign = COPY [[COPY]]
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm9:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY2]], [[COPY3]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm7:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY2]], [[COPY3]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crupssign = MOV_scalar_imm10_pseudo 9
; CHECK-NEXT: PseudoRET implicit $lr
bb.0.entry:
Expand All @@ -248,7 +247,6 @@ body: |
PseudoJ_jump_imm %bb.1

bb.1:
; predecessors: %bb.0, %bb.1
successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%)

%8:ercr = COPY $crsat
Expand All @@ -263,7 +261,6 @@ body: |
PseudoJ_jump_imm %bb.2

bb.2:
; predecessors: %bb.1
successors: %bb.4(0x80000000); %bb.4(100.00%)

%11:ercr = COPY $crsat
Expand All @@ -279,7 +276,6 @@ body: |
PseudoJ_jump_imm %bb.4

bb.3:
; predecessors: %bb.0
successors: %bb.4(0x80000000); %bb.4(100.00%)

$crsat = COPY %2:ercr
Expand All @@ -292,7 +288,6 @@ body: |
PseudoJ_jump_imm %bb.4

bb.4:
; predecessors: %bb.2, %bb.3

$crsat = COPY %2:ercr
$crupssign = COPY %1:er
Expand Down Expand Up @@ -373,7 +368,7 @@ body: |
; CHECK-NEXT: $crupssign = COPY [[COPY]]
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm7:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY1]], [[COPY2]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crupssign = COPY [[COPY]]
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm8:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY1]], [[COPY2]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm6:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY1]], [[COPY2]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crupssign = MOV_scalar_imm10_pseudo 5
; CHECK-NEXT: PseudoRET implicit $lr
bb.0.entry:
Expand All @@ -389,7 +384,6 @@ body: |
PseudoJ_jump_imm %bb.1

bb.1:
; predecessors: %bb.0, %bb.3
successors: %bb.2(0x80000000); %bb.2(100.00%)

$crupssign = COPY %1:er
Expand All @@ -400,7 +394,6 @@ body: |
PseudoJ_jump_imm %bb.2

bb.2:
; predecessors: %bb.1, %bb.2
successors: %bb.2(0x40000000), %bb.3(0x40000000); %bb.2(50.00%), %bb.3(50.00%)
liveins: $r0
$crupssign = COPY %1:er
Expand All @@ -413,7 +406,6 @@ body: |
PseudoJ_jump_imm %bb.3

bb.3:
; predecessors: %bb.2
successors: %bb.1(0x40000000), %bb.4(0x40000000); %bb.1(50.00%), %bb.4(50.00%)

$crupssign = COPY %1:er
Expand All @@ -425,7 +417,6 @@ body: |
PseudoJ_jump_imm %bb.4

bb.4:
; predecessors: %bb.3

$crupssign = COPY %1:er
%12:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm %2:mss, %3:ep, 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
Expand Down Expand Up @@ -486,14 +477,12 @@ body: |
PseudoJ_jump_imm %bb.1

bb.1:
; predecessors: %bb.0
successors: %bb.2(0x80000000); %bb.2(100.00%)

%5:er = MOV_RLC_imm10_pseudo 23
PseudoJ_jump_imm %bb.2

bb.2:
; predecessors: %bb.1
liveins: $r2
%6:er = COPY $r2
$crupssign = COPY %6:er
Expand Down Expand Up @@ -547,3 +536,106 @@ body: |
$crfpmask = COPY %3:ercr
%10:acc512 = VNEGSUB_F %0:acc512, %1:acc512, %2:er, implicit-def $srfpflags, implicit $crfpmask
...

# Test 7 - Test for liveness of control registers across basic blocks with no use in bb.1.
---
name: live_control_regs_no_use
alignment: 16
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: live_control_regs_no_use
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $r1, $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 0
; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r1
; CHECK-NEXT: $crsat = COPY [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: $crrnd = COPY [[COPY]]
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: PseudoRET implicit $lr
bb.0.entry:
successors: %bb.1(0x80000000); %bb.1(100.00%)
liveins: $r1, $p0
%0:er = MOV_RLC_imm10_pseudo 0
%1:er = COPY $r1
$crsat = COPY %0:er
$crrnd = COPY %1:er
PseudoJ_jump_imm %bb.1
bb.1:
%5:er = MOV_RLC_imm10_pseudo 23
PseudoRET implicit $lr
...

# Test 8 - Test for liveness of control registers across basic blocks with use in bb.1.
---
name: live_control_regs
alignment: 16
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: live_control_regs
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $r1, $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 0
; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r1
; CHECK-NEXT: $crsat = COPY [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: $crupssign = COPY [[COPY]]
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:mss = COPY [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[VLDA_UPS_S32_D8_ag_idx_imm:%[0-9]+]]:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm [[COPY1]], [[COPY2]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $crupssign = MOV_scalar_imm10_pseudo 0
bb.0.entry:
successors: %bb.1(0x80000000); %bb.1(100.00%)
liveins: $r1, $p0
%0:er = MOV_RLC_imm10_pseudo 0
%1:er = COPY $r1
$crsat = COPY %0:er
$crupssign = COPY %1:er
PseudoJ_jump_imm %bb.1

bb.1:
liveins: $p0
%2:mss = COPY %0:er
%3:ep = COPY $p0
%4:acc1024 = VLDA_UPS_S32_D8_ag_idx_imm %2:mss, %3:ep, 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$crupssign = MOV_scalar_imm10_pseudo 0
...

# Test 9 - Test for liveness of control registers across call boundaries.
---
name: live_control_regs_call
alignment: 16
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r1, $p0
; CHECK-LABEL: name: live_control_regs_call
; CHECK: liveins: $r1, $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 0
; CHECK-NEXT: $crsat = COPY [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: $crrnd = COPY [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: PseudoJL 0, csr_aie2
%0:er = MOV_RLC_imm10_pseudo 0
$crsat = COPY %0:er
$crrnd = COPY %0:er
PseudoJL 0, csr_aie2
...
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
;
; RUN: llc -mtriple=aie2 -O2 --issue-limit=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s

; Test to check for liveness of simplifiable reserved regs (i.e. crsat and crrnd in this test)
; across call boundaries.

; caller1 writes two control registers through llvm.aie2.set.ctrl.reg (values
; 1 and 12) and then calls callee1. The assertions below expect both writes
; (mov crSat, #1 and mov crRnd, #12) to still be emitted, placed in the delay
; slots of the jl to callee1.
define void @caller1() {
; CHECK-LABEL: caller1:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; jl #callee1; nops
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: paddb [sp], #32 // Delay Slot 4
; CHECK-NEXT: st lr, [sp, #-32] // 4-byte Folded Spill Delay Slot 3
; CHECK-NEXT: mov crSat, #1 // Delay Slot 2
; CHECK-NEXT: mov crRnd, #12 // Delay Slot 1
; CHECK-NEXT: lda lr, [sp, #-32] // 4-byte Folded Reload
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: paddb [sp], #-32 // Delay Slot 1
entry:
tail call void @llvm.aie2.set.ctrl.reg(i32 9, i32 1)
tail call void @llvm.aie2.set.ctrl.reg(i32 6, i32 12)
tail call void @callee1()
ret void
}

; callee1 obtains an accumulator value from llvm.aie2.v32acc32 and feeds it to
; the llvm.aie2.I256.v32.acc32.srs intrinsic; the result is not used further.
; The assertions below expect a vsrs.d8.s32 instruction in the output.
; NOTE(review): presumably vsrs implicitly reads crSat/crRnd, which is why the
; caller's control-register writes must stay live across the call -- confirm
; against the AIE2 instruction definitions.
define void @callee1() {
; CHECK-LABEL: callee1:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mova r0, #1; nopb ; nopxm ; nops
; CHECK-NEXT: ret lr
; CHECK-NEXT: mov s0, r0 // Delay Slot 5
; CHECK-NEXT: vsrs.d8.s32 wh0, cm0, s0 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
%0 = tail call noundef <16 x i64> @llvm.aie2.v32acc32()
%1 = tail call noundef <32 x i8> @llvm.aie2.I256.v32.acc32.srs(<16 x i64> %0, i32 1, i32 0)
ret void
}

declare <32 x i8> @llvm.aie2.I256.v32.acc32.srs(<16 x i64>, i32, i32)

declare <16 x i64> @llvm.aie2.v32acc32()

declare void @llvm.aie2.set.ctrl.reg(i32, i32)
Loading