Skip to content

Commit

Permalink
[AIEX] Extend rescheduling when the new SU is a multi-slot
Browse files Browse the repository at this point in the history
  • Loading branch information
krishnamtibrewala committed Oct 28, 2024
1 parent 71c9012 commit 23f63de
Show file tree
Hide file tree
Showing 27 changed files with 246 additions and 285 deletions.
15 changes: 14 additions & 1 deletion llvm/lib/Target/AIE/AIEHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,13 +467,26 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType(

/// Check \p MI for conflicts at \p DeltaCycles against this recognizer's
/// Scoreboard by forwarding to the overload that takes an explicit scoreboard.
///
/// \param MI          Instruction to check; it must not have alternate opcodes
///                    (i.e. it must not be a multi-slot instruction).
/// \param DeltaCycles Cycle offset at which the conflict is evaluated.
/// \returns the conflict bits reported by the forwarded-to overload.
ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI,
int DeltaCycles) {
// This overload works from MI's current opcode, so an instruction that still
// has alternate opcodes to choose from is rejected in assert-enabled builds.
assert(!TII->getFormatInterface()->getAlternateInstsOpcode(MI.getOpcode()));
return checkConflict(Scoreboard, MI, DeltaCycles);
}

/// Check \p MI for conflicts at \p DeltaCycles against the caller-supplied
/// \p Scoreboard, using MI's own instruction descriptor.
///
/// \param Scoreboard  Scoreboard to check against; it is only read here.
/// \param MI          Instruction to check; it must not have alternate opcodes.
/// \param DeltaCycles Cycle offset at which the conflict is evaluated.
/// \returns the conflict bits reported by the descriptor-taking overload.
ConflictTypeBits AIEHazardRecognizer::checkConflict(
const ResourceScoreboard<FuncUnitWrapper> &Scoreboard, MachineInstr &MI,
int DeltaCycles) const {
// NOTE(review): Desc is not used below (the call re-reads MI.getDesc()); in
// this diff rendering the line looks like the removed pre-change line - confirm.
const MCInstrDesc &Desc = MI.getDesc();
// Instructions with alternate opcodes must go through the overload that takes
// an explicit descriptor instead.
assert(!TII->getFormatInterface()->getAlternateInstsOpcode(MI.getOpcode()));
return checkConflict(Scoreboard, MI, MI.getDesc(), DeltaCycles);
}

/// Check \p MI for conflicts at \p DeltaCycles against this recognizer's
/// Scoreboard, using the explicitly supplied descriptor \p Desc rather than
/// MI's own.
///
/// Unlike the overloads without a descriptor parameter, this one does not
/// assert that MI has no alternate opcodes, so a caller can pass the descriptor
/// of one specific alternate opcode.
///
/// \param MI          Instruction whose operands are used for the check.
/// \param Desc        Descriptor to evaluate the instruction with.
/// \param DeltaCycles Cycle offset at which the conflict is evaluated.
/// \returns the conflict bits reported by the scoreboard-taking overload.
ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI,
const MCInstrDesc &Desc,
int DeltaCycles) {
return checkConflict(Scoreboard, MI, Desc, DeltaCycles);
}

ConflictTypeBits AIEHazardRecognizer::checkConflict(
const ResourceScoreboard<FuncUnitWrapper> &Scoreboard, MachineInstr &MI,
const MCInstrDesc &Desc, int DeltaCycles) const {
const unsigned SchedClass =
TII->getSchedClass(Desc, MI.operands(), MI.getMF()->getRegInfo());
const MemoryBankBits MemoryBanks = getMemoryBanks(&MI);
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIEHazardRecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,13 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles);

/// Check \p MI for conflicts against \p Scoreboard at \p DeltaCycles, using
/// the explicitly supplied descriptor \p Desc instead of MI's own descriptor.
ConflictTypeBits
checkConflict(const ResourceScoreboard<FuncUnitWrapper> &Scoreboard,
MachineInstr &MI, const MCInstrDesc &Desc,
int DeltaCycles) const;
/// Same check as above, performed against this recognizer's own scoreboard.
ConflictTypeBits checkConflict(MachineInstr &MI, const MCInstrDesc &Desc,
int DeltaCycles);

ConflictTypeBits
checkConflict(const ResourceScoreboard<FuncUnitWrapper> &Scoreboard,
MachineInstr &MI, int DeltaCycles) const;
Expand Down
168 changes: 101 additions & 67 deletions llvm/lib/Target/AIE/AIEMachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,16 +507,12 @@ bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone,

const AIEBaseMCFormats &Formats = *getTII(*Zone.DAG)->getFormatInterface();
AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Zone);
MachineInstr *NewMI = SU.getInstr();
std::vector<MachineInstr *> ScheduledMultiSlotInsts;
bool CanShiftSlot = false;

if (!(!Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) &&
(HR.checkConflict(*SU.getInstr(), DeltaCycle) &
static_cast<uint32_t>(AIEHazardRecognizer::ConflictType::Format)))) {
// We are only interested in single slot instructions and instructions that
// have only format hazard.
// TODO : Extend this to SUs that are multi-slot and have only format hazard
return false;
}
// Find and cache if there are any multi-slot instructions scheduled in the
// same delta cycle
for (MachineInstr &MI : *Zone.DAG) {
SUnit *ZoneSU = Zone.DAG->getSUnit(&MI);
if (!ZoneSU)
Expand All @@ -529,76 +525,114 @@ bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone,
static_cast<unsigned int>(CurrCycle - DeltaCycle))
continue;

// Check for a MultiSlot instruction scheduled in the same DeltaCycle, we
// focus on multi-slot because they can be scheduled in different slots
// Check for a MultiSlot instruction scheduled in the same DeltaCycle,
// we focus on multi-slot because they can be scheduled in different
// slots
auto AltOpcodes = Formats.getAlternateInstsOpcode(MI.getOpcode());
if (!AltOpcodes)
continue;
ScheduledMultiSlotInsts.push_back(&MI);
}

// Check if the scheduled multi-slot instruction has a slot conflict
// with the new instruction; if so, we might have the possibility to shift
// the multi-slot and schedule the new instruction.
if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(&MI),
SU.getInstr()->getOpcode(), Formats))
continue;
// If there are no multi-slot instructions scheduled in the same DeltaCycle, we
// cannot shift any instruction to a different slot.
if (ScheduledMultiSlotInsts.empty())
return false;

// Release the multi-slot instruction from the scoreboard to check if any
// other alternate opcode in presence of the new instruction will not create
// a hazard.
HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
HR.getMemoryBanks(&MI), MI.operands(),
MI.getMF()->getRegInfo(),
CurrCycle - ZoneSU->BotReadyCycle);

MachineInstr *NewMI = SU.getInstr();
// Check if the new instruction can be scheduled after unscheduling
// the conflicting multi-slot instruction.
if (HR.getHazardType(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
NewMI->operands(), NewMI->getMF()->getRegInfo(),
DeltaCycle) !=
ScheduleHazardRecognizer::HazardType::NoHazard) {
// If the new instruction cannot be scheduled after unscheduling the
// multi-slot, restore the scoreboard to its original state and
// continue.
HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
HR.getMemoryBanks(&MI), MI.operands(),
MI.getMF()->getRegInfo(),
CurrCycle - ZoneSU->BotReadyCycle);
const std::vector<unsigned int> *AlternateOpcodes;
auto DefaultOpcode = std::vector<unsigned int>{SU.getInstr()->getOpcode()};
AlternateOpcodes =
Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode())
? Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode())
: &DefaultOpcode;

for (const unsigned int NewMIAltOpcode : *AlternateOpcodes) {
const MCInstrDesc &NewMIAltDesc = getTII(*Zone.DAG)->get(NewMIAltOpcode);
if (!(HR.checkConflict(*NewMI, NewMIAltDesc, DeltaCycle) &
static_cast<uint32_t>(AIEHazardRecognizer::ConflictType::Format)))
continue;
}

// Emit the new instruction in the scoreboard. This will help us
// to check if the previously unscheduled multi-slot instruction
// can be scheduled in the same cycle, with an alternate opcode.
HR.emitInScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
NewMI->operands(), NewMI->getMF()->getRegInfo(),
DeltaCycle);

// Check if the previously unscheduled multi-slot instruction
// can be rescheduled in the presence of the new instruction in the
// same cycle, with a different opcode.
for (const auto AltOpcodeInside : *AltOpcodes) {
const MCInstrDesc &Desc = getTII(*Zone.DAG)->get(AltOpcodeInside);
if (HR.getHazardType(Desc, HR.getMemoryBanks(&MI), MI.operands(),
MI.getMF()->getRegInfo(), DeltaCycle) ==
for (MachineInstr *MI : ScheduledMultiSlotInsts) {
SUnit *ZoneSU = Zone.DAG->getSUnit(MI);
const int CurrCycle = Zone.getCurrCycle();
auto AltOpcodes = Formats.getAlternateInstsOpcode(MI->getOpcode());

// Check if the scheduled multi-slot instruction has a slot conflict
// with the new instruction; if so, we might have the possibility to shift
// the multi-slot and schedule the new instruction.
if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(MI),
NewMIAltOpcode, Formats))
continue;

// Release the multi-slot instruction from the scoreboard to check if
// any other alternate opcode in presence of the new instruction will
// not create a hazard.
HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(MI),
HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(),
CurrCycle - ZoneSU->BotReadyCycle);

// Check if the new instruction can be scheduled after unscheduling
// the conflicting multi-slot instruction.
if (HR.getHazardType(NewMIAltDesc, HR.getMemoryBanks(NewMI),
NewMI->operands(), NewMI->getMF()->getRegInfo(),
DeltaCycle) !=
ScheduleHazardRecognizer::HazardType::NoHazard) {
// Cache the information to mutate the instruction during bumpNode()
MutateInstruction.insert(
std::make_pair(NewMI, std::make_pair(&MI, &Desc)));
CanShiftSlot = true;
break;
// If the new instruction cannot be scheduled after unscheduling the
// multi-slot, restore the scoreboard to its original state
// and continue.

HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(MI),
HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(),
CurrCycle - ZoneSU->BotReadyCycle);
continue;
}
}

// Revert back the state of scoreboard to original state.
HR.releaseFromScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
NewMI->operands(), NewMI->getMF()->getRegInfo(),
DeltaCycle);
HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
HR.getMemoryBanks(&MI), MI.operands(),
MI.getMF()->getRegInfo(),
CurrCycle - ZoneSU->BotReadyCycle);
// Emit the new instruction in the scoreboard. This will help us
// to check if the previously unscheduled multi-slot instruction
// can be scheduled in the same cycle, with an alternate opcode.
HR.emitInScoreboard(NewMIAltDesc, HR.getMemoryBanks(NewMI),
NewMI->operands(), NewMI->getMF()->getRegInfo(),
DeltaCycle);

// Check if the previously unscheduled multi-slot instruction
// can be rescheduled in the presence of the new instruction in the
// same cycle, with a different opcode.
for (const auto AltOpcodeInside : *AltOpcodes) {
const MCInstrDesc &Desc = getTII(*Zone.DAG)->get(AltOpcodeInside);
if (HR.getHazardType(Desc, HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycle) ==
ScheduleHazardRecognizer::HazardType::NoHazard) {
// Cache the information to mutate the instruction during bumpNode()
MutateInstruction.insert(
std::make_pair(NewMI, std::make_pair(MI, &Desc)));

// If the new instruction is a multi-slot instruction and it failed the
// general check in isAvailableNode(), this means we have not set the
// selected opcode for the instruction. Set the selected opcode for
// the instruction.
if (AlternateOpcodes->size() > 1)
HR.getSelectedAltDescs().setAlternateDescriptor(NewMI,
&NewMIAltDesc);

CanShiftSlot = true;
break;
}
}

// Revert back the state of scoreboard to original state.
HR.releaseFromScoreboard(NewMIAltDesc, HR.getMemoryBanks(NewMI),
NewMI->operands(), NewMI->getMF()->getRegInfo(),
DeltaCycle);
HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(MI),
HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(),
CurrCycle - ZoneSU->BotReadyCycle);

if (CanShiftSlot)
break;
}
if (CanShiftSlot)
break;
}
Expand Down
34 changes: 12 additions & 22 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@ define void @test_simple_dyn_alloca(i32 noundef %n) {
; CHECK-LABEL: test_simple_dyn_alloca:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: paddb [sp], #32; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: mova r1, #2; nopx
; CHECK-NEXT: padda [sp], #32; nopb ; movx r1, #2
; CHECK-NEXT: lshl r0, r0, r1
; CHECK-NEXT: st p7, [sp, #-32] // 4-byte Folded Spill
; CHECK-NEXT: mov p7, sp
; CHECK-NEXT: mov p1, sp
; CHECK-NEXT: lshl r0, r0, r1
; CHECK-NEXT: mova r1, #-32
; CHECK-NEXT: st lr, [sp, #-28] // 4-byte Folded Spill
; CHECK-NEXT: padda [p7], #-32
; CHECK-NEXT: mova r1, #-32; padds [p7], #-32
; CHECK-NEXT: add r0, r0, #31
; CHECK-NEXT: jl #extern_call
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
Expand Down Expand Up @@ -53,24 +51,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; CHECK-LABEL: test_loop_dyn_alloca:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #64; nopxm
; CHECK-NEXT: st p7, [sp, #-64] // 4-byte Folded Spill
; CHECK-NEXT: mov p7, sp
; CHECK-NEXT: st r16, [sp, #-36] // 4-byte Folded Spill
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: paddb [sp], #64; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: st r16, [sp, #-36]; nopx // 4-byte Folded Spill
; CHECK-NEXT: st r17, [sp, #-40] // 4-byte Folded Spill
; CHECK-NEXT: mova r17, #0
; CHECK-NEXT: st r18, [sp, #-44] // 4-byte Folded Spill
; CHECK-NEXT: mova r18, #10
; CHECK-NEXT: mova r16, #1; movx r18, #10; mov r17, #0
; CHECK-NEXT: st r19, [sp, #-48] // 4-byte Folded Spill
; CHECK-NEXT: mova r19, #2
; CHECK-NEXT: st r20, [sp, #-52] // 4-byte Folded Spill
; CHECK-NEXT: mova r20, #-32
; CHECK-NEXT: st r21, [sp, #-56] // 4-byte Folded Spill
; CHECK-NEXT: mova r21, #0
; CHECK-NEXT: st p7, [sp, #-64] // 4-byte Folded Spill
; CHECK-NEXT: mov p7, sp
; CHECK-NEXT: st lr, [sp, #-32] // 4-byte Folded Spill
; CHECK-NEXT: st p6, [sp, #-60] // 4-byte Folded Spill
; CHECK-NEXT: padda [p7], #-64
; CHECK-NEXT: mova r19, #2; padds [p7], #-64; movx r21, #0; mov r20, #-32
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
Expand Down Expand Up @@ -133,11 +126,9 @@ define void @test_huge_stack(i32 noundef %n) #0 {
; CHECK-LABEL: test_huge_stack:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #40064; nopx
; CHECK-NEXT: nopa ; paddb [sp], #40064; nopxm
; CHECK-NEXT: movxm m0, #-40064
; CHECK-NEXT: mova r1, #0
; CHECK-NEXT: mova r2, #2
; CHECK-NEXT: mova r3, #-32
; CHECK-NEXT: mova r1, #0; movx r3, #-32; mov r2, #2
; CHECK-NEXT: st p7, [sp, #-40064] // 4-byte Folded Spill
; CHECK-NEXT: mov p7, sp
; CHECK-NEXT: mov p1, sp
Expand All @@ -150,11 +141,10 @@ define void @test_huge_stack(i32 noundef %n) #0 {
; CHECK-NEXT: mov p2, p7
; CHECK-NEXT: mov p6, p7
; CHECK-NEXT: paddb [p0], m0
; CHECK-NEXT: paddb [p6], #-32
; CHECK-NEXT: movxm m0, #-40032
; CHECK-NEXT: st r0, [p0, #0]
; CHECK-NEXT: lda r0, [p0, #0]
; CHECK-NEXT: paddb [p2], m0
; CHECK-NEXT: paddb [p6], #-32; padds [p2], m0
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: mov r16, p2
; CHECK-NEXT: st p0, [p6, #0]
Expand Down
8 changes: 2 additions & 6 deletions llvm/test/CodeGen/AIE/aie2/accfloat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ define dso_local noundef <8 x i64> @test_add_conf(<8 x i64> noundef %acc1, <8 x
; CHECK-LABEL: test_add_conf:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; mova r3, #12; nops ; nopxm ; nopv
; CHECK-NEXT: mova r4, #13; nopx
; CHECK-NEXT: mova r5, #28
; CHECK-NEXT: mova r3, #12; movx r5, #28; mov r4, #13
; CHECK-NEXT: lshl r1, r1, r3
; CHECK-NEXT: lshl r2, r2, r4
; CHECK-NEXT: or r0, r1, r0
Expand Down Expand Up @@ -41,9 +39,7 @@ define dso_local noundef <8 x i64> @test_sub_conf(<8 x i64> noundef %acc1, <8 x
; CHECK-LABEL: test_sub_conf:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; mova r3, #12; nops ; nopxm ; nopv
; CHECK-NEXT: mova r4, #13; nopx
; CHECK-NEXT: mova r5, #28
; CHECK-NEXT: mova r3, #12; movx r5, #28; mov r4, #13
; CHECK-NEXT: lshl r1, r1, r3
; CHECK-NEXT: lshl r2, r2, r4
; CHECK-NEXT: or r0, r1, r0
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AIE/aie2/addr_1d2d3d.ll
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,8 @@ define dso_local ptr @test_add_2d_ptr_backTOback_call(ptr %a, i32 noundef %off,
; CHECK-LABEL: test_add_2d_ptr_backTOback_call:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopx ; mov p0, p1
; CHECK-NEXT: mova r3, #6
; CHECK-NEXT: mova dc0, #0
; CHECK-NEXT: nopa ; nopx ; mov p0, p1
; CHECK-NEXT: mova r3, #6; mov dc0, #0
; CHECK-NEXT: mov dn0, r1
; CHECK-NEXT: lshl r0, r0, r3
; CHECK-NEXT: ret lr
Expand Down Expand Up @@ -200,9 +199,8 @@ define dso_local ptr @test_add_3d_ptr_backTOback_call(ptr %a, i32 noundef %off,
; CHECK-LABEL: test_add_3d_ptr_backTOback_call:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopx ; mov p0, p1
; CHECK-NEXT: mova r5, #6
; CHECK-NEXT: mova dc0, #0
; CHECK-NEXT: nopa ; nopx ; mov p0, p1
; CHECK-NEXT: mova r5, #6; mov dc0, #0
; CHECK-NEXT: mov dn0, r1
; CHECK-NEXT: mov dn4, r3
; CHECK-NEXT: lshl r0, r0, r5
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/aiev2_v2int32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ define dso_local noundef i64 @_Z14return_v2int32v() local_unnamed_addr #0 {
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mova r0, #100 // Delay Slot 2
; CHECK-NEXT: mova r1, #0 // Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mova r0, #100; movx r1, #0 // Delay Slot 1
entry:
ret i64 100
}
Expand Down
Loading

0 comments on commit 23f63de

Please sign in to comment.