Skip to content

Commit

Permalink
[AIEX] Reschedule Multi-Slot Instruction for better schedule
Browse files Browse the repository at this point in the history
  • Loading branch information
krishnamtibrewala committed Oct 28, 2024
1 parent 81ef6d7 commit 71c9012
Show file tree
Hide file tree
Showing 8 changed files with 345 additions and 24 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Target/AIE/AIEAlternateDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

namespace llvm {

// Maps an instruction to a pair made of another instruction and an
// instruction descriptor.
// NOTE(review): presumably key = instruction about to be scheduled, value =
// (multi-slot instruction to move, descriptor to move it to) — confirm
// against the scheduler code that fills this map.
using MutateInstructionMap =
std::unordered_map<MachineInstr *,
std::pair<MachineInstr *, const MCInstrDesc *>>;
// Maps an instruction to an instruction descriptor.
// NOTE(review): presumably the type of AlternateDescs, i.e. the alternate
// descriptor currently selected for each multi-opcode instruction — confirm.
using MIAltDescsMap = std::unordered_map<MachineInstr *, const MCInstrDesc *>;

class AIEAlternateDescriptors {
Expand All @@ -40,7 +43,11 @@ class AIEAlternateDescriptors {
const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI->getMF());
const AIEBaseInstrInfo *TII = STI.getInstrInfo();

AlternateDescs[MI] = &TII->get(AltInstOpcode);
setAlternateDescriptor(MI, &TII->get(AltInstOpcode));
}

// Record AltDesc as the alternate descriptor associated with MI.
// The descriptor is stored as given; no opcode lookup is done here.
void setAlternateDescriptor(MachineInstr *MI, const MCInstrDesc *AltDesc) {
AlternateDescs[MI] = AltDesc;
}

// Return the alternate descriptor for the given multi-opcode instruction.
Expand Down
29 changes: 25 additions & 4 deletions llvm/lib/Target/AIE/AIEHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,14 @@ auto toHazardType(bool Conflict) {
}
} // namespace

/// Convenience overload: evaluate the hazard type of an instruction described
/// by \p Desc, \p MemoryBanks and \p MIOperands at \p DeltaCycles against the
/// recognizer's own scoreboard, by forwarding to the overload that takes an
/// explicit scoreboard.
ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType(
    const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
    iterator_range<const MachineOperand *> MIOperands,
    const MachineRegisterInfo &MRI, int DeltaCycles) {
  const ScheduleHazardRecognizer::HazardType Hazard = getHazardType(
      this->Scoreboard, Desc, MemoryBanks, MIOperands, MRI, DeltaCycles);
  return Hazard;
}

// These functions interpret the itinerary, translating InstrStages
// to ResourceCycles to apply.
// We deviate from the standard ScoreboardHazardRecognizer by not
Expand All @@ -457,6 +465,11 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType(
FUDepthLimit));
}

/// Convenience overload: compute the conflict bits of \p MI at \p DeltaCycles
/// against the recognizer's own scoreboard, by forwarding to the overload
/// that takes an explicit scoreboard.
ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI,
                                                    int DeltaCycles) {
  const ConflictTypeBits Conflicts =
      checkConflict(this->Scoreboard, MI, DeltaCycles);
  return Conflicts;
}

ConflictTypeBits AIEHazardRecognizer::checkConflict(
const ResourceScoreboard<FuncUnitWrapper> &Scoreboard, MachineInstr &MI,
int DeltaCycles) const {
Expand All @@ -476,18 +489,18 @@ ConflictTypeBits AIEHazardRecognizer::checkConflict(
MemoryBankBits MemoryBanks, SmallVector<int, 2> MemoryAccessCycles,
int DeltaCycles, std::optional<int> FUDepthLimit) {
assert(Scoreboard.isValidDelta(DeltaCycles));
ConflictTypeBits Conflict = ConflictType::NoConflict;
ConflictTypeBits Conflict = static_cast<uint32_t>(ConflictType::NoConflict);

if (checkFormatConflict(Scoreboard, DeltaCycles, SlotSet))
Conflict |= ConflictType::Format;
Conflict |= static_cast<uint32_t>(ConflictType::Format);

if (checkMemoryBankConflict(MemoryAccessCycles, Scoreboard, DeltaCycles,
MemoryBanks))
Conflict |= ConflictType::MemoryBank;
Conflict |= static_cast<uint32_t>(ConflictType::MemoryBank);

if (checkFUConflict(ItinData, SchedClass, DeltaCycles, Scoreboard,
FUDepthLimit))
Conflict |= ConflictType::FU;
Conflict |= static_cast<uint32_t>(ConflictType::FU);

return Conflict;
}
Expand Down Expand Up @@ -573,6 +586,14 @@ void AIEHazardRecognizer::emitInScoreboard(
TII->getMemoryCycles(SchedClass), DeltaCycles, FUDepthLimit);
}

void AIEHazardRecognizer::releaseFromScoreboard(
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles) {
releaseFromScoreboard(Scoreboard, Desc, MemoryBanks, MIOperands, MRI,
DeltaCycles);
}

void AIEHazardRecognizer::releaseFromScoreboard(
ResourceScoreboard<FuncUnitWrapper> &TheScoreboard, const MCInstrDesc &Desc,
MemoryBankBits MemoryBanks,
Expand Down
15 changes: 13 additions & 2 deletions llvm/lib/Target/AIE/AIEHazardRecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
namespace llvm {

class MachineInstr;
using ConflictTypeBits = uint64_t;
using ConflictTypeBits = std::uint32_t;

void applyFormatOrdering(AIE::MachineBundle &Bundle, const VLIWFormat &Format,
MachineInstr *BundleRoot,
Expand Down Expand Up @@ -101,7 +101,7 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
void computeMaxLatency();

public:
enum ConflictType {
enum class ConflictType : std::uint32_t {
NoConflict = 0b000,
Format = 0b001,
MemoryBank = 0b010,
Expand Down Expand Up @@ -164,6 +164,11 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI,
int DeltaCycles) const;
// Apply the above function to the local scoreboard.
void releaseFromScoreboard(const MCInstrDesc &Desc,
MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles);

/// Block all scoreboard resources at DeltaCycles
void blockCycleInScoreboard(int DeltaCycle);
Expand Down Expand Up @@ -205,9 +210,15 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles) const;
ScheduleHazardRecognizer::HazardType
getHazardType(const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles);

ConflictTypeBits
checkConflict(const ResourceScoreboard<FuncUnitWrapper> &Scoreboard,
MachineInstr &MI, int DeltaCycles) const;
ConflictTypeBits checkConflict(MachineInstr &MI, int DeltaCycles);

protected:
ScheduleHazardRecognizer::HazardType getHazardType(const MCInstrDesc &Desc,
Expand Down
156 changes: 155 additions & 1 deletion llvm/lib/Target/AIE/AIEMachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ static cl::opt<bool> UseLoopHeuristics(
"aie-loop-sched-heuristics", cl::init(true),
cl::desc("Use special picking heuristics when scheduling a loop region"));

/// This option enables instruction mutation: when a new instruction hits a
/// slot conflict, an already scheduled multi-slot instruction may be shifted
/// to another slot to make room for it. Enabled by default.
static cl::opt<bool> InstructionMutation(
    "aie-instruction-mutation", cl::init(true),
    cl::desc("Allow instruction mutation to shift a multislot "
             "instruction in event of a slot conflict"));

namespace {
// A sentinel value to represent an unknown SUnit.
const constexpr unsigned UnknownSUNum = ~0;
Expand Down Expand Up @@ -479,6 +486,125 @@ int AIEPostRASchedStrategy::getMaxDeltaCycles(const SchedBoundary &Zone) const {
BottomUpDelta.getValue()});
}

static bool checkSlotConflict(const unsigned OpCodeA, const unsigned OpCodeB,
const AIEBaseMCFormats &Formats) {

MCSlotKind SlotKindA = Formats.getSlotKind(OpCodeA);
MCSlotKind SlotKindB = Formats.getSlotKind(OpCodeB);

if (SlotKindA != MCSlotKind() && SlotKindB != MCSlotKind()) {
return (Formats.getSlotInfo(SlotKindA)->getSlotSet() &
Formats.getSlotInfo(SlotKindB)->getSlotSet());
}
return true;
}

/// Check whether \p SU, which runs into a format (slot) conflict at
/// \p DeltaCycle, could still be issued in that cycle by switching an already
/// scheduled multi-slot instruction of the same cycle to one of its alternate
/// opcodes.
///
/// The hazard recognizer's scoreboard is modified temporarily to try out the
/// combinations; every release/emit pair is undone before moving on, so the
/// scoreboard is back in its original state on return.
///
/// On success, the (multi-slot instruction, alternate descriptor) pair is
/// cached in MutateInstruction under the instruction of \p SU, so that
/// schedNode() can apply the mutation. An entry already cached for the same
/// instruction is overwritten, so the cache always reflects the most recent
/// successful query.
///
/// \param SU         The candidate that could not be placed as-is.
/// \param Zone       The scheduling zone whose scoreboard is examined.
/// \param DeltaCycle Cycle offset, relative to the zone's current cycle, at
///                   which \p SU is tried.
/// \returns true if a multi-slot instruction can be shifted to make room for
///          \p SU; false otherwise, or if instruction mutation is disabled.
bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone,
                                          const int DeltaCycle) {
  if (!InstructionMutation)
    return false;

  const AIEBaseMCFormats &Formats = *getTII(*Zone.DAG)->getFormatInterface();
  AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Zone);
  MachineInstr *NewMI = SU.getInstr();

  // We are only interested in single-slot instructions whose conflict set
  // includes a format conflict. Other conflicts are not filtered out here;
  // they are caught by the hazard re-check after the multi-slot instruction
  // has been released from the scoreboard.
  // TODO : Extend this to SUs that are multi-slot and have only format hazard
  if (Formats.getAlternateInstsOpcode(NewMI->getOpcode()))
    return false;
  if (!(HR.checkConflict(*NewMI, DeltaCycle) &
        static_cast<uint32_t>(AIEHazardRecognizer::ConflictType::Format)))
    return false;

  // Loop-invariant values.
  const int CurrCycle = Zone.getCurrCycle();
  const unsigned TargetCycle =
      static_cast<unsigned int>(CurrCycle - DeltaCycle);
  const auto NewMIBanks = HR.getMemoryBanks(NewMI);
  const MachineRegisterInfo &NewMRI = NewMI->getMF()->getRegInfo();

  for (MachineInstr &MI : *Zone.DAG) {
    SUnit *ZoneSU = Zone.DAG->getSUnit(&MI);
    if (!ZoneSU || !ZoneSU->isScheduled)
      continue;

    // Only instructions emitted in the cycle targeted by SU are candidates.
    if (ZoneSU->BotReadyCycle != TargetCycle)
      continue;

    // We focus on multi-slot instructions because they can be scheduled in
    // different slots.
    auto AltOpcodes = Formats.getAlternateInstsOpcode(MI.getOpcode());
    if (!AltOpcodes)
      continue;

    // If the currently selected opcode of the multi-slot instruction does not
    // collide with the new instruction, shifting it would not help.
    if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(&MI),
                           NewMI->getOpcode(), Formats))
      continue;

    const MCInstrDesc &SelectedDesc = *HR.getSelectedAltDescs().getDesc(&MI);
    const auto MIBanks = HR.getMemoryBanks(&MI);
    const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
    const int MIDeltaCycle = CurrCycle - ZoneSU->BotReadyCycle;

    // Release the multi-slot instruction from the scoreboard to check whether
    // the new instruction fits once it is gone.
    HR.releaseFromScoreboard(SelectedDesc, MIBanks, MI.operands(), MRI,
                             MIDeltaCycle);

    if (HR.getHazardType(NewMI->getDesc(), NewMIBanks, NewMI->operands(),
                         NewMRI, DeltaCycle) !=
        ScheduleHazardRecognizer::HazardType::NoHazard) {
      // The new instruction still cannot be scheduled: put the multi-slot
      // instruction back so the scoreboard is unchanged, and try the next
      // candidate.
      HR.emitInScoreboard(SelectedDesc, MIBanks, MI.operands(), MRI,
                          MIDeltaCycle);
      continue;
    }

    // Emit the new instruction in the scoreboard, so that we can check
    // whether the released multi-slot instruction fits in the same cycle
    // with one of its alternate opcodes.
    HR.emitInScoreboard(NewMI->getDesc(), NewMIBanks, NewMI->operands(),
                        NewMRI, DeltaCycle);

    bool FoundAlternative = false;
    for (const auto AltOpcode : *AltOpcodes) {
      const MCInstrDesc &AltDesc = getTII(*Zone.DAG)->get(AltOpcode);
      if (HR.getHazardType(AltDesc, MIBanks, MI.operands(), MRI,
                           DeltaCycle) !=
          ScheduleHazardRecognizer::HazardType::NoHazard)
        continue;

      // Cache the information needed to mutate the instruction when SU gets
      // scheduled. Overwrite any entry left by an earlier query for the same
      // instruction; a plain insert() would silently keep the stale one.
      MutateInstruction.insert_or_assign(NewMI,
                                         std::make_pair(&MI, &AltDesc));
      FoundAlternative = true;
      break;
    }

    // Revert the scoreboard to its original state.
    HR.releaseFromScoreboard(NewMI->getDesc(), NewMIBanks, NewMI->operands(),
                             NewMRI, DeltaCycle);
    HR.emitInScoreboard(SelectedDesc, MIBanks, MI.operands(), MRI,
                        MIDeltaCycle);

    if (FoundAlternative)
      return true;
  }
  return false;
}

bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
bool /*VerifyReadyCycle*/) {
// Whether or not the zone is Top or Bot, verify if SU is ready to be
Expand All @@ -497,7 +623,8 @@ bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
// ReadyCycle is always greater or equal to the current cycle,
// so DeltaCycles will always be less or equal to 0.
if (Zone.checkHazard(&SU, DeltaCycles))
continue;
if (!canShiftSlot(SU, Zone, DeltaCycles))
continue;
SU.BotReadyCycle = CurrCycle - DeltaCycles;
return true;
}
Expand All @@ -514,10 +641,37 @@ void AIEPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
PostGenericScheduler::schedNode(SU, IsTopNode);
} else {
AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Bot);
int DeltaCycles = int(Bot.getCurrCycle()) - int(SU->BotReadyCycle);
assert(DeltaCycles <= 0);

// Check if an instuction needs to be moved to a different slot.
if (MutateInstruction.find(SU->getInstr()) != MutateInstruction.end()) {
auto [MI, Desc] = MutateInstruction[SU->getInstr()];
HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(MI),
HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycles);
// Update the selected opcode for the instruction, refer
// AIEPostRASchedStrategy::canShiftSlot()
HR.getSelectedAltDescs().setAlternateDescriptor(MI, Desc);
}

Bot.bumpNode(SU, DeltaCycles);

if (MutateInstruction.find(SU->getInstr()) != MutateInstruction.end()) {
auto [MI, Desc] = MutateInstruction[SU->getInstr()];
assert(HR.getHazardType(*Desc, HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycles) ==
ScheduleHazardRecognizer::HazardType::NoHazard);
// Reschedule the instruction with the new opcode.
HR.emitInScoreboard(*Desc, HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycles);
}
}
// Clear the MutateInstruction map since after scheduling the instruction the
// validity of mutation map can no longer be guaranteed.
MutateInstruction.clear();
SU->isScheduled = true;
}

void AIEPostRASchedStrategy::enterFunction(MachineFunction *MF) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIEMachineScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ std::vector<AIE::MachineBundle> computeAndFinalizeBundles(SchedBoundary &Zone);
class AIEPostRASchedStrategy : public PostGenericScheduler {
/// Maintain the state of interblock/loop-aware scheduling
AIE::InterBlockScheduling InterBlock;
MutateInstructionMap MutateInstruction;

public:
AIEPostRASchedStrategy(const MachineSchedContext *C);
Expand All @@ -50,6 +51,7 @@ class AIEPostRASchedStrategy : public PostGenericScheduler {
SUnit *pickNodeAndCycle(bool &IsTopNode,
std::optional<unsigned> &BotEmissionCycle) override;

bool canShiftSlot(SUnit &SU, SchedBoundary &Zone, const int DeltaCycle);
bool isAvailableNode(SUnit &SU, SchedBoundary &Zone,
bool VerifyReadyCycle) override;

Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,13 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm
; ASM-NEXT: lda r9, [p5, #0]; paddb [p6], #-56; mov p5, sp
; ASM-NEXT: lda r6, [p6, #0]; paddb [p5], #-80; mov p4, sp
; ASM-NEXT: lda r10, [p5, #0]; paddb [p4], #-60; mov p5, sp
; ASM-NEXT: lda p6, [p4, #0]; paddb [p5], #-84
; ASM-NEXT: lda r11, [p5, #0]; mov p0, sp
; ASM-NEXT: paddb [p0], #-72; mov p4, sp
; ASM-NEXT: lda p0, [p0, #0]; paddb [p4], #-64; mov p5, sp
; ASM-NEXT: lda p6, [p4, #0]; paddb [p5], #-84; mov p4, sp
; ASM-NEXT: lda r11, [p5, #0]; paddb [p4], #-64; mov p5, sp
; ASM-NEXT: lda p7, [p4, #0]; paddb [p5], #-88; mov p4, sp
; ASM-NEXT: lda r12, [p5, #0]; paddb [p4], #-68; mov p5, sp
; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-92
; ASM-NEXT: lda r13, [p5, #0]
; ASM-NEXT: mova r6, #1; add r7, r1, #-1; mov p5, r6
; ASM-NEXT: mova r6, #3; ne r3, r3, r6
; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-92; add r7, r1, #-1; mov p0, sp
; ASM-NEXT: lda r13, [p5, #0]; paddb [p0], #-72; movx r6, #1; mov p5, r6
; ASM-NEXT: lda p0, [p0, #0]; ne r3, r3, r6; mov r6, #3
; ASM-NEXT: ltu r7, r7, r6
; ASM-NEXT: jz r7, #.LBB0_2
; ASM-NEXT: st dn4, [p5, #0]; nez r0, r0 // Delay Slot 5
Expand Down
Loading

0 comments on commit 71c9012

Please sign in to comment.