// Review commentary for the patch below. It is placed ahead of the first
// `diff --git` header; the patch text itself is reproduced unchanged.
//
// Files touched: AIEHazardRecognizer.cpp/.h and AIEMachineScheduler.cpp/.h.
//
// AIEHazardRecognizer
//  - Every checkConflict overload now returns ConflictTypeBits (declared as an
//    alias of uint64_t) instead of bool. Two overloads that use the member
//    Scoreboard are added, plus two that take an explicit MCInstrDesc so a
//    descriptor other than MI.getDesc() can be checked.
//  - The static checkConflict no longer returns at the first conflict found.
//    It always calls checkFormatConflict, checkMemoryBankConflict and
//    checkFUConflict and ORs ConflictType::Format / MemoryBank / FU into the
//    result; ConflictType::NoConflict (0) means no conflict was found.
//  - checkFUConflict walks the itinerary stages starting at DeltaCycles and
//    stops once (Cycle - DeltaCycles) reaches *FUDepthLimit, when a limit is
//    given. The LLVM_DEBUG "*** Hazard in cycle=" dump that used to be emitted
//    on an FU conflict is removed by this patch.
//
// AIEMachineScheduler
//  - Adds the option "aie-ignore-bank-conflict" (initialised to false).
//  - canOptimizeMemoryAccess(SU, Zone, DeltaCycle) returns false unless all of
//    the following hold: the option is set, the instruction mayLoadOrStore(),
//    and DeltaCycle - 1 >= -getMaxDeltaCycles(Zone). It then scans the
//    alternate opcodes (or only the instruction's own opcode when
//    getAlternateInstsOpcode returns null) for the first one whose
//    checkConflict result is exactly ConflictType::MemoryBank. If one is found
//    and Zone.checkHazard(&SU, DeltaCycle - 1) also reports a hazard, it
//    records that opcode via setAlternateDescriptor (only when there is more
//    than one alternate) and returns true.
//  - isAvailableNode now skips a cycle on hazard only when
//    canOptimizeMemoryAccess returns false.
//
// NOTE(review): checkConflict(MachineInstr &, const MCInstrDesc &, int) is
//   declared and defined without `const`, unlike the other three instance
//   overloads -- confirm whether that is intentional.
// NOTE(review): checkFormatConflict / checkMemoryBankConflict take
//   `unsigned SlotSet` / `unsigned MemoryBanks`, while the caller holds
//   SlotBits / MemoryBankBits -- confirm those types are not wider than
//   unsigned, otherwise bits are silently dropped.
// NOTE(review): callers that tested the old bool result are not visible here;
//   presumably they rely on the implicit nonzero-to-true conversion -- verify.
// NOTE(review): the `== ConflictType::MemoryBank` test rejects combined
//   results such as MemoryBank|FU; this looks deliberate ("caused by a memory
//   bank") -- confirm.
// NOTE(review): OpcodeWithMemoryBankConflict uses 0 as the "not found"
//   sentinel; this assumes opcode 0 is never a valid candidate -- confirm.
// NOTE(review): getAlternateInstsOpcode is called twice with the same opcode,
//   and isAvailableNode now has an unbraced nested `if`; both are easy to
//   tidy in a follow-up revision of the patch.
// NOTE(review): this copy of the patch appears to have lost its line breaks
//   and template argument lists (e.g. `SmallVector MemoryAccessCycles`,
//   `std::optional FUDepthLimit`, `cl::opt UseLoopHeuristics`) -- presumably
//   an extraction artifact; re-fetch the original patch before applying it.
diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp index 0b535ef7354a..8e2ad67d76cf 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp @@ -448,10 +448,26 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType( FUDepthLimit)); } -bool AIEHazardRecognizer::checkConflict( +ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI, + int DeltaCycles) const { + return checkConflict(Scoreboard, MI, DeltaCycles); +} + +ConflictTypeBits AIEHazardRecognizer::checkConflict( const ResourceScoreboard &Scoreboard, MachineInstr &MI, int DeltaCycles) const { - const MCInstrDesc &Desc = MI.getDesc(); + return checkConflict(Scoreboard, MI, MI.getDesc(), DeltaCycles); +} + +ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI, + const MCInstrDesc &Desc, + int DeltaCycles) { + return checkConflict(Scoreboard, MI, Desc, DeltaCycles); +} + +ConflictTypeBits AIEHazardRecognizer::checkConflict( + const ResourceScoreboard &Scoreboard, MachineInstr &MI, + const MCInstrDesc &Desc, int DeltaCycles) const { const unsigned SchedClass = TII->getSchedClass(Desc, MI.operands(), MI.getMF()->getRegInfo()); const MemoryBankBits MemoryBanks = getMemoryBanks(&MI); @@ -461,18 +477,42 @@ bool AIEHazardRecognizer::checkConflict( MemoryBanks, TII->getMemoryCycles(SchedClass), DeltaCycles, std::nullopt); } -bool AIEHazardRecognizer::checkConflict( +ConflictTypeBits AIEHazardRecognizer::checkConflict( const ResourceScoreboard &Scoreboard, const InstrItineraryData *ItinData, unsigned SchedClass, SlotBits SlotSet, MemoryBankBits MemoryBanks, SmallVector MemoryAccessCycles, int DeltaCycles, std::optional FUDepthLimit) { assert(Scoreboard.isValidDelta(DeltaCycles)); + ConflictTypeBits Conflict = ConflictType::NoConflict; + + if (checkFormatConflict(Scoreboard, DeltaCycles, SlotSet)) + Conflict |= ConflictType::Format; + + if 
(checkMemoryBankConflict(MemoryAccessCycles, Scoreboard, DeltaCycles, + MemoryBanks)) + Conflict |= ConflictType::MemoryBank; + + if (checkFUConflict(ItinData, SchedClass, DeltaCycles, Scoreboard, + FUDepthLimit)) + Conflict |= ConflictType::FU; + + return Conflict; +} +// Return true if there is a conflict due to format. +bool AIEHazardRecognizer::checkFormatConflict( + const ResourceScoreboard &Scoreboard, int DeltaCycles, + unsigned SlotSet) { // Verify format hazards FuncUnitWrapper EmissionCycle(/*Req=*/0, /*Res=*/0, SlotSet); - if (EmissionCycle.conflict(Scoreboard[DeltaCycles])) - return true; + return EmissionCycle.conflict(Scoreboard[DeltaCycles]); +} +// Return true if there is a conflict due to memory banks. +bool AIEHazardRecognizer::checkMemoryBankConflict( + const SmallVector &MemoryAccessCycles, + const ResourceScoreboard &Scoreboard, int DeltaCycles, + unsigned MemoryBanks) { // Verify memory bank hazards if (!MemoryAccessCycles.empty()) { FuncUnitWrapper MemoryBankAccessCycle(/*Req=*/0, /*Res=*/0, /*SlotSet=*/0, @@ -488,15 +528,22 @@ bool AIEHazardRecognizer::checkConflict( } } } + return false; +} + +// Return true if there is a conflict in the functional units. +bool AIEHazardRecognizer::checkFUConflict( + const InstrItineraryData *ItinData, unsigned SchedClass, int DeltaCycles, + const ResourceScoreboard &Scoreboard, + const std::optional &FUDepthLimit) { // Note that Delta will be negative for bottom-up scheduling. // Cycle is 'our' cycle at which each stage of the itinerary starts. // It gets updated by the increment from the InstrStage. 
int Cycle = DeltaCycles; for (const InstrStage &IS : ItinData->getStages(SchedClass)) { - if (FUDepthLimit && (Cycle - DeltaCycles) >= *FUDepthLimit) { + if (FUDepthLimit && (Cycle - DeltaCycles) >= *FUDepthLimit) break; - } // Check availability of this stage's resources for the specified number // of cycles const FuncUnitWrapper ThisCycle(IS); @@ -504,18 +551,13 @@ bool AIEHazardRecognizer::checkConflict( int StageCycle = Cycle + (int)C; assert(StageCycle < Scoreboard.getDepth()); - if (ThisCycle.conflict(Scoreboard[StageCycle])) { - LLVM_DEBUG(dbgs() << "*** Hazard in cycle=" << StageCycle - << " EC=" << StageCycle - DeltaCycles << ":\n"; - ThisCycle.dump(); dbgs() << "\n"); + if (ThisCycle.conflict(Scoreboard[StageCycle])) return true; - } } // Advance the cycle to the next stage. Cycle += IS.getNextCycles(); } - return false; } diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.h b/llvm/lib/Target/AIE/AIEHazardRecognizer.h index 2e712e7c5ae8..eff912d610bd 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.h +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.h @@ -29,6 +29,7 @@ namespace llvm { class MachineInstr; +using ConflictTypeBits = uint64_t; void applyFormatOrdering(AIE::MachineBundle &Bundle, const VLIWFormat &Format, MachineInstr *BundleRoot, @@ -99,6 +100,13 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer { void computeMaxLatency(); public: + enum ConflictType { + NoConflict = 0b000, + Format = 0b001, + MemoryBank = 0b010, + FU = 0b100, + }; + /// ScoreboardDepth can be used to speficy a fixed depth without querying the /// scheduling model. This is mostly used for testing, for other cases we /// should trust the instruction itineraries. 
@@ -189,19 +197,44 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer { const MCInstrDesc &Desc, MemoryBankBits MemoryBanks, iterator_range MIOperands, const MachineRegisterInfo &MRI, int DeltaCycles) const; - bool checkConflict(const ResourceScoreboard &Scoreboard, - MachineInstr &MI, int DeltaCycles) const; + + ConflictTypeBits checkConflict(MachineInstr &MI, int DeltaCycles) const; + ConflictTypeBits + checkConflict(const ResourceScoreboard &Scoreboard, + MachineInstr &MI, int DeltaCycles) const; + + ConflictTypeBits + checkConflict(const ResourceScoreboard &Scoreboard, + MachineInstr &MI, const MCInstrDesc &Desc, + int DeltaCycles) const; + ConflictTypeBits checkConflict(MachineInstr &MI, const MCInstrDesc &Desc, + int DeltaCycles); protected: ScheduleHazardRecognizer::HazardType getHazardType(const MCInstrDesc &Desc, int DeltaCycles); - static bool + static ConflictTypeBits checkConflict(const ResourceScoreboard &Scoreboard, const InstrItineraryData *ItinData, unsigned SchedClass, SlotBits SlotSet, MemoryBankBits MemoryBanks, SmallVector MemoryAccessCycles, int DeltaCycles, std::optional FUDepthLimit); + static bool + checkFormatConflict(const ResourceScoreboard &Scoreboard, + int DeltaCycles, unsigned SlotSet); + + static bool + checkMemoryBankConflict(const SmallVector &MemoryAccessCycles, + const ResourceScoreboard &Scoreboard, + int DeltaCycles, unsigned MemoryBanks); + + static bool + checkFUConflict(const InstrItineraryData *ItinData, unsigned SchedClass, + int DeltaCycles, + const ResourceScoreboard &Scoreboard, + const std::optional &FUDepthLimit); + static void enterResources(ResourceScoreboard &Scoreboard, const InstrItineraryData *ItinData, unsigned SchedClass, SlotBits SlotSet, diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp index 4296bece91b5..d5d9b2acd5fb 100644 --- a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp +++ b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp @@ -88,6 +88,10 @@ 
static cl::opt UseLoopHeuristics( "aie-loop-sched-heuristics", cl::init(true), cl::desc("Use special picking heuristics when scheduling a loop region")); +static cl::opt IgnoreMemoryBankConflict( + "aie-ignore-bank-conflict", cl::init(false), + cl::desc("Ignore bank conflicts based on special heuristics")); + namespace { // A sentinel value to represent an unknown SUnit. const constexpr unsigned UnknownSUNum = ~0; @@ -479,6 +483,56 @@ int AIEPostRASchedStrategy::getMaxDeltaCycles(const SchedBoundary &Zone) const { BottomUpDelta.getValue()}); } +bool AIEPostRASchedStrategy::canOptimizeMemoryAccess(SUnit &SU, + SchedBoundary &Zone, + const int DeltaCycle) { + if (!IgnoreMemoryBankConflict) + return false; + + if (!SU.getInstr()->mayLoadOrStore()) { + return false; + } + + const int MinDelta = -getMaxDeltaCycles(Zone); + if (!(DeltaCycle - 1 >= MinDelta)) + return false; + + const AIEBaseMCFormats &Formats = *getTII(*Zone.DAG)->getFormatInterface(); + AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Zone); + MachineInstr *MI = SU.getInstr(); + + const std::vector *AlternateOpcodes; + auto DefaultOpcode = std::vector{SU.getInstr()->getOpcode()}; + AlternateOpcodes = + Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) + ? Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) + : &DefaultOpcode; + + unsigned int OpcodeWithMemoryBankConflict = 0; + for (const unsigned int AltOpcode : *AlternateOpcodes) { + // Check if the conflict was caused by a memory bank. + if (HR.checkConflict(*MI, getTII(*Zone.DAG)->get(AltOpcode), DeltaCycle) == + AIEHazardRecognizer::ConflictType::MemoryBank) { + OpcodeWithMemoryBankConflict = AltOpcode; + break; + } + } + // Check if the memory operation will also have a conflict in the next cycle. + // If so, we could schedule the instruction in the current delta cycle, even + // though it causes bank conflict. + // NOTE : With this optimization if the resultant schedule does not decrease + // the total instr. 
count in the kernel loop by the same number of bank + // conflict we are allowing we will see regression. + if (OpcodeWithMemoryBankConflict && Zone.checkHazard(&SU, DeltaCycle - 1)) { + if (AlternateOpcodes->size() > 1) + HR.getSelectedAltDescs().setAlternateDescriptor( + MI, OpcodeWithMemoryBankConflict); + return true; + } + + return false; +} + bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone, bool /*VerifyReadyCycle*/) { // Whether or not the zone is Top or Bot, verify if SU is ready to be @@ -497,7 +551,8 @@ bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone, // ReadyCycle is always greater or equal to the current cycle, // so DeltaCycles will always be less or equal to 0. if (Zone.checkHazard(&SU, DeltaCycles)) - continue; + if (!canOptimizeMemoryAccess(SU, Zone, DeltaCycles)) + continue; SU.BotReadyCycle = CurrCycle - DeltaCycles; return true; } diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.h b/llvm/lib/Target/AIE/AIEMachineScheduler.h index 43b918e0e27c..64e7c9ce971b 100644 --- a/llvm/lib/Target/AIE/AIEMachineScheduler.h +++ b/llvm/lib/Target/AIE/AIEMachineScheduler.h @@ -50,6 +50,9 @@ class AIEPostRASchedStrategy : public PostGenericScheduler { SUnit *pickNodeAndCycle(bool &IsTopNode, std::optional &BotEmissionCycle) override; + bool canOptimizeMemoryAccess(SUnit &SU, SchedBoundary &Zone, + const int DeltaCycle); + bool isAvailableNode(SUnit &SU, SchedBoundary &Zone, bool VerifyReadyCycle) override;