From 562118a4593c5de75dd9c0cb7015d4a367d1ff47 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Sat, 5 Oct 2024 07:44:46 -0700 Subject: [PATCH] [AIEX] Reschedule Multi-Slot Instruction for better schedule --- llvm/lib/Target/AIE/AIEAlternateDescriptors.h | 9 +- llvm/lib/Target/AIE/AIEHazardRecognizer.cpp | 29 +++- llvm/lib/Target/AIE/AIEHazardRecognizer.h | 15 +- llvm/lib/Target/AIE/AIEMachineScheduler.cpp | 156 +++++++++++++++++- llvm/lib/Target/AIE/AIEMachineScheduler.h | 2 + .../CodeGen/AIE/aie2/end-to-end/Add2D-red.ll | 13 +- .../instruction_mutation/rescheduling.mir | 123 ++++++++++++++ .../negative_latencies/load_accumulate.mir | 22 ++- 8 files changed, 345 insertions(+), 24 deletions(-) create mode 100644 llvm/test/CodeGen/AIE/aie2/schedule/instruction_mutation/rescheduling.mir diff --git a/llvm/lib/Target/AIE/AIEAlternateDescriptors.h b/llvm/lib/Target/AIE/AIEAlternateDescriptors.h index 01673a348efd..e7ed347d1651 100644 --- a/llvm/lib/Target/AIE/AIEAlternateDescriptors.h +++ b/llvm/lib/Target/AIE/AIEAlternateDescriptors.h @@ -22,6 +22,9 @@ namespace llvm { +using MutateInstructionMap = + std::unordered_map>; using MIAltDescsMap = std::unordered_map; class AIEAlternateDescriptors { @@ -40,7 +43,11 @@ class AIEAlternateDescriptors { const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI->getMF()); const AIEBaseInstrInfo *TII = STI.getInstrInfo(); - AlternateDescs[MI] = &TII->get(AltInstOpcode); + setAlternateDescriptor(MI, &TII->get(AltInstOpcode)); + } + + void setAlternateDescriptor(MachineInstr *MI, const MCInstrDesc *AltDesc) { + AlternateDescs[MI] = AltDesc; } // Return the alternate descriptor for the given multi-opcode instruction. diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp index b3a558d487f4..ec7cee3d47b3 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp @@ -440,6 +440,14 @@ auto toHazardType(bool Conflict) { } } // namespace +ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType( + const MCInstrDesc &Desc, MemoryBankBits MemoryBanks, + iterator_range MIOperands, + const MachineRegisterInfo &MRI, int DeltaCycles) { + return getHazardType(Scoreboard, Desc, MemoryBanks, MIOperands, MRI, + DeltaCycles); +} + // These functions interpret the itinerary, translating InstrStages // to ResourceCycles to apply. 
// We deviate from the standard ScoreboardHazardRecognizer by not @@ -457,6 +465,11 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType( FUDepthLimit)); } +ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI, + int DeltaCycles) { + return checkConflict(Scoreboard, MI, DeltaCycles); +} + ConflictTypeBits AIEHazardRecognizer::checkConflict( const ResourceScoreboard &Scoreboard, MachineInstr &MI, int DeltaCycles) const { @@ -476,18 +489,18 @@ ConflictTypeBits AIEHazardRecognizer::checkConflict( MemoryBankBits MemoryBanks, SmallVector MemoryAccessCycles, int DeltaCycles, std::optional FUDepthLimit) { assert(Scoreboard.isValidDelta(DeltaCycles)); - ConflictTypeBits Conflict = ConflictType::NoConflict; + ConflictTypeBits Conflict = static_cast(ConflictType::NoConflict); if (checkFormatConflict(Scoreboard, DeltaCycles, SlotSet)) - Conflict |= ConflictType::Format; + Conflict |= static_cast(ConflictType::Format); if (checkMemoryBankConflict(MemoryAccessCycles, Scoreboard, DeltaCycles, MemoryBanks)) - Conflict |= ConflictType::MemoryBank; + Conflict |= static_cast(ConflictType::MemoryBank); if (checkFUConflict(ItinData, SchedClass, DeltaCycles, Scoreboard, FUDepthLimit)) - Conflict |= ConflictType::FU; + Conflict |= static_cast(ConflictType::FU); return Conflict; } @@ -573,6 +586,14 @@ void AIEHazardRecognizer::emitInScoreboard( TII->getMemoryCycles(SchedClass), DeltaCycles, FUDepthLimit); } +void AIEHazardRecognizer::releaseFromScoreboard( + const MCInstrDesc &Desc, MemoryBankBits MemoryBanks, + iterator_range MIOperands, + const MachineRegisterInfo &MRI, int DeltaCycles) { + releaseFromScoreboard(Scoreboard, Desc, MemoryBanks, MIOperands, MRI, + DeltaCycles); +} + void AIEHazardRecognizer::releaseFromScoreboard( ResourceScoreboard &TheScoreboard, const MCInstrDesc &Desc, MemoryBankBits MemoryBanks, diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.h b/llvm/lib/Target/AIE/AIEHazardRecognizer.h index 3b4b63f8b002..92b4f8c70a7e 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.h +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.h @@ -29,7 +29,7 @@ namespace llvm { class MachineInstr; -using ConflictTypeBits = uint64_t; +using ConflictTypeBits = std::uint32_t; void applyFormatOrdering(AIE::MachineBundle &Bundle, const VLIWFormat &Format, MachineInstr *BundleRoot, @@ -101,7 +101,7 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer { void computeMaxLatency(); public: - enum ConflictType { + enum class ConflictType : std::uint32_t { NoConflict = 0b000, Format = 0b001, MemoryBank = 0b010, @@ -164,6 +164,11 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer { iterator_range MIOperands, const MachineRegisterInfo &MRI, int DeltaCycles) const; + // Apply the above function to the local scoreboard. 
+  void releaseFromScoreboard(const MCInstrDesc &Desc,
+                             MemoryBankBits MemoryBanks,
+                             iterator_range MIOperands,
+                             const MachineRegisterInfo &MRI, int DeltaCycles);
 
   /// Block all scoreboard resources at DeltaCycles
   void blockCycleInScoreboard(int DeltaCycle);
@@ -205,9 +210,15 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
                 const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
                 iterator_range MIOperands,
                 const MachineRegisterInfo &MRI, int DeltaCycles) const;
+  ScheduleHazardRecognizer::HazardType
+  getHazardType(const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
+                iterator_range MIOperands,
+                const MachineRegisterInfo &MRI, int DeltaCycles);
+
   ConflictTypeBits
   checkConflict(const ResourceScoreboard &Scoreboard,
                 MachineInstr &MI, int DeltaCycles) const;
+  ConflictTypeBits checkConflict(MachineInstr &MI, int DeltaCycles);
 
 protected:
   ScheduleHazardRecognizer::HazardType getHazardType(const MCInstrDesc &Desc,
diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp
index 4296bece91b5..2b85de4b26a9 100644
--- a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp
+++ b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp
@@ -88,6 +88,13 @@ static cl::opt UseLoopHeuristics(
     "aie-loop-sched-heuristics", cl::init(true),
     cl::desc("Use special picking heuristics when scheduling a loop region"));
 
+/// This option enables instruction mutation to shift a multi-slot instruction
+/// in the event of a slot conflict.
+static cl::opt InstructionMutation(
+    "aie-instruction-mutation", cl::init(true),
+    cl::desc("Allow instruction mutation to shift a multi-slot "
+             "instruction in the event of a slot conflict"));
+
 namespace {
 // A sentinel value to represent an unknown SUnit.
 const constexpr unsigned UnknownSUNum = ~0;
@@ -479,6 +486,125 @@ int AIEPostRASchedStrategy::getMaxDeltaCycles(const SchedBoundary &Zone) const {
                    BottomUpDelta.getValue()});
 }
 
+static bool checkSlotConflict(const unsigned OpCodeA, const unsigned OpCodeB,
+                              const AIEBaseMCFormats &Formats) {
+
+  MCSlotKind SlotKindA = Formats.getSlotKind(OpCodeA);
+  MCSlotKind SlotKindB = Formats.getSlotKind(OpCodeB);
+
+  if (SlotKindA != MCSlotKind() && SlotKindB != MCSlotKind()) {
+    return (Formats.getSlotInfo(SlotKindA)->getSlotSet() &
+            Formats.getSlotInfo(SlotKindB)->getSlotSet());
+  }
+  return true;
+}
+
+bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone,
+                                          const int DeltaCycle) {
+
+  if (!InstructionMutation)
+    return false;
+
+  const AIEBaseMCFormats &Formats = *getTII(*Zone.DAG)->getFormatInterface();
+  AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Zone);
+  bool CanShiftSlot = false;
+
+  if (!(!Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) &&
+        (HR.checkConflict(*SU.getInstr(), DeltaCycle) &
+         static_cast(AIEHazardRecognizer::ConflictType::Format)))) {
+    // We are only interested in single-slot instructions that only have a
+    // format hazard.
+    // TODO: Extend this to SUs that are multi-slot and only have a format
+    // hazard.
+    return false;
+  }
+  for (MachineInstr &MI : *Zone.DAG) {
+    SUnit *ZoneSU = Zone.DAG->getSUnit(&MI);
+    if (!ZoneSU)
+      continue;
+    if (!ZoneSU->isScheduled)
+      continue;
+
+    const int CurrCycle = Zone.getCurrCycle();
+    if (ZoneSU->BotReadyCycle !=
+        static_cast(CurrCycle - DeltaCycle))
+      continue;
+
+    // Check for a multi-slot instruction scheduled in the same DeltaCycle; we
+    // focus on multi-slot instructions because they can be scheduled in
+    // different slots.
+    auto AltOpcodes = Formats.getAlternateInstsOpcode(MI.getOpcode());
+    if (!AltOpcodes)
+      continue;
+
+    // Check if the scheduled multi-slot instruction has a slot conflict
+    // with the new instruction. If so, we might be able to shift the
+    // multi-slot instruction and schedule the new instruction.
+    if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(&MI),
+                           SU.getInstr()->getOpcode(), Formats))
+      continue;
+
+    // Release the multi-slot instruction from the scoreboard to check whether
+    // one of its other alternate opcodes can coexist with the new instruction
+    // without creating a hazard.
+    HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
+                             HR.getMemoryBanks(&MI), MI.operands(),
+                             MI.getMF()->getRegInfo(),
+                             CurrCycle - ZoneSU->BotReadyCycle);
+
+    MachineInstr *NewMI = SU.getInstr();
+    // Check if the new instruction can be scheduled after unscheduling
+    // the conflicting multi-slot instruction.
+    if (HR.getHazardType(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
+                         NewMI->operands(), NewMI->getMF()->getRegInfo(),
+                         DeltaCycle) !=
+        ScheduleHazardRecognizer::HazardType::NoHazard) {
+      // If the new instruction cannot be scheduled after unscheduling the
+      // multi-slot instruction, restore the scoreboard to its original state
+      // and continue.
+      HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
+                          HR.getMemoryBanks(&MI), MI.operands(),
+                          MI.getMF()->getRegInfo(),
+                          CurrCycle - ZoneSU->BotReadyCycle);
+      continue;
+    }
+
+    // Emit the new instruction into the scoreboard. This lets us check
+    // whether the previously unscheduled multi-slot instruction can be
+    // scheduled in the same cycle with an alternate opcode.
+    HR.emitInScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
+                        NewMI->operands(), NewMI->getMF()->getRegInfo(),
+                        DeltaCycle);
+
+    // Check if the previously unscheduled multi-slot instruction can be
+    // rescheduled in the same cycle, with a different opcode, in the presence
+    // of the new instruction.
+    for (const auto AltOpcodeInside : *AltOpcodes) {
+      const MCInstrDesc &Desc = getTII(*Zone.DAG)->get(AltOpcodeInside);
+      if (HR.getHazardType(Desc, HR.getMemoryBanks(&MI), MI.operands(),
+                           MI.getMF()->getRegInfo(), DeltaCycle) ==
+          ScheduleHazardRecognizer::HazardType::NoHazard) {
+        // Cache the information to mutate the instruction during bumpNode().
+        MutateInstruction.insert(
+            std::make_pair(NewMI, std::make_pair(&MI, &Desc)));
+        CanShiftSlot = true;
+        break;
+      }
+    }
+
+    // Restore the scoreboard to its original state.
+    HR.releaseFromScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
+                             NewMI->operands(), NewMI->getMF()->getRegInfo(),
+                             DeltaCycle);
+    HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
+                        HR.getMemoryBanks(&MI), MI.operands(),
+                        MI.getMF()->getRegInfo(),
+                        CurrCycle - ZoneSU->BotReadyCycle);
+
+    if (CanShiftSlot)
+      break;
+  }
+  return CanShiftSlot;
+}
+
 bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
                                              bool /*VerifyReadyCycle*/) {
   // Whether or not the zone is Top or Bot, verify if SU is ready to be
@@ -497,7 +623,8 @@ bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
     // ReadyCycle is always greater or equal to the current cycle,
     // so DeltaCycles will always be less or equal to 0.
     if (Zone.checkHazard(&SU, DeltaCycles))
-      continue;
+      if (!canShiftSlot(SU, Zone, DeltaCycles))
+        continue;
     SU.BotReadyCycle = CurrCycle - DeltaCycles;
     return true;
   }
@@ -514,10 +641,37 @@ void AIEPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     PostGenericScheduler::schedNode(SU, IsTopNode);
   } else {
+    AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Bot);
     int DeltaCycles = int(Bot.getCurrCycle()) - int(SU->BotReadyCycle);
     assert(DeltaCycles <= 0);
+
+    // Check if the instruction needs to be moved to a different slot.
+    if (MutateInstruction.find(SU->getInstr()) != MutateInstruction.end()) {
+      auto [MI, Desc] = MutateInstruction[SU->getInstr()];
+      HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(MI),
+                               HR.getMemoryBanks(MI), MI->operands(),
+                               MI->getMF()->getRegInfo(), DeltaCycles);
+      // Update the selected opcode for the instruction; see
+      // AIEPostRASchedStrategy::canShiftSlot().
+      HR.getSelectedAltDescs().setAlternateDescriptor(MI, Desc);
+    }
+
     Bot.bumpNode(SU, DeltaCycles);
+
+    if (MutateInstruction.find(SU->getInstr()) != MutateInstruction.end()) {
+      auto [MI, Desc] = MutateInstruction[SU->getInstr()];
+      assert(HR.getHazardType(*Desc, HR.getMemoryBanks(MI), MI->operands(),
+                              MI->getMF()->getRegInfo(), DeltaCycles) ==
+             ScheduleHazardRecognizer::HazardType::NoHazard);
+      // Reschedule the instruction with the new opcode.
+      HR.emitInScoreboard(*Desc, HR.getMemoryBanks(MI), MI->operands(),
+                          MI->getMF()->getRegInfo(), DeltaCycles);
+    }
   }
+
+  // Clear the MutateInstruction map: once the instruction has been scheduled,
+  // the validity of the cached mutations can no longer be guaranteed.
+ MutateInstruction.clear(); + SU->isScheduled = true; } void AIEPostRASchedStrategy::enterFunction(MachineFunction *MF) { diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.h b/llvm/lib/Target/AIE/AIEMachineScheduler.h index 43b918e0e27c..b958257aaf8b 100644 --- a/llvm/lib/Target/AIE/AIEMachineScheduler.h +++ b/llvm/lib/Target/AIE/AIEMachineScheduler.h @@ -37,6 +37,7 @@ std::vector computeAndFinalizeBundles(SchedBoundary &Zone); class AIEPostRASchedStrategy : public PostGenericScheduler { /// Maintain the state of interblock/loop-aware scheduling AIE::InterBlockScheduling InterBlock; + MutateInstructionMap MutateInstruction; public: AIEPostRASchedStrategy(const MachineSchedContext *C); @@ -50,6 +51,7 @@ class AIEPostRASchedStrategy : public PostGenericScheduler { SUnit *pickNodeAndCycle(bool &IsTopNode, std::optional &BotEmissionCycle) override; + bool canShiftSlot(SUnit &SU, SchedBoundary &Zone, const int DeltaCycle); bool isAvailableNode(SUnit &SU, SchedBoundary &Zone, bool VerifyReadyCycle) override; diff --git a/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll b/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll index 17dfb2a60671..ed30b233b1a9 100644 --- a/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll +++ b/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll @@ -72,16 +72,13 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm ; ASM-NEXT: lda r9, [p5, #0]; paddb [p6], #-56; mov p5, sp ; ASM-NEXT: lda r6, [p6, #0]; paddb [p5], #-80; mov p4, sp ; ASM-NEXT: lda r10, [p5, #0]; paddb [p4], #-60; mov p5, sp -; ASM-NEXT: lda p6, [p4, #0]; paddb [p5], #-84 -; ASM-NEXT: lda r11, [p5, #0]; mov p0, sp -; ASM-NEXT: paddb [p0], #-72; mov p4, sp -; ASM-NEXT: lda p0, [p0, #0]; paddb [p4], #-64; mov p5, sp +; ASM-NEXT: lda p6, [p4, #0]; paddb [p5], #-84; mov p4, sp +; ASM-NEXT: lda r11, [p5, #0]; paddb [p4], #-64; mov p5, sp ; ASM-NEXT: lda p7, [p4, #0]; paddb [p5], #-88; mov p4, sp ; ASM-NEXT: lda r12, [p5, #0]; paddb [p4], #-68; mov p5, sp -; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-92 -; ASM-NEXT: lda r13, [p5, #0] -; ASM-NEXT: mova r6, #1; add r7, r1, #-1; mov p5, r6 -; ASM-NEXT: mova r6, #3; ne r3, r3, r6 +; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-92; add r7, r1, #-1; mov p0, sp +; ASM-NEXT: lda r13, [p5, #0]; paddb [p0], #-72; movx r6, #1; mov p5, r6 +; ASM-NEXT: lda p0, [p0, #0]; ne r3, r3, r6; mov r6, #3 ; ASM-NEXT: ltu r7, r7, r6 ; ASM-NEXT: jz r7, #.LBB0_2 ; ASM-NEXT: st dn4, [p5, #0]; nez r0, r0 // Delay Slot 5 diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/instruction_mutation/rescheduling.mir b/llvm/test/CodeGen/AIE/aie2/schedule/instruction_mutation/rescheduling.mir new file mode 100644 index 000000000000..a6d2cc1e38e1 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/schedule/instruction_mutation/rescheduling.mir @@ -0,0 +1,123 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. 
or its affiliates +# RUN: llc -march=aie2 -run-pass=postmisched --aie-instruction-mutation=true --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ON +# RUN: llc -march=aie2 -run-pass=postmisched --aie-instruction-mutation=false --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=OFF + +--- +name: check_rescheduling_1 +alignment: 16 +body: | + bb.0.entry: + ; ON-LABEL: name: check_rescheduling_1 + ; ON: BUNDLE implicit-def $r1, implicit-def $p1, implicit killed $p1 { + ; ON-NEXT: $r1 = LDA_TM killed $p1 :: (load (s32) from custom "TileMemory", addrspace 15) + ; ON-NEXT: $p1 = MOV_mv_cg 1 + ; ON-NEXT: } + ; ON-NEXT: $r2 = LDA_TM killed $p1 :: (load (s32) from custom "TileMemory", addrspace 15) + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; + ; OFF-LABEL: name: check_rescheduling_1 + ; OFF: $r1 = LDA_TM killed $p1 :: (load (s32) from custom "TileMemory", addrspace 15) + ; OFF-NEXT: $p1 = MOVA_lda_cg 1 + ; OFF-NEXT: $r2 = LDA_TM killed $p1 :: (load (s32) from custom "TileMemory", addrspace 15) + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + $r1 = LDA_TM $p1 :: (load (s32) from custom "TileMemory") + $p1 = MOV_PD_imm10_pseudo 1 + $r2 = LDA_TM $p1 :: (load (s32) from custom "TileMemory") +... + + +--- +name: check_rescheduling_2 +alignment: 16 +body: | + bb.0.entry: + ; ON-LABEL: name: check_rescheduling_2 + ; ON: BUNDLE implicit-def $wh1, implicit-def $p1, implicit killed $p1 { + ; ON-NEXT: $wh1 = VLDA_dmw_lda_w_ag_idx_imm killed $p1, 0 + ; ON-NEXT: $p1 = MOV_mv_cg 1 + ; ON-NEXT: } + ; ON-NEXT: $r2 = LDA_TM killed $p1 :: (load (s32) from custom "TileMemory", addrspace 15) + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; + ; OFF-LABEL: name: check_rescheduling_2 + ; OFF: $wh1 = VLDA_dmw_lda_w_ag_idx_imm killed $p1, 0 + ; OFF-NEXT: $p1 = MOVA_lda_cg 1 + ; OFF-NEXT: $r2 = LDA_TM killed $p1 :: (load (s32) from custom "TileMemory", addrspace 15) + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + $wh1 = VLDA_dmw_lda_w_ag_idx_imm $p1, 0 + $p1 = MOV_PD_imm10_pseudo 1 + $r2 = LDA_TM $p1 :: (load (s32) from custom "TileMemory") +... + +--- +name: check_rescheduling_3 +alignment: 16 +body: | + bb.0.entry: + ; ON-LABEL: name: check_rescheduling_3 + ; ON: BUNDLE implicit-def $wh1, implicit-def $x7, implicit-def $wl7, implicit-def $wh7, implicit killed $p1, implicit killed $wl7 { + ; ON-NEXT: $wh1 = VLDA_dmw_lda_w_ag_idx_imm killed $p1, 0 + ; ON-NEXT: $x7 = VUNPACK_S8_S4 killed $wl7 + ; ON-NEXT: } + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; ON-NEXT: NOP + ; + ; OFF-LABEL: name: check_rescheduling_3 + ; OFF: $x7 = VUNPACK_S8_S4 killed $wl7 + ; OFF-NEXT: $wh1 = VLDB_dmw_ldb_ag_idx_imm killed $p1, 0 + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + ; OFF-NEXT: NOP + $x7 = VUNPACK_S8_S4 $wl7 + $wh1 = VLD_idx_imm_3x32_pseudo $p1, 0 +... + +--- +name: check_rescheduling_4 +alignment: 16 +body: | + bb.0.entry: + ; ON-LABEL: name: check_rescheduling_4 + ; ON: BUNDLE implicit-def $p0, implicit-def $r0 { + ; ON-NEXT: $p0 = MOVA_lda_cg 10 + ; ON-NEXT: $r0 = MOVX_alu_cg 10 + ; ON-NEXT: } + ; + ; OFF-LABEL: name: check_rescheduling_4 + ; OFF: $p0 = MOVA_lda_cg 10 + ; OFF-NEXT: $r0 = MOVA_lda_cg 10 + $p0 = MOVA_lda_cg 10 + $r0 = MOV_RLC_imm10_pseudo 10 +... 
diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/negative_latencies/load_accumulate.mir b/llvm/test/CodeGen/AIE/aie2/schedule/negative_latencies/load_accumulate.mir index ad76b8843308..2c4bc6617ed9 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/negative_latencies/load_accumulate.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/negative_latencies/load_accumulate.mir @@ -24,20 +24,26 @@ body: | ; CHECK: liveins: $p0, $p1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $dj0 = MOVA_lda_cg 14 - ; CHECK-NEXT: $r3 = LDA_S16_ag_idx $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVA_lda_cg 16 + ; CHECK-NEXT: BUNDLE implicit-def $r3, implicit-def $dj0, implicit $p0, implicit killed $dj0 { + ; CHECK-NEXT: $r3 = LDA_S16_ag_idx $p0, killed $dj0 + ; CHECK-NEXT: $dj0 = MOV_mv_cg 16 + ; CHECK-NEXT: } ; CHECK-NEXT: $r4 = LDA_S16_ag_idx $p0, killed $dj0 ; CHECK-NEXT: $dj0 = MOVA_lda_cg 18 - ; CHECK-NEXT: $r4 = LDA_S16_ag_idx $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVA_lda_cg 20 - ; CHECK-NEXT: $r4 = LDA_S16_ag_idx $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVA_lda_cg 22 + ; CHECK-NEXT: BUNDLE implicit-def $r4, implicit-def $dj0, implicit $p0, implicit killed $dj0 { + ; CHECK-NEXT: $r4 = LDA_S16_ag_idx $p0, killed $dj0 + ; CHECK-NEXT: $dj0 = MOV_mv_cg 20 + ; CHECK-NEXT: } + ; CHECK-NEXT: BUNDLE implicit-def $r4, implicit-def $dj0, implicit $p0, implicit killed $dj0 { + ; CHECK-NEXT: $r4 = LDA_S16_ag_idx $p0, killed $dj0 + ; CHECK-NEXT: $dj0 = MOV_mv_cg 22 + ; CHECK-NEXT: } ; CHECK-NEXT: $r4 = LDA_S16_ag_idx killed $p0, killed $dj0 ; CHECK-NEXT: NOP - ; CHECK-NEXT: $r3 = ADD killed $r3, $r4, implicit-def $srcarry + ; CHECK-NEXT: NOP ; CHECK-NEXT: $r3 = ADD killed $r3, $r4, implicit-def $srcarry ; CHECK-NEXT: RET implicit $lr - ; CHECK-NEXT: NOP + ; CHECK-NEXT: $r3 = ADD killed $r3, $r4, implicit-def $srcarry ; CHECK-NEXT: $r3 = ADD killed $r3, $r4, implicit-def $srcarry ; CHECK-NEXT: $r3 = ADD killed $r3, killed $r4, implicit-def $srcarry ; CHECK-NEXT: ST_dms_sts_idx_imm killed $r3, killed $p1, 0
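Note: the core decision made by canShiftSlot() in this patch is a slot-set intersection test followed by a search for an alternate slot assignment. The standalone sketch below is illustrative only and is not part of the patch: SlotSet, slotsConflict and shiftMultiSlot are made-up stand-ins for the AIE format interfaces (getSlotKind()/getSlotInfo()->getSlotSet()), assuming slot sets are plain bitmasks.

// Minimal sketch of the slot-shift idea with hypothetical, simplified types.
#include <cstdint>
#include <iostream>

using SlotSet = std::uint32_t; // bit i set => instruction may issue in slot i

// Two instructions compete for issue resources if their slot sets overlap
// (the analogue of checkSlotConflict in the patch).
bool slotsConflict(SlotSet A, SlotSet B) { return (A & B) != 0; }

// Given the slots already occupied in a cycle, try to place a multi-slot
// instruction (several candidate slots) so that a new fixed-slot instruction
// can also be issued. Returns the chosen slot, or -1 if no assignment works.
int shiftMultiSlot(SlotSet Occupied, SlotSet MultiSlotCandidates,
                   SlotSet NewInstrSlot) {
  for (int Slot = 0; Slot < 32; ++Slot) {
    SlotSet Bit = SlotSet(1) << Slot;
    if (!(MultiSlotCandidates & Bit))
      continue; // not a legal slot for this opcode
    if (Occupied & Bit)
      continue; // already taken in this cycle
    if (slotsConflict(Bit, NewInstrSlot))
      continue; // would still block the new instruction
    return Slot; // this slot frees the one the new instruction needs
  }
  return -1;
}

int main() {
  SlotSet Occupied = 0b0000;  // empty cycle
  SlotSet MultiSlot = 0b0011; // can go to slot 0 or slot 1
  SlotSet NewInstr = 0b0001;  // fixed to slot 0
  std::cout << shiftMultiSlot(Occupied, MultiSlot, NewInstr) << "\n"; // 1
  return 0;
}

In the patch itself the same decision is made by releasing and re-emitting descriptors against the hazard recognizer's scoreboard rather than a single bitmask, so memory-bank and FU hazards are taken into account as well.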