From a0a8fc04786a7bc490d4052ebdb61a6c09ebaeb4 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Thu, 17 Oct 2024 23:52:24 -0700 Subject: [PATCH] [AIEX] Extend rescheduling when the new SU is a multi-slot --- llvm/lib/Target/AIE/AIEHazardRecognizer.cpp | 15 +- llvm/lib/Target/AIE/AIEHazardRecognizer.h | 7 + llvm/lib/Target/AIE/AIEMachineScheduler.cpp | 168 +++++++++++------- .../GlobalISel/legalize-dyn-stackalloc.ll | 34 ++-- llvm/test/CodeGen/AIE/aie2/accfloat.ll | 8 +- llvm/test/CodeGen/AIE/aie2/addr_1d2d3d.ll | 10 +- llvm/test/CodeGen/AIE/aie2/aiev2_v2int32.ll | 4 +- llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll | 34 ++-- .../CodeGen/AIE/aie2/end-to-end/Add2D-red.ll | 4 +- llvm/test/CodeGen/AIE/aie2/extract.ll | 15 +- .../CodeGen/AIE/aie2/hardware-loops/nested.ll | 4 +- .../AIE/aie2/hardware-loops/sibling.ll | 6 +- .../CodeGen/AIE/aie2/hardware-loops/simple.ll | 5 +- .../AIE/aie2/hardware-loops/unknown-tc.ll | 6 +- .../CodeGen/AIE/aie2/intrinsics-128bit.ll | 33 ++-- .../CodeGen/AIE/aie2/intrinsics-shufflevec.ll | 13 +- llvm/test/CodeGen/AIE/aie2/ld_128.ll | 5 +- llvm/test/CodeGen/AIE/aie2/memcalls.ll | 4 +- llvm/test/CodeGen/AIE/aie2/odd-stackoffset.ll | 11 +- llvm/test/CodeGen/AIE/aie2/set.ll | 15 +- llvm/test/CodeGen/AIE/aie2/vaddmac.ll | 28 +-- llvm/test/CodeGen/AIE/aie2/vextract.ll | 18 +- llvm/test/CodeGen/AIE/aie2/vmac.ll | 35 ++-- llvm/test/CodeGen/AIE/aie2/vmul.ll | 4 +- llvm/test/CodeGen/AIE/aie2/vnegmul.ll | 3 +- llvm/test/CodeGen/AIE/aie2/vst_srs.ll | 36 ++-- llvm/test/CodeGen/AIE/aie2/vsub_acc.ll | 6 +- 27 files changed, 246 insertions(+), 285 deletions(-) diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp index ec7cee3d47b3..8716bc2360a8 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.cpp @@ -467,13 +467,26 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType( ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI, int DeltaCycles) { + assert(!TII->getFormatInterface()->getAlternateInstsOpcode(MI.getOpcode())); return checkConflict(Scoreboard, MI, DeltaCycles); } ConflictTypeBits AIEHazardRecognizer::checkConflict( const ResourceScoreboard &Scoreboard, MachineInstr &MI, int DeltaCycles) const { - const MCInstrDesc &Desc = MI.getDesc(); + assert(!TII->getFormatInterface()->getAlternateInstsOpcode(MI.getOpcode())); + return checkConflict(Scoreboard, MI, MI.getDesc(), DeltaCycles); +} + +ConflictTypeBits AIEHazardRecognizer::checkConflict(MachineInstr &MI, + const MCInstrDesc &Desc, + int DeltaCycles) { + return checkConflict(Scoreboard, MI, Desc, DeltaCycles); +} + +ConflictTypeBits AIEHazardRecognizer::checkConflict( + const ResourceScoreboard &Scoreboard, MachineInstr &MI, + const MCInstrDesc &Desc, int DeltaCycles) const { const unsigned SchedClass = TII->getSchedClass(Desc, MI.operands(), MI.getMF()->getRegInfo()); const MemoryBankBits MemoryBanks = getMemoryBanks(&MI); diff --git a/llvm/lib/Target/AIE/AIEHazardRecognizer.h b/llvm/lib/Target/AIE/AIEHazardRecognizer.h index 92b4f8c70a7e..ed225301264c 100644 --- a/llvm/lib/Target/AIE/AIEHazardRecognizer.h +++ b/llvm/lib/Target/AIE/AIEHazardRecognizer.h @@ -215,6 +215,13 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer { iterator_range MIOperands, const MachineRegisterInfo &MRI, int DeltaCycles); + ConflictTypeBits + checkConflict(const ResourceScoreboard &Scoreboard, + MachineInstr &MI, const MCInstrDesc &Desc, + int DeltaCycles) const; + ConflictTypeBits checkConflict(MachineInstr &MI, const MCInstrDesc &Desc, + int DeltaCycles); + ConflictTypeBits checkConflict(const ResourceScoreboard &Scoreboard, MachineInstr &MI, int DeltaCycles) const; diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp index 2b85de4b26a9..7e89595cdc81 100644 --- a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp +++ b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp @@ -507,16 +507,12 @@ bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone, const AIEBaseMCFormats &Formats = *getTII(*Zone.DAG)->getFormatInterface(); AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Zone); + MachineInstr *NewMI = SU.getInstr(); + std::vector ScheduledMultiSlotInsts; bool CanShiftSlot = false; - if (!(!Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) && - (HR.checkConflict(*SU.getInstr(), DeltaCycle) & - static_cast(AIEHazardRecognizer::ConflictType::Format)))) { - // We are only interested in single slot instructions and instructions that - // have only format hazard. - // TODO : Extend this to SUs that are multi-slot and have only format hazard - return false; - } + // Find and cache if there are any multi-slot instructions scheduled in the + // same delta cycle for (MachineInstr &MI : *Zone.DAG) { SUnit *ZoneSU = Zone.DAG->getSUnit(&MI); if (!ZoneSU) @@ -529,76 +525,114 @@ bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone, static_cast(CurrCycle - DeltaCycle)) continue; - // Check for a MultiSlot instruction scheduled in the same DeltaCycle, we - // focus on multi-slot because they can be scheduled in different slots + // Check for a MultiSlot instruction scheduled in the same DeltaCycle, + // we focus on multi-slot because they can be scheduled in different + // slots auto AltOpcodes = Formats.getAlternateInstsOpcode(MI.getOpcode()); if (!AltOpcodes) continue; + ScheduledMultiSlotInsts.push_back(&MI); + } - // Check if the scheduled multi-slot instruction has a slot conflict - // with the new instruction, if so we might have the possiblity to shift - // the multi-slot and schedule the new instruction. - if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(&MI), - SU.getInstr()->getOpcode(), Formats)) - continue; + // If there are no multi-slot instructions scheduled in the same DeltaCycle we + // cannot shift any instuction to a different slot. + if (ScheduledMultiSlotInsts.empty()) + return false; - // Release the multi-slot instruction from the scoreboard to check if any - // other alternate opcode in presence of the new instruction will not create - // a hazard. - HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(&MI), - HR.getMemoryBanks(&MI), MI.operands(), - MI.getMF()->getRegInfo(), - CurrCycle - ZoneSU->BotReadyCycle); - - MachineInstr *NewMI = SU.getInstr(); - // Check if the new instuction can be scheduled after unscheduling - // the conflicting multi-slot instruction. - if (HR.getHazardType(NewMI->getDesc(), HR.getMemoryBanks(NewMI), - NewMI->operands(), NewMI->getMF()->getRegInfo(), - DeltaCycle) != - ScheduleHazardRecognizer::HazardType::NoHazard) { - // If the new instruction cannot be scheduled after unscheduling the - // mulit-slot revert back the state of scoreboard to original state and - // continue. - HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI), - HR.getMemoryBanks(&MI), MI.operands(), - MI.getMF()->getRegInfo(), - CurrCycle - ZoneSU->BotReadyCycle); + const std::vector *AlternateOpcodes; + auto DefaultOpcode = std::vector{SU.getInstr()->getOpcode()}; + AlternateOpcodes = + Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) + ? Formats.getAlternateInstsOpcode(SU.getInstr()->getOpcode()) + : &DefaultOpcode; + + for (const unsigned int NewMIAltOpcode : *AlternateOpcodes) { + const MCInstrDesc &NewMIAltDesc = getTII(*Zone.DAG)->get(NewMIAltOpcode); + if (!(HR.checkConflict(*NewMI, NewMIAltDesc, DeltaCycle) & + static_cast(AIEHazardRecognizer::ConflictType::Format))) continue; - } - // Emit the new instruction in the scoreboard. This will help us - // to check if the previously unscheduled multi-slot instruction - // can be scheduled in the same cycle, with an alternate opcode. - HR.emitInScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI), - NewMI->operands(), NewMI->getMF()->getRegInfo(), - DeltaCycle); - - // Check if the previously unscheduled multi-slot instruction - // can be rescheduled in presense of the new instruction in the - // same cycle, with a different opcode. - for (const auto AltOpcodeInside : *AltOpcodes) { - const MCInstrDesc &Desc = getTII(*Zone.DAG)->get(AltOpcodeInside); - if (HR.getHazardType(Desc, HR.getMemoryBanks(&MI), MI.operands(), - MI.getMF()->getRegInfo(), DeltaCycle) == + for (MachineInstr *MI : ScheduledMultiSlotInsts) { + SUnit *ZoneSU = Zone.DAG->getSUnit(MI); + const int CurrCycle = Zone.getCurrCycle(); + auto AltOpcodes = Formats.getAlternateInstsOpcode(MI->getOpcode()); + + // Check if the scheduled multi-slot instruction has a slot conflict + // with the new instruction, if so we might have the possiblity to shift + // the multi-slot and schedule the new instruction. + if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(MI), + NewMIAltOpcode, Formats)) + continue; + + // Release the multi-slot instruction from the scoreboard to check if + // any other alternate opcode in presence of the new instruction will + // not create a hazard. + HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(MI), + HR.getMemoryBanks(MI), MI->operands(), + MI->getMF()->getRegInfo(), + CurrCycle - ZoneSU->BotReadyCycle); + + // Check if the new instuction can be scheduled after unscheduling + // the conflicting multi-slot instruction. + if (HR.getHazardType(NewMIAltDesc, HR.getMemoryBanks(NewMI), + NewMI->operands(), NewMI->getMF()->getRegInfo(), + DeltaCycle) != ScheduleHazardRecognizer::HazardType::NoHazard) { - // Cache the information to mutate the instruction during bumpNode() - MutateInstruction.insert( - std::make_pair(NewMI, std::make_pair(&MI, &Desc))); - CanShiftSlot = true; - break; + // If the new instruction cannot be scheduled after unscheduling the + // mulit-slot revert back the state of scoreboard to original state + // and continue. + + HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(MI), + HR.getMemoryBanks(MI), MI->operands(), + MI->getMF()->getRegInfo(), + CurrCycle - ZoneSU->BotReadyCycle); + continue; } - } - // Revert back the state of scoreboard to original state. - HR.releaseFromScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI), - NewMI->operands(), NewMI->getMF()->getRegInfo(), - DeltaCycle); - HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI), - HR.getMemoryBanks(&MI), MI.operands(), - MI.getMF()->getRegInfo(), - CurrCycle - ZoneSU->BotReadyCycle); + // Emit the new instruction in the scoreboard. This will help us + // to check if the previously unscheduled multi-slot instruction + // can be scheduled in the same cycle, with an alternate opcode. + HR.emitInScoreboard(NewMIAltDesc, HR.getMemoryBanks(NewMI), + NewMI->operands(), NewMI->getMF()->getRegInfo(), + DeltaCycle); + + // Check if the previously unscheduled multi-slot instruction + // can be rescheduled in presense of the new instruction in the + // same cycle, with a different opcode. + for (const auto AltOpcodeInside : *AltOpcodes) { + const MCInstrDesc &Desc = getTII(*Zone.DAG)->get(AltOpcodeInside); + if (HR.getHazardType(Desc, HR.getMemoryBanks(MI), MI->operands(), + MI->getMF()->getRegInfo(), DeltaCycle) == + ScheduleHazardRecognizer::HazardType::NoHazard) { + // Cache the information to mutate the instruction during bumpNode() + MutateInstruction.insert( + std::make_pair(NewMI, std::make_pair(MI, &Desc))); + + // if the new instuction was a multi-slot instuction and it failed the + // general check for isAvailabeNode() this means we have not set the + // selected opcode for the instruction. Set the selected opcode for + // the instruction. + if (AlternateOpcodes->size() > 1) + HR.getSelectedAltDescs().setAlternateDescriptor(NewMI, + &NewMIAltDesc); + + CanShiftSlot = true; + break; + } + } + + // Revert back the state of scoreboard to original state. + HR.releaseFromScoreboard(NewMIAltDesc, HR.getMemoryBanks(NewMI), + NewMI->operands(), NewMI->getMF()->getRegInfo(), + DeltaCycle); + HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(MI), + HR.getMemoryBanks(MI), MI->operands(), + MI->getMF()->getRegInfo(), + CurrCycle - ZoneSU->BotReadyCycle); + if (CanShiftSlot) + break; + } if (CanShiftSlot) break; } diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll b/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll index 2915bbb173ef..ac0d01a076bd 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll @@ -12,15 +12,13 @@ define void @test_simple_dyn_alloca(i32 noundef %n) { ; CHECK-LABEL: test_simple_dyn_alloca: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32; nopa ; nops ; nopxm ; nopv -; CHECK-NEXT: mova r1, #2; nopx +; CHECK-NEXT: padda [sp], #32; nopb ; movx r1, #2 +; CHECK-NEXT: lshl r0, r0, r1 ; CHECK-NEXT: st p7, [sp, #-32] // 4-byte Folded Spill ; CHECK-NEXT: mov p7, sp ; CHECK-NEXT: mov p1, sp -; CHECK-NEXT: lshl r0, r0, r1 -; CHECK-NEXT: mova r1, #-32 ; CHECK-NEXT: st lr, [sp, #-28] // 4-byte Folded Spill -; CHECK-NEXT: padda [p7], #-32 +; CHECK-NEXT: mova r1, #-32; padds [p7], #-32 ; CHECK-NEXT: add r0, r0, #31 ; CHECK-NEXT: jl #extern_call ; CHECK-NEXT: mov p0, p1 // Delay Slot 5 @@ -53,24 +51,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) { ; CHECK-LABEL: test_loop_dyn_alloca: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #64; nopxm -; CHECK-NEXT: st p7, [sp, #-64] // 4-byte Folded Spill -; CHECK-NEXT: mov p7, sp -; CHECK-NEXT: st r16, [sp, #-36] // 4-byte Folded Spill -; CHECK-NEXT: mova r16, #1 +; CHECK-NEXT: paddb [sp], #64; nopa ; nops ; nopxm ; nopv +; CHECK-NEXT: st r16, [sp, #-36]; nopx // 4-byte Folded Spill ; CHECK-NEXT: st r17, [sp, #-40] // 4-byte Folded Spill -; CHECK-NEXT: mova r17, #0 ; CHECK-NEXT: st r18, [sp, #-44] // 4-byte Folded Spill -; CHECK-NEXT: mova r18, #10 +; CHECK-NEXT: mova r16, #1; movx r18, #10; mov r17, #0 ; CHECK-NEXT: st r19, [sp, #-48] // 4-byte Folded Spill -; CHECK-NEXT: mova r19, #2 ; CHECK-NEXT: st r20, [sp, #-52] // 4-byte Folded Spill -; CHECK-NEXT: mova r20, #-32 ; CHECK-NEXT: st r21, [sp, #-56] // 4-byte Folded Spill -; CHECK-NEXT: mova r21, #0 +; CHECK-NEXT: st p7, [sp, #-64] // 4-byte Folded Spill +; CHECK-NEXT: mov p7, sp ; CHECK-NEXT: st lr, [sp, #-32] // 4-byte Folded Spill ; CHECK-NEXT: st p6, [sp, #-60] // 4-byte Folded Spill -; CHECK-NEXT: padda [p7], #-64 +; CHECK-NEXT: mova r19, #2; padds [p7], #-64; movx r21, #0; mov r20, #-32 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -133,11 +126,9 @@ define void @test_huge_stack(i32 noundef %n) #0 { ; CHECK-LABEL: test_huge_stack: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #40064; nopx +; CHECK-NEXT: nopa ; paddb [sp], #40064; nopxm ; CHECK-NEXT: movxm m0, #-40064 -; CHECK-NEXT: mova r1, #0 -; CHECK-NEXT: mova r2, #2 -; CHECK-NEXT: mova r3, #-32 +; CHECK-NEXT: mova r1, #0; movx r3, #-32; mov r2, #2 ; CHECK-NEXT: st p7, [sp, #-40064] // 4-byte Folded Spill ; CHECK-NEXT: mov p7, sp ; CHECK-NEXT: mov p1, sp @@ -150,11 +141,10 @@ define void @test_huge_stack(i32 noundef %n) #0 { ; CHECK-NEXT: mov p2, p7 ; CHECK-NEXT: mov p6, p7 ; CHECK-NEXT: paddb [p0], m0 -; CHECK-NEXT: paddb [p6], #-32 ; CHECK-NEXT: movxm m0, #-40032 ; CHECK-NEXT: st r0, [p0, #0] ; CHECK-NEXT: lda r0, [p0, #0] -; CHECK-NEXT: paddb [p2], m0 +; CHECK-NEXT: paddb [p6], #-32; padds [p2], m0 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov r16, p2 ; CHECK-NEXT: st p0, [p6, #0] diff --git a/llvm/test/CodeGen/AIE/aie2/accfloat.ll b/llvm/test/CodeGen/AIE/aie2/accfloat.ll index 789f45baac0b..06c05bc988ea 100644 --- a/llvm/test/CodeGen/AIE/aie2/accfloat.ll +++ b/llvm/test/CodeGen/AIE/aie2/accfloat.ll @@ -11,9 +11,7 @@ define dso_local noundef <8 x i64> @test_add_conf(<8 x i64> noundef %acc1, <8 x ; CHECK-LABEL: test_add_conf: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; mova r3, #12; nops ; nopxm ; nopv -; CHECK-NEXT: mova r4, #13; nopx -; CHECK-NEXT: mova r5, #28 +; CHECK-NEXT: mova r3, #12; movx r5, #28; mov r4, #13 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: lshl r2, r2, r4 ; CHECK-NEXT: or r0, r1, r0 @@ -41,9 +39,7 @@ define dso_local noundef <8 x i64> @test_sub_conf(<8 x i64> noundef %acc1, <8 x ; CHECK-LABEL: test_sub_conf: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; mova r3, #12; nops ; nopxm ; nopv -; CHECK-NEXT: mova r4, #13; nopx -; CHECK-NEXT: mova r5, #28 +; CHECK-NEXT: mova r3, #12; movx r5, #28; mov r4, #13 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: lshl r2, r2, r4 ; CHECK-NEXT: or r0, r1, r0 diff --git a/llvm/test/CodeGen/AIE/aie2/addr_1d2d3d.ll b/llvm/test/CodeGen/AIE/aie2/addr_1d2d3d.ll index d39b5d32afe6..7cffd7efb679 100644 --- a/llvm/test/CodeGen/AIE/aie2/addr_1d2d3d.ll +++ b/llvm/test/CodeGen/AIE/aie2/addr_1d2d3d.ll @@ -170,9 +170,8 @@ define dso_local ptr @test_add_2d_ptr_backTOback_call(ptr %a, i32 noundef %off, ; CHECK-LABEL: test_add_2d_ptr_backTOback_call: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopx ; mov p0, p1 -; CHECK-NEXT: mova r3, #6 -; CHECK-NEXT: mova dc0, #0 +; CHECK-NEXT: nopa ; nopx ; mov p0, p1 +; CHECK-NEXT: mova r3, #6; mov dc0, #0 ; CHECK-NEXT: mov dn0, r1 ; CHECK-NEXT: lshl r0, r0, r3 ; CHECK-NEXT: ret lr @@ -200,9 +199,8 @@ define dso_local ptr @test_add_3d_ptr_backTOback_call(ptr %a, i32 noundef %off, ; CHECK-LABEL: test_add_3d_ptr_backTOback_call: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopx ; mov p0, p1 -; CHECK-NEXT: mova r5, #6 -; CHECK-NEXT: mova dc0, #0 +; CHECK-NEXT: nopa ; nopx ; mov p0, p1 +; CHECK-NEXT: mova r5, #6; mov dc0, #0 ; CHECK-NEXT: mov dn0, r1 ; CHECK-NEXT: mov dn4, r3 ; CHECK-NEXT: lshl r0, r0, r5 diff --git a/llvm/test/CodeGen/AIE/aie2/aiev2_v2int32.ll b/llvm/test/CodeGen/AIE/aie2/aiev2_v2int32.ll index 81cfb1e47739..037a5157b8a2 100644 --- a/llvm/test/CodeGen/AIE/aie2/aiev2_v2int32.ll +++ b/llvm/test/CodeGen/AIE/aie2/aiev2_v2int32.ll @@ -29,8 +29,8 @@ define dso_local noundef i64 @_Z14return_v2int32v() local_unnamed_addr #0 { ; CHECK-NEXT: nopx // Delay Slot 5 ; CHECK-NEXT: nop // Delay Slot 4 ; CHECK-NEXT: nop // Delay Slot 3 -; CHECK-NEXT: mova r0, #100 // Delay Slot 2 -; CHECK-NEXT: mova r1, #0 // Delay Slot 1 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: mova r0, #100; movx r1, #0 // Delay Slot 1 entry: ret i64 100 } diff --git a/llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll b/llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll index 337fecd1e4bd..ab01f8ad3348 100644 --- a/llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll +++ b/llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll @@ -12,15 +12,13 @@ define void @test_simple_dyn_alloca(i32 noundef %n) { ; CHECK-LABEL: test_simple_dyn_alloca: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32; nopa ; nops ; nopxm ; nopv -; CHECK-NEXT: mova r1, #2; nopx +; CHECK-NEXT: padda [sp], #32; nopb ; movx r1, #2 +; CHECK-NEXT: lshl r0, r0, r1 ; CHECK-NEXT: st p7, [sp, #-32] // 4-byte Folded Spill ; CHECK-NEXT: mov p7, sp ; CHECK-NEXT: mov p1, sp -; CHECK-NEXT: lshl r0, r0, r1 -; CHECK-NEXT: mova r1, #-32 ; CHECK-NEXT: st lr, [sp, #-28] // 4-byte Folded Spill -; CHECK-NEXT: padda [p7], #-32 +; CHECK-NEXT: mova r1, #-32; padds [p7], #-32 ; CHECK-NEXT: add r0, r0, #31 ; CHECK-NEXT: jl #extern_call ; CHECK-NEXT: mov p0, p1 // Delay Slot 5 @@ -53,24 +51,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) { ; CHECK-LABEL: test_loop_dyn_alloca: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #64; nopxm -; CHECK-NEXT: st p7, [sp, #-64] // 4-byte Folded Spill -; CHECK-NEXT: mov p7, sp -; CHECK-NEXT: st r16, [sp, #-36] // 4-byte Folded Spill -; CHECK-NEXT: mova r16, #1 +; CHECK-NEXT: paddb [sp], #64; nopa ; nops ; nopxm ; nopv +; CHECK-NEXT: st r16, [sp, #-36]; nopx // 4-byte Folded Spill ; CHECK-NEXT: st r17, [sp, #-40] // 4-byte Folded Spill -; CHECK-NEXT: mova r17, #0 ; CHECK-NEXT: st r18, [sp, #-44] // 4-byte Folded Spill -; CHECK-NEXT: mova r18, #10 +; CHECK-NEXT: mova r16, #1; movx r18, #10; mov r17, #0 ; CHECK-NEXT: st r19, [sp, #-48] // 4-byte Folded Spill -; CHECK-NEXT: mova r19, #2 ; CHECK-NEXT: st r20, [sp, #-52] // 4-byte Folded Spill -; CHECK-NEXT: mova r20, #-32 ; CHECK-NEXT: st r21, [sp, #-56] // 4-byte Folded Spill -; CHECK-NEXT: mova r21, #0 +; CHECK-NEXT: st p7, [sp, #-64] // 4-byte Folded Spill +; CHECK-NEXT: mov p7, sp ; CHECK-NEXT: st lr, [sp, #-32] // 4-byte Folded Spill ; CHECK-NEXT: st p6, [sp, #-60] // 4-byte Folded Spill -; CHECK-NEXT: padda [p7], #-64 +; CHECK-NEXT: mova r19, #2; padds [p7], #-64; movx r21, #0; mov r20, #-32 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -133,11 +126,9 @@ define void @test_huge_stack(i32 noundef %n) #0 { ; CHECK-LABEL: test_huge_stack: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #40064; nopx +; CHECK-NEXT: nopa ; paddb [sp], #40064; nopxm ; CHECK-NEXT: movxm m0, #-40064 -; CHECK-NEXT: mova r1, #0 -; CHECK-NEXT: mova r2, #2 -; CHECK-NEXT: mova r3, #-32 +; CHECK-NEXT: mova r1, #0; movx r3, #-32; mov r2, #2 ; CHECK-NEXT: st p7, [sp, #-40064] // 4-byte Folded Spill ; CHECK-NEXT: mov p7, sp ; CHECK-NEXT: mov p1, sp @@ -150,11 +141,10 @@ define void @test_huge_stack(i32 noundef %n) #0 { ; CHECK-NEXT: mov p2, p7 ; CHECK-NEXT: mov p6, p7 ; CHECK-NEXT: paddb [p0], m0 -; CHECK-NEXT: paddb [p6], #-32 ; CHECK-NEXT: movxm m0, #-40032 ; CHECK-NEXT: st r0, [p0, #0] ; CHECK-NEXT: lda r0, [p0, #0] -; CHECK-NEXT: paddb [p2], m0 +; CHECK-NEXT: paddb [p6], #-32; padds [p2], m0 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov r16, p2 ; CHECK-NEXT: st p0, [p6, #0] diff --git a/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll b/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll index ed30b233b1a9..48829480f339 100644 --- a/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll +++ b/llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll @@ -84,8 +84,8 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm ; ASM-NEXT: st dn4, [p5, #0]; nez r0, r0 // Delay Slot 5 ; ASM-NEXT: st r0, [p6, #0] // Delay Slot 4 ; ASM-NEXT: paddb [p2], m3; st r5, [p7, #0] // Delay Slot 3 -; ASM-NEXT: padda [p1], m2; paddb [p2], m5; and r8, r1, r6; st r3, [p4, #0] // Delay Slot 2 -; ASM-NEXT: mova r6, #0; paddb [p2], m4; st r8, [p0, #0] // Delay Slot 1 +; ASM-NEXT: st r3, [p4, #0]; paddb [p2], m5; and r8, r1, r6 // Delay Slot 2 +; ASM-NEXT: padda [p1], m2; paddb [p2], m4; movx r6, #0; st r8, [p0, #0] // Delay Slot 1 ; ASM-NEXT: // %bb.1: ; ASM-NEXT: nopb ; nopa ; nops ; j #.LBB0_6; nopv ; ASM-NEXT: nopa ; nopx // Delay Slot 5 diff --git a/llvm/test/CodeGen/AIE/aie2/extract.ll b/llvm/test/CodeGen/AIE/aie2/extract.ll index 54630f271bca..f8a6c292fc3b 100644 --- a/llvm/test/CodeGen/AIE/aie2/extract.ll +++ b/llvm/test/CodeGen/AIE/aie2/extract.ll @@ -72,8 +72,7 @@ define dso_local noundef <32 x i8> @_Z30test_extract_v64uint4_256_1024Dv128_DU8_ ; CHECK-LABEL: _Z30test_extract_v64uint4_256_1024Dv128_DU8_i: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #160; nopx -; CHECK-NEXT: mova r2, #4 +; CHECK-NEXT: nopa ; paddb [sp], #160; nopx ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: st r16, [sp, #-160] // 4-byte Folded Spill ; CHECK-NEXT: vst wl4, [sp, #-128] // 32-byte Folded Spill @@ -82,7 +81,7 @@ define dso_local noundef <32 x i8> @_Z30test_extract_v64uint4_256_1024Dv128_DU8_ ; CHECK-NEXT: vst wl5, [sp, #-64] // 32-byte Folded Spill Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-156] // 4-byte Folded Spill Delay Slot 3 ; CHECK-NEXT: vst wh5, [sp, #-32] // 32-byte Folded Spill Delay Slot 2 -; CHECK-NEXT: mova r16, #2 // Delay Slot 1 +; CHECK-NEXT: mova r2, #4; movx r16, #2 // Delay Slot 1 ; CHECK-NEXT: nopb ; nopa ; nops ; eq r1, r0, r16; nopm ; nopv ; CHECK-NEXT: jnz r1, #.LBB2_5 ; CHECK-NEXT: nop // Delay Slot 5 @@ -434,8 +433,7 @@ define dso_local noundef <4 x i64> @_Z20test_extract_v8acc32Dv32_u7__acc32i(<16 ; CHECK-LABEL: _Z20test_extract_v8acc32Dv32_u7__acc32i: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #160; nopx -; CHECK-NEXT: mova r2, #4 +; CHECK-NEXT: nopa ; paddb [sp], #160; nopx ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: st r16, [sp, #-160] // 4-byte Folded Spill ; CHECK-NEXT: vst amll1, [sp, #-128] // 32-byte Folded Spill @@ -444,7 +442,7 @@ define dso_local noundef <4 x i64> @_Z20test_extract_v8acc32Dv32_u7__acc32i(<16 ; CHECK-NEXT: vst amhl1, [sp, #-64] // 32-byte Folded Spill Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-156] // 4-byte Folded Spill Delay Slot 3 ; CHECK-NEXT: vst amhh1, [sp, #-32] // 32-byte Folded Spill Delay Slot 2 -; CHECK-NEXT: mova r16, #2 // Delay Slot 1 +; CHECK-NEXT: mova r2, #4; movx r16, #2 // Delay Slot 1 ; CHECK-NEXT: nopb ; nopa ; nops ; eq r1, r0, r16; nopm ; nopv ; CHECK-NEXT: jnz r1, #.LBB13_5 ; CHECK-NEXT: nop // Delay Slot 5 @@ -665,8 +663,7 @@ define dso_local noundef <4 x i64> @_Z20test_extract_v4acc64Dv16_u7__acc64i(<16 ; CHECK-LABEL: _Z20test_extract_v4acc64Dv16_u7__acc64i: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #160; nopx -; CHECK-NEXT: mova r2, #4 +; CHECK-NEXT: nopa ; paddb [sp], #160; nopx ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: st r16, [sp, #-160] // 4-byte Folded Spill ; CHECK-NEXT: vst amll1, [sp, #-128] // 32-byte Folded Spill @@ -675,7 +672,7 @@ define dso_local noundef <4 x i64> @_Z20test_extract_v4acc64Dv16_u7__acc64i(<16 ; CHECK-NEXT: vst amhl1, [sp, #-64] // 32-byte Folded Spill Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-156] // 4-byte Folded Spill Delay Slot 3 ; CHECK-NEXT: vst amhh1, [sp, #-32] // 32-byte Folded Spill Delay Slot 2 -; CHECK-NEXT: mova r16, #2 // Delay Slot 1 +; CHECK-NEXT: mova r2, #4; movx r16, #2 // Delay Slot 1 ; CHECK-NEXT: nopb ; nopa ; nops ; eq r1, r0, r16; nopm ; nopv ; CHECK-NEXT: jnz r1, #.LBB20_5 ; CHECK-NEXT: nop // Delay Slot 5 diff --git a/llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll b/llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll index 66977e1274ca..42b7a2cab608 100644 --- a/llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll +++ b/llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll @@ -20,8 +20,8 @@ define void @nested(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef ; CHECK-NEXT: // %bb.0: // %for.cond3.preheader.lr.ph ; CHECK-NEXT: nopa ; nopb ; j #.LBB0_3 ; CHECK-NEXT: nop // Delay Slot 5 -; CHECK-NEXT: mova r3, #0 // Delay Slot 4 -; CHECK-NEXT: mova r4, #2 // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mova r3, #0; movx r4, #2 // Delay Slot 3 ; CHECK-NEXT: movxm p2, #.LBB0_1 // Delay Slot 2 ; CHECK-NEXT: lda r2, [p0, #0] // Delay Slot 1 ; CHECK-NEXT: .p2align 4 diff --git a/llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll b/llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll index 5e1501961f7a..74396c4e2ddf 100644 --- a/llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll +++ b/llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll @@ -12,11 +12,9 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef ; CHECK-LABEL: sibling: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %for.body.lr.ph -; CHECK-NEXT: mova r2, #0; nopxm -; CHECK-NEXT: add.nc r0, r0, #-1 -; CHECK-NEXT: mova r4, #2 +; CHECK-NEXT: nopa ; nopb ; nopx ; add.nc r0, r0, #-1 ; CHECK-NEXT: movxm p2, #.LBB0_1 -; CHECK-NEXT: mova r5, #0 +; CHECK-NEXT: mova r2, #0; movx r5, #0; mov r4, #2 ; CHECK-NEXT: lda r3, [p0, #0] ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: // %for.body diff --git a/llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll b/llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll index 7168d4023e37..fccb281c9c64 100644 --- a/llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll +++ b/llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll @@ -12,9 +12,8 @@ define void @simple(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef ; CHECK-LABEL: simple: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %for.body.lr.ph -; CHECK-NEXT: mova r2, #0; nopb ; nopxm ; nops -; CHECK-NEXT: add.nc r0, r0, #-1 -; CHECK-NEXT: mova r3, #2 +; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; add.nc r0, r0, #-1; nopv +; CHECK-NEXT: mova r2, #0; movx r3, #2 ; CHECK-NEXT: movxm p2, #.LBB0_1 ; CHECK-NEXT: lda r1, [p0, #0] ; CHECK-NEXT: .p2align 4 diff --git a/llvm/test/CodeGen/AIE/aie2/hardware-loops/unknown-tc.ll b/llvm/test/CodeGen/AIE/aie2/hardware-loops/unknown-tc.ll index 22a430126228..e4cebabe0c0a 100644 --- a/llvm/test/CodeGen/AIE/aie2/hardware-loops/unknown-tc.ll +++ b/llvm/test/CodeGen/AIE/aie2/hardware-loops/unknown-tc.ll @@ -11,8 +11,7 @@ define void @cbz_exit(ptr %in, ptr %res) { ; CHECK-LABEL: cbz_exit: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r0, #-1; nopb ; nopxm -; CHECK-NEXT: mova r1, #2 +; CHECK-NEXT: nopb ; mova r0, #-1; nops ; movx r1, #2; nopm ; nopv ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -59,8 +58,7 @@ define void @cbnz_exit(ptr %in, ptr %res) { ; CHECK-LABEL: cbnz_exit: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r0, #-1; nopb ; nopxm -; CHECK-NEXT: mova r1, #2 +; CHECK-NEXT: nopb ; mova r0, #-1; nops ; movx r1, #2; nopm ; nopv ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AIE/aie2/intrinsics-128bit.ll b/llvm/test/CodeGen/AIE/aie2/intrinsics-128bit.ll index c112bc527753..d1e4f36100d4 100644 --- a/llvm/test/CodeGen/AIE/aie2/intrinsics-128bit.ll +++ b/llvm/test/CodeGen/AIE/aie2/intrinsics-128bit.ll @@ -32,9 +32,9 @@ define <32 x i16> @test_set_v32int16(i32 noundef %idx, ptr nocapture readonly % ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: vlda.128 wl0, [p0] ; CHECK-NEXT: nop -; CHECK-NEXT: mova r1, #4 +; CHECK-NEXT: nop ; CHECK-NEXT: ret lr -; CHECK-NEXT: mova r2, #64 // Delay Slot 5 +; CHECK-NEXT: mova r1, #4; movx r2, #64 // Delay Slot 5 ; CHECK-NEXT: lshl r0, r0, r1 // Delay Slot 4 ; CHECK-NEXT: sub r0, r2, r0 // Delay Slot 3 ; CHECK-NEXT: vshift x0, x0, x0, r0 // Delay Slot 2 @@ -55,11 +55,9 @@ define <64 x i8> @insert_128_in_512(<64 x i8> noundef %v, i32 noundef %idx, <16 ; CHECK-LABEL: insert_128_in_512: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r5, r16; nopv -; CHECK-NEXT: mova r1, #4 -; CHECK-NEXT: mova r2, #64 -; CHECK-NEXT: mova r3, #2 -; CHECK-NEXT: mova r4, #15 +; CHECK-NEXT: nopb ; mova r1, #4; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; mov r5, r16 +; CHECK-NEXT: mova r2, #64; movx r4, #15; mov r3, #2 ; CHECK-NEXT: lshl r1, r0, r1 ; CHECK-NEXT: lshl r0, r0, r3 ; CHECK-NEXT: ret lr @@ -87,11 +85,9 @@ define dso_local noundef <32 x i8> @insert_128_in_256(<32 x i8> noundef %v, i32 ; CHECK-LABEL: insert_128_in_256: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r5, r16; nopv -; CHECK-NEXT: mova r1, #4 -; CHECK-NEXT: mova r2, #64 -; CHECK-NEXT: mova r3, #2 -; CHECK-NEXT: mova r4, #15 +; CHECK-NEXT: nopb ; mova r1, #4; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; mov r5, r16 +; CHECK-NEXT: mova r2, #64; movx r4, #15; mov r3, #2 ; CHECK-NEXT: lshl r1, r0, r1 ; CHECK-NEXT: lshl r0, r0, r3 ; CHECK-NEXT: ret lr @@ -121,15 +117,13 @@ define <64 x i8> @test_concat_4_v32uint4(<16 x i8> noundef %v0, <16 x i8> nounde ; CHECK-LABEL: test_concat_4_v32uint4: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova r0, #48; nopx ; CHECK-NEXT: mov r3, r16 ; CHECK-NEXT: mov r4, r17 ; CHECK-NEXT: mov r5, r18 -; CHECK-NEXT: mova r0, #48 -; CHECK-NEXT: mova r1, #32 ; CHECK-NEXT: movxm r16, #3840 -; CHECK-NEXT: mova r2, #16 ; CHECK-NEXT: movxm r17, #61440 -; CHECK-NEXT: mova r18, #15 +; CHECK-NEXT: mova r1, #32; movx r18, #15; mov r2, #16 ; CHECK-NEXT: vshift x0, x0, x4, r0 ; CHECK-NEXT: vshift x4, x0, x6, r1 ; CHECK-NEXT: vsel.32 x0, x0, x4, r16 @@ -164,10 +158,9 @@ define <32 x i8> @test_concat_2_v32uint4(<16 x i8> noundef %v0, <16 x i8> nounde ; CHECK-LABEL: test_concat_2_v32uint4: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopx ; mov r1, r16 -; CHECK-NEXT: ret lr -; CHECK-NEXT: mova r0, #48 // Delay Slot 5 -; CHECK-NEXT: mova r16, #15 // Delay Slot 4 +; CHECK-NEXT: nopa ; ret lr ; nopm +; CHECK-NEXT: mov r1, r16 // Delay Slot 5 +; CHECK-NEXT: mova r0, #48; movx r16, #15 // Delay Slot 4 ; CHECK-NEXT: vshift x0, x0, x4, r0 // Delay Slot 3 ; CHECK-NEXT: vsel.32 x0, x0, x2, r16 // Delay Slot 2 ; CHECK-NEXT: mov r16, r1 // Delay Slot 1 diff --git a/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll b/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll index f61e90620642..2e79655064cc 100644 --- a/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll +++ b/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll @@ -93,16 +93,13 @@ define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, < ; CHECK-LABEL: test_insert_vector: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; nopb ; nopx ; mov r24, r16; nops +; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r27, r19; nopv +; CHECK-NEXT: nopa ; mov r26, r18 ; CHECK-NEXT: mov r25, r17 -; CHECK-NEXT: mov r26, r18 -; CHECK-NEXT: mov r27, r19 -; CHECK-NEXT: mova r19, #0 -; CHECK-NEXT: mova r18, #1 -; CHECK-NEXT: mova r17, #2 -; CHECK-NEXT: mova r16, #3 +; CHECK-NEXT: mov r24, r16 +; CHECK-NEXT: mova r19, #0; movx r16, #3; mov r18, #1 ; CHECK-NEXT: vextract.s32 r4, x4, r16 -; CHECK-NEXT: mova r16, #4 +; CHECK-NEXT: mova r17, #2; movx r16, #4 ; CHECK-NEXT: vextract.s32 r1, x4, r19 ; CHECK-NEXT: vextract.s32 r2, x4, r18 ; CHECK-NEXT: vextract.s32 r3, x4, r17 diff --git a/llvm/test/CodeGen/AIE/aie2/ld_128.ll b/llvm/test/CodeGen/AIE/aie2/ld_128.ll index 1f0ed5b2f6bf..f09a3eba1284 100644 --- a/llvm/test/CodeGen/AIE/aie2/ld_128.ll +++ b/llvm/test/CodeGen/AIE/aie2/ld_128.ll @@ -73,9 +73,8 @@ define dso_local noundef <8 x i32> @test4() { ; CHECK-LABEL: test4: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32; nopa ; nops ; nopxm ; nopv -; CHECK-NEXT: mova dj0, #512 -; CHECK-NEXT: mov p0, sp +; CHECK-NEXT: nopb ; padda [sp], #32; nops ; movxm dj0, #512; nopv +; CHECK-NEXT: nopx ; mov p0, sp ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vldb.128 wl0, [p0, dj0] ; CHECK-NEXT: ret lr diff --git a/llvm/test/CodeGen/AIE/aie2/memcalls.ll b/llvm/test/CodeGen/AIE/aie2/memcalls.ll index c1021232e7da..3bdc2255106d 100644 --- a/llvm/test/CodeGen/AIE/aie2/memcalls.ll +++ b/llvm/test/CodeGen/AIE/aie2/memcalls.ll @@ -68,10 +68,10 @@ define void @callmemset(ptr %p) { ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: nopa ; nopb ; jl #memset -; CHECK-NEXT: mova r0, #42 // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 5 ; CHECK-NEXT: paddb [sp], #32 // Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-32] // 4-byte Folded Spill Delay Slot 3 -; CHECK-NEXT: mova r1, #40 // Delay Slot 2 +; CHECK-NEXT: mova r0, #42; movx r1, #40 // Delay Slot 2 ; CHECK-NEXT: mov p1, p0 // Delay Slot 1 ; CHECK-NEXT: lda lr, [sp, #-32] // 4-byte Folded Reload ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/AIE/aie2/odd-stackoffset.ll b/llvm/test/CodeGen/AIE/aie2/odd-stackoffset.ll index 4b72acceaabf..d088f9a4839c 100644 --- a/llvm/test/CodeGen/AIE/aie2/odd-stackoffset.ll +++ b/llvm/test/CodeGen/AIE/aie2/odd-stackoffset.ll @@ -14,14 +14,13 @@ define void @f() { ; CHECK-LABEL: f: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32; nopa ; nops ; nopxm ; nopv -; CHECK-NEXT: mova m0, #-27; nopx +; CHECK-NEXT: paddb [sp], #32 ; CHECK-NEXT: jl #f0 -; CHECK-NEXT: mov p0, sp // Delay Slot 5 -; CHECK-NEXT: mov p1, sp // Delay Slot 4 +; CHECK-NEXT: mova m0, #-27 // Delay Slot 5 +; CHECK-NEXT: mov p0, sp // Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-32] // 4-byte Folded Spill Delay Slot 3 -; CHECK-NEXT: paddb [p0], #-28 // Delay Slot 2 -; CHECK-NEXT: paddb [p1], m0 // Delay Slot 1 +; CHECK-NEXT: mov p1, sp // Delay Slot 2 +; CHECK-NEXT: paddb [p0], #-28; padds [p1], m0 // Delay Slot 1 ; CHECK-NEXT: lda lr, [sp, #-32] // 4-byte Folded Reload ; CHECK-NEXT: nop ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/AIE/aie2/set.ll b/llvm/test/CodeGen/AIE/aie2/set.ll index 6cc8405f48f6..1f8f5dd0b5ee 100644 --- a/llvm/test/CodeGen/AIE/aie2/set.ll +++ b/llvm/test/CodeGen/AIE/aie2/set.ll @@ -72,15 +72,14 @@ define dso_local noundef <128 x i8> @_Z27test_set_v256uint4_1024_256iDv32_DU8_(i ; CHECK-LABEL: _Z27test_set_v256uint4_1024_256iDv32_DU8_: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #160; nopxm ; nops -; CHECK-NEXT: mova r2, #4 +; CHECK-NEXT: paddb [sp], #160; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: jl #__modsi3 ; CHECK-NEXT: vmov wh5, wl0 // Delay Slot 5 ; CHECK-NEXT: st r16, [sp, #-160] // 4-byte Folded Spill Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-156] // 4-byte Folded Spill Delay Slot 3 ; CHECK-NEXT: vst wh5, [sp, #-32] // 32-byte Folded Spill Delay Slot 2 -; CHECK-NEXT: mova r16, #2 // Delay Slot 1 +; CHECK-NEXT: mova r2, #4; movx r16, #2 // Delay Slot 1 ; CHECK-NEXT: nopb ; nopa ; nops ; eq r1, r0, r16; nopm ; nopv ; CHECK-NEXT: jnz r1, #.LBB2_5 ; CHECK-NEXT: nop // Delay Slot 5 @@ -378,15 +377,14 @@ define dso_local noundef <16 x i64> @_Z17test_set_v32acc32iDv8_u7__acc32(i32 nou ; CHECK-LABEL: _Z17test_set_v32acc32iDv8_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #160; nopxm ; nops -; CHECK-NEXT: mova r2, #4 +; CHECK-NEXT: paddb [sp], #160; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: jl #__modsi3 ; CHECK-NEXT: vmov amhl0, amll1 // Delay Slot 5 ; CHECK-NEXT: st r16, [sp, #-160] // 4-byte Folded Spill Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-156] // 4-byte Folded Spill Delay Slot 3 ; CHECK-NEXT: vst amhl0, [sp, #-64] // 32-byte Folded Spill Delay Slot 2 -; CHECK-NEXT: mova r16, #2 // Delay Slot 1 +; CHECK-NEXT: mova r2, #4; movx r16, #2 // Delay Slot 1 ; CHECK-NEXT: nopb ; nopa ; nops ; eq r1, r0, r16; nopm ; nopv ; CHECK-NEXT: jnz r1, #.LBB10_5 ; CHECK-NEXT: nop // Delay Slot 5 @@ -561,15 +559,14 @@ define dso_local noundef <16 x i64> @_Z17test_set_v16acc64iDv4_u7__acc64(i32 nou ; CHECK-LABEL: _Z17test_set_v16acc64iDv4_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #160; nopxm ; nops -; CHECK-NEXT: mova r2, #4 +; CHECK-NEXT: paddb [sp], #160; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: jl #__modsi3 ; CHECK-NEXT: vmov amhl0, amll1 // Delay Slot 5 ; CHECK-NEXT: st r16, [sp, #-160] // 4-byte Folded Spill Delay Slot 4 ; CHECK-NEXT: st lr, [sp, #-156] // 4-byte Folded Spill Delay Slot 3 ; CHECK-NEXT: vst amhl0, [sp, #-64] // 32-byte Folded Spill Delay Slot 2 -; CHECK-NEXT: mova r16, #2 // Delay Slot 1 +; CHECK-NEXT: mova r2, #4; movx r16, #2 // Delay Slot 1 ; CHECK-NEXT: nopb ; nopa ; nops ; eq r1, r0, r16; nopm ; nopv ; CHECK-NEXT: jnz r1, #.LBB14_5 ; CHECK-NEXT: nop // Delay Slot 5 diff --git a/llvm/test/CodeGen/AIE/aie2/vaddmac.ll b/llvm/test/CodeGen/AIE/aie2/vaddmac.ll index ab5a70ae1a34..7078248d85d7 100644 --- a/llvm/test/CodeGen/AIE/aie2/vaddmac.ll +++ b/llvm/test/CodeGen/AIE/aie2/vaddmac.ll @@ -11,8 +11,7 @@ define dso_local noundef <16 x i64> @test_addmac_acc32(<64 x i8> noundef %a, i32 ; CHECK-LABEL: test_addmac_acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopxm -; CHECK-NEXT: mova r3, #8 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r3, #8; nopm ; nopv ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -36,8 +35,7 @@ define dso_local noundef <16 x i64> @test_addmsc_acc32(<64 x i8> noundef %a, i32 ; CHECK-LABEL: test_addmsc_acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopxm -; CHECK-NEXT: mova r3, #8 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r3, #8; nopm ; nopv ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -61,8 +59,7 @@ define dso_local noundef <16 x i64> @test_submac_acc32(<64 x i8> noundef %a, i32 ; CHECK-LABEL: test_submac_acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopxm -; CHECK-NEXT: mova r3, #8 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r3, #8; nopm ; nopv ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -86,8 +83,7 @@ define dso_local noundef <16 x i64> @test_submsc_acc32(<64 x i8> noundef %a, i32 ; CHECK-LABEL: test_submsc_acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopxm -; CHECK-NEXT: mova r3, #8 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r3, #8; nopm ; nopv ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -111,9 +107,7 @@ define dso_local noundef <16 x i64> @test_addmac_acc64(<32 x i16> noundef %a, i3 ; CHECK-LABEL: test_addmac_acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9 -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: mova r4, #18 +; CHECK-NEXT: mova r2, #9; nopb ; movx r4, #18; mov r3, #8 ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -140,9 +134,7 @@ define dso_local noundef <16 x i64> @test_addmsc_acc64(<32 x i16> noundef %a, i3 ; CHECK-LABEL: test_addmsc_acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9 -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: mova r4, #18 +; CHECK-NEXT: mova r2, #9; nopb ; movx r4, #18; mov r3, #8 ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -169,9 +161,7 @@ define dso_local noundef <16 x i64> @test_subadd_acc64(<32 x i16> noundef %a, i3 ; CHECK-LABEL: test_subadd_acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9 -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: mova r4, #18 +; CHECK-NEXT: mova r2, #9; nopb ; movx r4, #18; mov r3, #8 ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 @@ -198,9 +188,7 @@ define dso_local noundef <16 x i64> @test_submsc_acc64(<32 x i16> noundef %a, i3 ; CHECK-LABEL: test_submsc_acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9 -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: mova r4, #18 +; CHECK-NEXT: mova r2, #9; nopb ; movx r4, #18; mov r3, #8 ; CHECK-NEXT: vmov cm0, cm1 ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 diff --git a/llvm/test/CodeGen/AIE/aie2/vextract.ll b/llvm/test/CodeGen/AIE/aie2/vextract.ll index c83f999ffa27..e13a9f1ce41a 100644 --- a/llvm/test/CodeGen/AIE/aie2/vextract.ll +++ b/llvm/test/CodeGen/AIE/aie2/vextract.ll @@ -417,16 +417,16 @@ define dso_local noundef signext i8 @_Z5test1Dv128_a(<128 x i8> noundef %vec) { ; CHECK-LABEL: _Z5test1Dv128_a: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32; nopxm +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: lda r0, [p0, #0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop +; CHECK-NEXT: nop ; CHECK-NEXT: mov r3, r16 -; CHECK-NEXT: mova r1, #0 -; CHECK-NEXT: movx r2, #64 +; CHECK-NEXT: movx r1, #0; mov r2, #64 ; CHECK-NEXT: lt r27, r0, r2 ; CHECK-NEXT: sel.nez r1, r1, r2, r27 ; CHECK-NEXT: add r24, r27, #-1 @@ -448,16 +448,16 @@ define dso_local noundef signext i16 @_Z5test2Dv64_s(<64 x i16> noundef %vec) { ; CHECK-LABEL: _Z5test2Dv64_s: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: paddb [sp], #32; nopx ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: lda r0, [p0, #0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop +; CHECK-NEXT: nop ; CHECK-NEXT: mov r3, r16 ; CHECK-NEXT: mov r4, r17 -; CHECK-NEXT: mova r1, #0 -; CHECK-NEXT: movx r2, #32 +; CHECK-NEXT: movx r1, #0; mov r2, #32 ; CHECK-NEXT: lt r27, r0, r2 ; CHECK-NEXT: sel.nez r1, r1, r2, r27 ; CHECK-NEXT: add r17, r27, #-1 @@ -479,16 +479,16 @@ define dso_local noundef i32 @_Z5test3Dv32_i(<32 x i32> noundef %vec) { ; CHECK-LABEL: _Z5test3Dv32_i: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: paddb [sp], #32; nopx ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: lda r0, [p0, #0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop +; CHECK-NEXT: nop ; CHECK-NEXT: mov r3, r16 ; CHECK-NEXT: mov r4, r17 -; CHECK-NEXT: mova r1, #0 -; CHECK-NEXT: movx r2, #16 +; CHECK-NEXT: movx r1, #0; mov r2, #16 ; CHECK-NEXT: lt r27, r0, r2 ; CHECK-NEXT: sel.nez r1, r1, r2, r27 ; CHECK-NEXT: add r17, r27, #-1 diff --git a/llvm/test/CodeGen/AIE/aie2/vmac.ll b/llvm/test/CodeGen/AIE/aie2/vmac.ll index 8e73066effd2..91f9efb4eedc 100644 --- a/llvm/test/CodeGen/AIE/aie2/vmac.ll +++ b/llvm/test/CodeGen/AIE/aie2/vmac.ll @@ -11,12 +11,8 @@ define <16 x i64> @_Z21test_mac_4x2_2x4_confiiiiii(i32 noundef %sgn_x, i32 noun ; CHECK-LABEL: _Z21test_mac_4x2_2x4_confiiiiii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; mova r6, #10; nops ; nopxm ; nopv -; CHECK-NEXT: mova r7, #11 -; CHECK-NEXT: mova r8, #12 -; CHECK-NEXT: mova r9, #9 -; CHECK-NEXT: mova r10, #8 -; CHECK-NEXT: mova r11, #2 +; CHECK-NEXT: mova r6, #10; movx r8, #12; mov r7, #11 +; CHECK-NEXT: mova r9, #9; movx r11, #2; mov r10, #8 ; CHECK-NEXT: lshl r3, r3, r6 ; CHECK-NEXT: lshl r4, r4, r7 ; CHECK-NEXT: lshl r5, r5, r8 @@ -96,9 +92,8 @@ define <16 x i64> @_Z21test_negmac_4x16_16x8Dv64_hiDv16_jiDv16_u7__acc64(<64 x ; CHECK-LABEL: _Z21test_negmac_4x16_16x8Dv64_hiDv16_jiDv16_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; mova r2, #9; nops ; nopxm ; nopv -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: lshl r0, r0, r2 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r3, #8; nopm ; nopv +; CHECK-NEXT: nopa ; nopb ; lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: or r0, r1, r0 ; CHECK-NEXT: ret lr @@ -120,9 +115,7 @@ define <16 x i64> @_Z24test_negmac_4x8_8x4_confDv32_sDv64_aDv16_u7__acc64iii(<3 ; CHECK-LABEL: _Z24test_negmac_4x8_8x4_confDv32_sDv64_aDv16_u7__acc64iii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r3, #11 -; CHECK-NEXT: mova r4, #12 -; CHECK-NEXT: mova r5, #818 +; CHECK-NEXT: mova r3, #11; nopb ; movx r5, #818; mov r4, #12 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: lshl r2, r2, r4 ; CHECK-NEXT: or r0, r1, r0 @@ -151,9 +144,7 @@ define <16 x i64> @_Z19test_negmac_2x4_4x8Dv32_tiDv32_siDv16_u7__acc64(<32 x i1 ; CHECK-LABEL: _Z19test_negmac_2x4_4x8Dv32_tiDv32_siDv16_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopx -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: mova r4, #26 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r4, #26; mov r3, #8; nopv ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: or r0, r0, r1 @@ -215,12 +206,8 @@ define <16 x i64> @_Z23test_msc_elem_16_2_confDv32_tiS_iDv16_u7__acc64iiii(<32 ; CHECK-LABEL: _Z23test_msc_elem_16_2_confDv32_tiS_iDv16_u7__acc64iiii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopb ; mova r6, #10; nops ; nopxm ; nopv -; CHECK-NEXT: mova r7, #11 -; CHECK-NEXT: mova r8, #12 -; CHECK-NEXT: mova r9, #9 -; CHECK-NEXT: mova r10, #8 -; CHECK-NEXT: mova r11, #90 +; CHECK-NEXT: mova r6, #10; movx r8, #12; mov r7, #11 +; CHECK-NEXT: mova r9, #9; movx r11, #90; mov r10, #8 ; CHECK-NEXT: lshl r3, r3, r6 ; CHECK-NEXT: lshl r4, r4, r7 ; CHECK-NEXT: lshl r5, r5, r8 @@ -261,10 +248,8 @@ define <16 x i64> @_Z21test_msc_4x4_4x4_confDv32_tDv32_sDv16_u7__acc64iiii(<32 ; CHECK-LABEL: _Z21test_msc_4x4_4x4_confDv32_tDv32_sDv16_u7__acc64iiii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r4, #10; nopb ; nopx -; CHECK-NEXT: mova r5, #11 -; CHECK-NEXT: mova r6, #12 -; CHECK-NEXT: mova r7, #314 +; CHECK-NEXT: mova r4, #10; nopxm +; CHECK-NEXT: mova r5, #11; movx r7, #314; mov r6, #12 ; CHECK-NEXT: lshl r1, r1, r4 ; CHECK-NEXT: lshl r2, r2, r5 ; CHECK-NEXT: lshl r3, r3, r6 diff --git a/llvm/test/CodeGen/AIE/aie2/vmul.ll b/llvm/test/CodeGen/AIE/aie2/vmul.ll index bc81b233615d..acdcd5607739 100644 --- a/llvm/test/CodeGen/AIE/aie2/vmul.ll +++ b/llvm/test/CodeGen/AIE/aie2/vmul.ll @@ -85,9 +85,7 @@ define <16 x i64> @_Z18test_mul_elem_32_2iDv64_hi(i32 noundef %sgn_x, <64 x i8> ; CHECK-LABEL: _Z18test_mul_elem_32_2iDv64_hi: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopx -; CHECK-NEXT: mova r3, #8 -; CHECK-NEXT: mova r4, #40 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r4, #40; mov r3, #8; nopv ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: or r0, r0, r1 diff --git a/llvm/test/CodeGen/AIE/aie2/vnegmul.ll b/llvm/test/CodeGen/AIE/aie2/vnegmul.ll index 4ef426c1fd3d..9bd70b10550b 100644 --- a/llvm/test/CodeGen/AIE/aie2/vnegmul.ll +++ b/llvm/test/CodeGen/AIE/aie2/vnegmul.ll @@ -11,8 +11,7 @@ define <16 x i64> @_Z19test_negmul_4x8_8x8ii(i32 noundef %sgn_x, i32 noundef %sg ; CHECK-LABEL: _Z19test_negmul_4x8_8x8ii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r2, #9; nopb ; nopxm -; CHECK-NEXT: mova r3, #8 +; CHECK-NEXT: nopb ; mova r2, #9; nops ; movx r3, #8; nopm ; nopv ; CHECK-NEXT: lshl r0, r0, r2 ; CHECK-NEXT: lshl r1, r1, r3 ; CHECK-NEXT: or r0, r0, r1 diff --git a/llvm/test/CodeGen/AIE/aie2/vst_srs.ll b/llvm/test/CodeGen/AIE/aie2/vst_srs.ll index a91417a6e0fb..b81d92bf7633 100644 --- a/llvm/test/CodeGen/AIE/aie2/vst_srs.ll +++ b/llvm/test/CodeGen/AIE/aie2/vst_srs.ll @@ -13,8 +13,7 @@ define dso_local noundef <16 x i16> @_Z5test0Dv16_u7__acc32(<8 x i64> noundef %a ; CHECK-LABEL: _Z5test0Dv16_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -49,8 +48,7 @@ define dso_local noundef <8 x i32> @_Z5test1Dv8_u7__acc64(<8 x i64> noundef %acc ; CHECK-LABEL: _Z5test1Dv8_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -79,8 +77,7 @@ define dso_local noundef <16 x i16> @_Z5test2Dv16_u7__acc32(<8 x i64> noundef %a ; CHECK-LABEL: _Z5test2Dv16_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -109,8 +106,7 @@ define dso_local noundef <8 x i32> @_Z5test3Dv8_u7__acc64(<8 x i64> noundef %acc ; CHECK-LABEL: _Z5test3Dv8_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -139,8 +135,7 @@ define dso_local noundef <16 x i16> @_Z5test4Dv16_u7__acc64(<16 x i64> noundef % ; CHECK-LABEL: _Z5test4Dv16_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -169,8 +164,7 @@ define dso_local noundef <32 x i8> @_Z5test5Dv32_u7__acc32(<16 x i64> noundef %a ; CHECK-LABEL: _Z5test5Dv32_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -199,8 +193,7 @@ define dso_local noundef <16 x i16> @_Z5test6Dv16_u7__acc64(<16 x i64> noundef % ; CHECK-LABEL: _Z5test6Dv16_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -229,8 +222,7 @@ define dso_local noundef <32 x i8> @_Z5test7Dv32_u7__acc32(<16 x i64> noundef %a ; CHECK-LABEL: _Z5test7Dv32_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -259,8 +251,7 @@ define dso_local noundef <16 x i16> @_Z5test8Dv16_u7__acc64(<16 x i64> noundef % ; CHECK-LABEL: _Z5test8Dv16_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -289,8 +280,7 @@ define dso_local noundef <32 x i8> @_Z5test9Dv32_u7__acc32(<16 x i64> noundef %a ; CHECK-LABEL: _Z5test9Dv32_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #2 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -319,8 +309,7 @@ define dso_local noundef <16 x i16> @_Z6test10Dv16_u7__acc32(<8 x i64> noundef % ; CHECK-LABEL: _Z6test10Dv16_u7__acc32: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #4 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #4 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 @@ -349,8 +338,7 @@ define dso_local noundef <8 x i32> @_Z6test11Dv8_u7__acc64(<8 x i64> noundef %ac ; CHECK-LABEL: _Z6test11Dv8_u7__acc64: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #4 +; CHECK-NEXT: padda [sp], #32; nopb ; movx r0, #4 ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 diff --git a/llvm/test/CodeGen/AIE/aie2/vsub_acc.ll b/llvm/test/CodeGen/AIE/aie2/vsub_acc.ll index 591c049ab628..5ab0d9b46175 100644 --- a/llvm/test/CodeGen/AIE/aie2/vsub_acc.ll +++ b/llvm/test/CodeGen/AIE/aie2/vsub_acc.ll @@ -27,10 +27,8 @@ define dso_local noundef <16 x i64> @test_sub_conf(<16 x i64> noundef %acc1, <16 ; CHECK-LABEL: test_sub_conf: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mova r4, #10; nopb ; nopx -; CHECK-NEXT: mova r5, #12 -; CHECK-NEXT: mova r6, #13 -; CHECK-NEXT: mova r7, #2 +; CHECK-NEXT: mova r4, #10; nopxm +; CHECK-NEXT: mova r5, #12; movx r7, #2; mov r6, #13 ; CHECK-NEXT: lshl r1, r1, r4 ; CHECK-NEXT: lshl r2, r2, r5 ; CHECK-NEXT: lshl r3, r3, r6