diff --git a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp index 4c56d2cd7010..bc834795fc25 100644 --- a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp +++ b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp @@ -1604,14 +1604,19 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK( MachineInstr &UNPACKI, MachineRegisterInfo &MRI) { Register LoadResult = (std::next(UNPACKI.uses().begin()))->getReg(); MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI); + bool ShouldAntecipate = false; assert(LoadOp && "Expected SSA."); // Do not try to combine if one of the load's defs is used by another // instruction between the load and the VUNPACK or if there is a store // between the load and the VUNPACK. - if (!canDelayMemOp(*LoadOp, UNPACKI, MRI)) - return false; + if (!canDelayMemOp(*LoadOp, UNPACKI, MRI)) { + if (canAntecipateOp(*LoadOp, UNPACKI, MRI)) + ShouldAntecipate = true; + else + return false; + } if (!canCombineUNPACKLoad(*LoadOp, UNPACKI, MRI) || LoadOp->getParent() != UNPACKI.getParent() || !MRI.hasOneUse(LoadResult)) @@ -1628,6 +1633,9 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK( Register DstReg = UNPACKI.getOperand(0).getReg(); Register SignReg = UNPACKI.getOperand(3).getReg(); + if (ShouldAntecipate) + MIB.setInstr(*LoadOp); + auto NewInstr = MIB.buildInstr(LSO->ISelOpcode); NewInstr.addDef(DstReg); diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 2272c13d047d..ca698ac6a173 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -63,11 +63,11 @@ MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI, return nullptr; } -/// Checks if any operand of \a Use is defined by \a MI. -/// This is not transitive: it will not look at how the uses of \a MI are +/// Checks if any operand of \a MI is defined by \a Def. 
+/// This is not transitive: it will not look at how the uses of \a Def are
 /// defined.
-bool isUseOf(const MachineInstr &MI, const MachineInstr &Use) {
-  for (auto &Defs : Use.defs()) {
+bool isUseOf(const MachineInstr &MI, const MachineInstr &Def) {
+  for (auto &Defs : Def.defs()) {
     for (auto &MIUse : MI.uses()) {
       if (MIUse.isReg() && Defs.getReg() == MIUse.getReg())
         return true;
@@ -119,6 +119,39 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
   return none_of(InstrRange, UnsafeToMovePast);
 }
 
+/// Checks whether \a Dest can be hoisted to sit directly after \a MemI so
+/// that the two instructions can be combined.
+///
+/// The hoist is rejected when an instruction strictly between \a MemI and
+/// \a Dest defines a register that \a Dest reads (see isUseOf), since
+/// \a Dest would then run before its operand is available.
+/// NOTE: only register dependencies are examined here; memory ordering and
+/// other side effects of \a Dest are not checked.
+///
+/// \param MemI the instruction \a Dest would be placed directly after.
+/// \param Dest the instruction to hoist.
+/// \param MRI currently unused.
+/// \return true if the hoist is safe; false otherwise, including when
+/// \a Dest is in another block or does not come after \a MemI.
+bool llvm::canAntecipateOp(MachineInstr &MemI, MachineInstr &Dest,
+                           const MachineRegisterInfo &MRI) {
+  MachineBasicBlock *MBB = MemI.getParent();
+  if (MBB != Dest.getParent())
+    return false;
+  // Walk forward from MemI rather than building the range (MemI, Dest) up
+  // front: if Dest does not follow MemI, that range would run off the end of
+  // the block.
+  const auto DestIt = Dest.getIterator();
+  const auto BlockEnd = MBB->instr_end();
+  for (auto It = std::next(MemI.getIterator()); It != BlockEnd; ++It) {
+    if (It == DestIt)
+      return true;
+    if (isUseOf(Dest, *It))
+      return false;
+  }
+  return false;
+}
+
 /// Find the def instruction for \p Reg, folding away any trivial copies and
 /// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
 MachineInstr *
diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h
index 7daaa0c9954e..a66b7231ffaa 100644
--- a/llvm/lib/Target/AIE/AIECombinerHelper.h
+++ b/llvm/lib/Target/AIE/AIECombinerHelper.h
@@ -61,6 +61,10 @@ bool matchGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
 /// post-increment combining
 bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
                    MachineRegisterInfo &MRI);
+/// \return true if \a Dest can be moved just after \a MemI in order to allow
+/// combining
+bool canAntecipateOp(MachineInstr &MemI, MachineInstr &Dest,
+                     const MachineRegisterInfo &MRI);
 /// Find the def instruction for \p Reg, folding away any trivial copies and
 /// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg, diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-no-combine-vldb_unpack.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-no-combine-vldb_unpack.mir index fd5b49611cc4..abc4e6dacc28 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-no-combine-vldb_unpack.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-no-combine-vldb_unpack.mir @@ -10,24 +10,23 @@ # This tests that we don't combine if one of the load's defs is used before the VUNPACK instruction --- -name: VLD_UNPACK_use +name: VLD_UNPACK_use_before alignment: 16 legalized: true regBankSelected: true body: | bb.1.entry: liveins: $p0 - ; CHECK-LABEL: name: VLD_UNPACK_use + ; CHECK-LABEL: name: VLD_UNPACK_use_before ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] - ; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>)) - ; CHECK-NEXT: $m1 = COPY [[VLD_pstm_pseudo1]] + ; CHECK-NEXT: [[VLDB_UNPACK_S16_S8_ag_pstm_nrm:%[0-9]+]]:vec512, [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1:%[0-9]+]]:ep = VLDB_UNPACK_S16_S8_ag_pstm_nrm [[COPY]], [[COPY2]] :: (load (<32 x s8>)) + ; CHECK-NEXT: $m1 = COPY [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ep = COPY $m1 - ; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]] - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[COPY3]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_UNPACK_S16_S8_ag_pstm_nrm]], implicit [[COPY3]] %0:ptrregbank(p0) = COPY $p0 %1:gprregbank(s32) = COPY $r0 %7:modregbank(s20) = G_TRUNC %1 @@ -39,35 +38,30 @@ body: | PseudoRET implicit $lr, implicit %103, implicit %12 ... 
-# This tests that we don't combine if a store is in between the load and VUNPACK instruction +# This tests that we don't combine if one of the load's defs (data) is used after the VUNPACK instruction (no single use) --- -name: VLD_UNPACK_store +name: VLD_UNPACK_use_after +alignment: 16 legalized: true regBankSelected: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1 - ; CHECK-LABEL: name: VLD_UNPACK_store - ; CHECK: liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1 +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: VLD_UNPACK_use_after + ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec256 = COPY $amll0 - ; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY3]] :: (load (<32 x s8>)) - ; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[COPY4]], [[COPY1]], 0 :: (store (<32 x s8>)) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>)) ; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]] - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[VLD_pstm_pseudo]] %0:ptrregbank(p0) = COPY $p0 - %20:ptrregbank(p0) = COPY $p1 %1:gprregbank(s32) = COPY $r0 %7:modregbank(s20) = G_TRUNC %1 %102:gprregbank(s32) = G_CONSTANT i32 1 - %80:vregbank(<32 x s8>) = COPY $amll0 %25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>)) - G_STORE %80:vregbank(<32 x s8>), %20:ptrregbank(p0) :: (store (<32 x s8>)) 
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32) - PseudoRET implicit $lr, implicit %103 + %12:vregbank(<32 x s8>) = COPY %25 + PseudoRET implicit $lr, implicit %103, implicit %12 ... diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-postinc-vldb_unpack.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-postinc-vldb_unpack.mir index 0c19a10355ca..f64fefd18440 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-postinc-vldb_unpack.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-postinc-vldb_unpack.mir @@ -267,3 +267,34 @@ body: | %106:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %28:vregbank(<32 x s8>), %102:gprregbank(s32) PseudoRET implicit $lr, implicit %103, implicit %104, implicit %105, implicit %106 ... + +--- +name: VLDB_POSTINC_UNPACK_STORE_between +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1 + ; CHECK-LABEL: name: VLDB_POSTINC_UNPACK_STORE_between + ; CHECK: liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec256 = COPY $amll0 + ; CHECK-NEXT: [[VLDB_UNPACK_S16_S8_ag_pstm_nrm:%[0-9]+]]:vec512, [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1:%[0-9]+]]:ep = VLDB_UNPACK_S16_S8_ag_pstm_nrm [[COPY]], [[COPY3]] :: (load (<32 x s8>)) + ; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[COPY4]], [[COPY1]], 0 :: (store (<32 x s8>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_UNPACK_S16_S8_ag_pstm_nrm]] + %0:ptrregbank(p0) = COPY $p0 + %20:ptrregbank(p0) = COPY $p1 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %102:gprregbank(s32) = G_CONSTANT i32 1 + %80:vregbank(<32 x s8>) = COPY $amll0 
+ %25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>)) + G_STORE %80:vregbank(<32 x s8>), %20:ptrregbank(p0) :: (store (<32 x s8>)) + %103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32) + PseudoRET implicit $lr, implicit %103 +...