Skip to content

Commit

Permalink
[AIE2] Enable up-combining for VLD + UNPACK
Browse files Browse the repository at this point in the history
If we can't delay the VLD, we can try to anticipate (hoist) the UNPACK instead.
This approach can be extended to other selection combiners.
  • Loading branch information
andcarminati committed Nov 20, 2024
1 parent 7a8f59c commit fcdfcd4
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 31 deletions.
12 changes: 10 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1604,14 +1604,19 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
MachineInstr &UNPACKI, MachineRegisterInfo &MRI) {
Register LoadResult = (std::next(UNPACKI.uses().begin()))->getReg();
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);
bool ShouldAntecipate = false;

assert(LoadOp && "Expected SSA.");

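// Prefer delaying the load down to the VUNPACK. This is not possible if one
// of the load's defs is used by another instruction between the load and the
// VUNPACK, or if there is a store between the load and the VUNPACK. In that
// case, try to anticipate (hoist) the VUNPACK up to the load instead, and
// only give up on combining if that is not possible either.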
if (!canDelayMemOp(*LoadOp, UNPACKI, MRI))
return false;
if (!canDelayMemOp(*LoadOp, UNPACKI, MRI)) {
if (canAntecipateOp(*LoadOp, UNPACKI, MRI))
ShouldAntecipate = true;
else
return false;
}

if (!canCombineUNPACKLoad(*LoadOp, UNPACKI, MRI) ||
LoadOp->getParent() != UNPACKI.getParent() || !MRI.hasOneUse(LoadResult))
Expand All @@ -1628,6 +1633,9 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
Register DstReg = UNPACKI.getOperand(0).getReg();
Register SignReg = UNPACKI.getOperand(3).getReg();

if (ShouldAntecipate)
MIB.setInstr(*LoadOp);

auto NewInstr = MIB.buildInstr(LSO->ISelOpcode);

NewInstr.addDef(DstReg);
Expand Down
23 changes: 19 additions & 4 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
return nullptr;
}

/// Checks if any operand of \a Use is defined by \a MI.
/// This is not transitive: it will not look at how the uses of \a MI are
/// Checks if any operand of \a MI is defined by \a Def.
/// This is not transitive: it will not look at how the uses of \a Def are
/// defined.
bool isUseOf(const MachineInstr &MI, const MachineInstr &Use) {
for (auto &Defs : Use.defs()) {
bool isUseOf(const MachineInstr &MI, const MachineInstr &Def) {
for (auto &Defs : Def.defs()) {
for (auto &MIUse : MI.uses()) {
if (MIUse.isReg() && Defs.getReg() == MIUse.getReg())
return true;
Expand Down Expand Up @@ -119,6 +119,21 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
return none_of(InstrRange, UnsafeToMovePast);
}

/// Checks whether \a Dest may be hoisted so that it sits immediately after
/// \a MemI, which makes the two instructions adjacent for combining.
///
/// Hoisting is rejected when the two instructions live in different basic
/// blocks, or when any instruction strictly between \a MemI and \a Dest
/// defines a register that \a Dest reads (moving \a Dest above such an
/// instruction would make it use a value before its definition).
///
/// \a MRI is not consulted by the current implementation.
///
/// \return true if \a Dest can be moved just after \a MemI.
bool llvm::canAntecipateOp(MachineInstr &MemI, MachineInstr &Dest,
                           const MachineRegisterInfo &MRI) {
  // Only instructions of the same basic block can be brought together.
  if (MemI.getParent() != Dest.getParent())
    return false;

  // Walk the open interval (MemI, Dest): MemI itself is excluded on purpose,
  // since Dest is expected to consume what MemI defines.
  const auto Stop = Dest.getIterator();
  for (auto Cursor = std::next(MemI.getIterator()); Cursor != Stop; ++Cursor) {
    const MachineInstr &Between = *Cursor;
    // Dest depends on a value produced here; it cannot be moved above it.
    if (isUseOf(Dest, Between))
      return false;
  }
  return true;
}

/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ bool matchGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
/// post-increment combining
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
MachineRegisterInfo &MRI);
/// \return true if \a Dest can be moved just after \a MemI in order to allow
/// combining, i.e. both instructions are in the same basic block and no
/// instruction strictly between \a MemI and \a Dest defines a register that
/// is used by \a Dest.
bool canAntecipateOp(MachineInstr &MemI, MachineInstr &Dest,
const MachineRegisterInfo &MRI);
/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,23 @@

# This tests that we still combine (by anticipating the VUNPACK up to the load) if one of the load's defs is used before the VUNPACK instruction
---
name: VLD_UNPACK_use
name: VLD_UNPACK_use_before
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLD_UNPACK_use
; CHECK-LABEL: name: VLD_UNPACK_use_before
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: $m1 = COPY [[VLD_pstm_pseudo1]]
; CHECK-NEXT: [[VLDB_UNPACK_S16_S8_ag_pstm_nrm:%[0-9]+]]:vec512, [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1:%[0-9]+]]:ep = VLDB_UNPACK_S16_S8_ag_pstm_nrm [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: $m1 = COPY [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ep = COPY $m1
; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[COPY3]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_UNPACK_S16_S8_ag_pstm_nrm]], implicit [[COPY3]]
%0:ptrregbank(p0) = COPY $p0
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
Expand All @@ -39,35 +38,30 @@ body: |
PseudoRET implicit $lr, implicit %103, implicit %12
...

# This tests that we don't combine if a store is in between the load and VUNPACK instruction
# This tests that we don't combine if one of the load's defs (data) is used after the VUNPACK instruction (no single use)
---
name: VLD_UNPACK_store
name: VLD_UNPACK_use_after
alignment: 16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
; CHECK-LABEL: name: VLD_UNPACK_store
; CHECK: liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLD_UNPACK_use_after
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec256 = COPY $amll0
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY3]] :: (load (<32 x s8>))
; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[COPY4]], [[COPY1]], 0 :: (store (<32 x s8>))
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[VLD_pstm_pseudo]]
%0:ptrregbank(p0) = COPY $p0
%20:ptrregbank(p0) = COPY $p1
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%80:vregbank(<32 x s8>) = COPY $amll0
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
G_STORE %80:vregbank(<32 x s8>), %20:ptrregbank(p0) :: (store (<32 x s8>))
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103
%12:vregbank(<32 x s8>) = COPY %25
PseudoRET implicit $lr, implicit %103, implicit %12
...
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,34 @@ body: |
%106:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %28:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103, implicit %104, implicit %105, implicit %106
...

---
name: VLDB_POSTINC_UNPACK_STORE_between
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
; CHECK-LABEL: name: VLDB_POSTINC_UNPACK_STORE_between
; CHECK: liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec256 = COPY $amll0
; CHECK-NEXT: [[VLDB_UNPACK_S16_S8_ag_pstm_nrm:%[0-9]+]]:vec512, [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1:%[0-9]+]]:ep = VLDB_UNPACK_S16_S8_ag_pstm_nrm [[COPY]], [[COPY3]] :: (load (<32 x s8>))
; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[COPY4]], [[COPY1]], 0 :: (store (<32 x s8>))
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_UNPACK_S16_S8_ag_pstm_nrm]]
%0:ptrregbank(p0) = COPY $p0
%20:ptrregbank(p0) = COPY $p1
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%80:vregbank(<32 x s8>) = COPY $amll0
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
G_STORE %80:vregbank(<32 x s8>), %20:ptrregbank(p0) :: (store (<32 x s8>))
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103
...

0 comments on commit fcdfcd4

Please sign in to comment.