Skip to content

Commit

Permalink
[AIE2] Enable up-combining for VLD + UNPACK
Browse files Browse the repository at this point in the history
If we can't delay VLD, we can try to anticipate UNPACK. This approach can be
extended to other selection combiners.
  • Loading branch information
andcarminati committed Nov 26, 2024
1 parent 5796f62 commit 2a76196
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 32 deletions.
15 changes: 13 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1604,14 +1604,22 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
MachineInstr &UNPACKI, MachineRegisterInfo &MRI) {
Register LoadResult = (std::next(UNPACKI.uses().begin()))->getReg();
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);
// Should we build the instruction at load's position?
bool ShouldAdvanceOp = false;

assert(LoadOp && "Expected SSA.");

// Do not try to combine if one of the load's defs is used by another
// instruction between the load and the VUNPACK or if there is a store
// between the load and the VUNPACK.
if (!canDelayMemOp(*LoadOp, UNPACKI, MRI))
return false;
if (!canDelayMemOp(*LoadOp, UNPACKI, MRI)) {
// If we cannot delay the load, we can try to advance the combined
// instruction to the load's position.
if (canAdvanceOp(*LoadOp, UNPACKI, MRI))
ShouldAdvanceOp = true;
else
return false;
}

if (!canCombineUNPACKLoad(*LoadOp, UNPACKI, MRI) ||
LoadOp->getParent() != UNPACKI.getParent() || !MRI.hasOneUse(LoadResult))
Expand All @@ -1628,6 +1636,9 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
Register DstReg = UNPACKI.getOperand(0).getReg();
Register SignReg = UNPACKI.getOperand(3).getReg();

if (ShouldAdvanceOp)
MIB.setInstr(*LoadOp);

auto NewInstr = MIB.buildInstr(LSO->ISelOpcode);

NewInstr.addDef(DstReg);
Expand Down
34 changes: 29 additions & 5 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
return nullptr;
}

/// Checks if any operand of \a Use is defined by \a MI.
/// This is not transitive: it will not look at how the uses of \a MI are
/// Checks if any operand of \a MI is defined by \a Def.
/// This is not transitive: it will not look at how the uses of \a Def are
/// defined.
bool isUseOf(const MachineInstr &MI, const MachineInstr &Use) {
for (auto &Defs : Use.defs()) {
for (auto &MIUse : MI.uses()) {
bool isUseOf(const MachineInstr &MI, const MachineInstr &Def) {
for (auto &Defs : Def.all_defs()) {
for (auto &MIUse : MI.all_uses()) {
if (MIUse.isReg() && Defs.getReg() == MIUse.getReg())
return true;
}
Expand Down Expand Up @@ -119,6 +119,30 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
return none_of(InstrRange, UnsafeToMovePast);
}

/// Decide whether \a Dest may be hoisted to sit immediately after \a MemI so
/// that the two instructions can be combined.
/// \return false if the instructions live in different basic blocks or if any
/// instruction strictly between them blocks the move; true otherwise.
bool llvm::canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest,
                        const MachineRegisterInfo &MRI) {
  assert(Dest.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
         "Cannot advance Dest MI with side effects");
  assert(!Dest.mayLoadOrStore() && "Cannot advance load/store Dest MI");

  // Only instructions within a single basic block are considered.
  if (MemI.getParent() != Dest.getParent())
    return false;

  // Walk every instruction strictly between MemI and Dest.
  auto Stop = Dest.getIterator();
  for (auto Cursor = std::next(MemI.getIterator()); Cursor != Stop; ++Cursor) {
    const MachineInstr &Between = *Cursor;

    // A G_INTRINSIC_W_SIDE_EFFECTS without an explicit output may include a
    // write to a control register, so Dest must not be moved above it.
    const bool IsOutputlessSideEffect =
        Between.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
        Between.defs().empty();
    if (IsOutputlessSideEffect)
      return false;

    // Moving Dest above the definition of one of its own input operands is
    // not allowed.
    if (isUseOf(Dest, Between))
      return false;
  }

  return true;
}

/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ bool matchGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
/// post-increment combining
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
MachineRegisterInfo &MRI);
/// \return true if \a Dest can be moved just after \a MemI in order to allow
/// combining
bool canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest,
const MachineRegisterInfo &MRI);
/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,66 +8,90 @@

# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s

# This tests that we don't combine if one of the load's defs is used before the VUNPACK instruction

# This tests that we don't combine if one of the load's defs (data) is used after the VUNPACK instruction (no single use)

---
name: VLD_UNPACK_use_after
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLD_UNPACK_use_after
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[VLD_pstm_pseudo]]
%0:ptrregbank(p0) = COPY $p0
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
%12:vregbank(<32 x s8>) = COPY %25
PseudoRET implicit $lr, implicit %103, implicit %12
...

# This tests that we don't combine if one of the load's defs (data) is used before the VUNPACK instruction (no single use)

---
name: VLD_UNPACK_use
name: VLD_UNPACK_use_between
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLD_UNPACK_use
; CHECK-LABEL: name: VLD_UNPACK_use_between
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: $m1 = COPY [[VLD_pstm_pseudo1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ep = COPY $m1
; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[COPY3]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]], implicit [[VLD_pstm_pseudo]]
%0:ptrregbank(p0) = COPY $p0
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
$m1 = COPY %19:ptrregbank(p0)
%12:ptrregbank(p0) = COPY $m1
%12:vregbank(<32 x s8>) = COPY %25
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103, implicit %12
...

# This tests that we don't combine if an instruction with side effects (here, a control register write) is in between the load and VUNPACK instruction
---
name: VLD_UNPACK_store
name: VLD_UNPACK_side_effects_between
alignment: 16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
; CHECK-LABEL: name: VLD_UNPACK_store
; CHECK: liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLD_UNPACK_side_effects_between
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec256 = COPY $amll0
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY3]] :: (load (<32 x s8>))
; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[COPY4]], [[COPY1]], 0 :: (store (<32 x s8>))
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
; CHECK-NEXT: [[VLD_pstm_pseudo:%[0-9]+]]:vec256, [[VLD_pstm_pseudo1:%[0-9]+]]:ep = VLD_pstm_pseudo [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: $crunpacksign = COPY [[COPY1]]
; CHECK-NEXT: [[VUNPACK_S16_S8_:%[0-9]+]]:vec512 = VUNPACK_S16_S8 [[VLD_pstm_pseudo]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_S16_S8_]]
%0:ptrregbank(p0) = COPY $p0
%20:ptrregbank(p0) = COPY $p1
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%80:vregbank(<32 x s8>) = COPY $amll0
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
G_STORE %80:vregbank(<32 x s8>), %20:ptrregbank(p0) :: (store (<32 x s8>))
%110:gprregbank(s32) = G_CONSTANT i32 11
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.set.ctrl.reg), %110:gprregbank(s32), %1:gprregbank(s32)
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103
...
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,66 @@ body: |
%106:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %28:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103, implicit %104, implicit %105, implicit %106
...

# We can combine with a store in between.

---
name: VLDB_POSTINC_UNPACK_STORE_between
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
; CHECK-LABEL: name: VLDB_POSTINC_UNPACK_STORE_between
; CHECK: liveins: $p0, $m0, $m1, $m2, $r0, $amll0, $p1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec256 = COPY $amll0
; CHECK-NEXT: [[VLDB_UNPACK_S16_S8_ag_pstm_nrm:%[0-9]+]]:vec512, [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1:%[0-9]+]]:ep = VLDB_UNPACK_S16_S8_ag_pstm_nrm [[COPY]], [[COPY3]] :: (load (<32 x s8>))
; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[COPY4]], [[COPY1]], 0 :: (store (<32 x s8>))
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_UNPACK_S16_S8_ag_pstm_nrm]]
%0:ptrregbank(p0) = COPY $p0
%20:ptrregbank(p0) = COPY $p1
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%80:vregbank(<32 x s8>) = COPY $amll0
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
G_STORE %80:vregbank(<32 x s8>), %20:ptrregbank(p0) :: (store (<32 x s8>))
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103
...

# We combine if we use the resulting pointer (not the loaded data).
---
name: VLD_UNPACK_use_ptr
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLD_UNPACK_use_ptr
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
; CHECK-NEXT: [[VLDB_UNPACK_S16_S8_ag_pstm_nrm:%[0-9]+]]:vec512, [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1:%[0-9]+]]:ep = VLDB_UNPACK_S16_S8_ag_pstm_nrm [[COPY]], [[COPY2]] :: (load (<32 x s8>))
; CHECK-NEXT: $m1 = COPY [[VLDB_UNPACK_S16_S8_ag_pstm_nrm1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ep = COPY $m1
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_UNPACK_S16_S8_ag_pstm_nrm]], implicit [[COPY3]]
%0:ptrregbank(p0) = COPY $p0
%1:gprregbank(s32) = COPY $r0
%7:modregbank(s20) = G_TRUNC %1
%102:gprregbank(s32) = G_CONSTANT i32 1
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
$m1 = COPY %19:ptrregbank(p0)
%12:ptrregbank(p0) = COPY $m1
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I16.I8), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
PseudoRET implicit $lr, implicit %103, implicit %12
...

0 comments on commit 2a76196

Please sign in to comment.