Skip to content

Commit

Permalink
Combine VInsert sequence in PreLegalizerCombiner
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinay-anubola committed Oct 29, 2024
1 parent 70209f9 commit 4c793d4
Show file tree
Hide file tree
Showing 11 changed files with 595 additions and 60 deletions.
66 changes: 57 additions & 9 deletions llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ class AIE2InstructionSelector : public InstructionSelector {
bool selectG_AIE_EXTRACT_VECTOR_ELT(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectG_AIE_INSERT_VECTOR_ELT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectG_AIE_PAD_VECTOR_UNDEF(MachineInstr &I, MachineOperand &DstReg,
MachineOperand &SrcReg,
MachineRegisterInfo &MRI);
bool selectG_AIE_UNPAD_VECTOR(MachineInstr &I, Register DstReg,
Register SrcReg, MachineRegisterInfo &MRI);
bool selectSetI128(MachineInstr &I, MachineOperand &DstReg,
MachineOperand &SrcReg, MachineRegisterInfo &MRI);
bool selectExtractI128(MachineInstr &I, Register DstReg, Register SrcReg,
Expand Down Expand Up @@ -762,10 +767,11 @@ bool AIE2InstructionSelector::select(MachineInstr &I) {
case AIE2::G_AIE_INSERT_VECTOR_ELT:
return selectG_AIE_INSERT_VECTOR_ELT(I, MRI);
case AIE2::G_AIE_PAD_VECTOR_UNDEF:
return selectSetI128(I, I.getOperand(0), I.getOperand(1), MRI);
return selectG_AIE_PAD_VECTOR_UNDEF(I, I.getOperand(0), I.getOperand(1),
MRI);
case AIE2::G_AIE_UNPAD_VECTOR:
return selectExtractI128(I, I.getOperand(0).getReg(),
I.getOperand(1).getReg(), MRI);
return selectG_AIE_UNPAD_VECTOR(I, I.getOperand(0).getReg(),
I.getOperand(1).getReg(), MRI);
default:
return selectImpl(I, *CoverageInfo);
}
Expand Down Expand Up @@ -812,13 +818,13 @@ bool AIE2InstructionSelector::selectG_AIE_ADD_VECTOR_ELT_HI(
unsigned Opcode;
switch (VecEltDstTySize) {
case 8:
Opcode = AIE2::VPUSH_LO_8;
Opcode = AIE2::VPUSH_HI_8;
break;
case 16:
Opcode = AIE2::VPUSH_LO_16;
Opcode = AIE2::VPUSH_HI_16;
break;
case 32:
Opcode = AIE2::VPUSH_LO_32;
Opcode = AIE2::VPUSH_HI_32;
break;
case 64:
llvm_unreachable("Unexpected accumulator vector in selection of "
Expand All @@ -828,9 +834,7 @@ bool AIE2InstructionSelector::selectG_AIE_ADD_VECTOR_ELT_HI(
"Unexpected vector size in selection of G_AIE_ADD_VECTOR_ELT_HI");
}

// This is the opposite order from the ISA which expects vector, value. This
// is choice made in TD which takes it in this opposite order.
MachineInstr &MI = *MIB.buildInstr(Opcode, {Dst}, {Value, Src});
MachineInstr &MI = *MIB.buildInstr(Opcode, {Dst}, {Src, Value});
I.eraseFromParent();

return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
Expand Down Expand Up @@ -4980,6 +4984,27 @@ bool AIE2InstructionSelector::selectExtractI128(MachineInstr &I,
return true;
}

/// Select G_AIE_UNPAD_VECTOR, which produces \p DstReg from the wider vector
/// in \p SrcReg.
///
/// A 128-bit destination is forwarded to selectExtractI128. Otherwise the
/// destination must be 256 bits wide and the source 512 bits wide, and the
/// instruction is selected as a COPY reading the sub_256_lo subregister of
/// the source.
///
/// \returns the result of selectExtractI128 for 128-bit destinations, true
/// otherwise.
bool AIE2InstructionSelector::selectG_AIE_UNPAD_VECTOR(
    MachineInstr &I, Register DstReg, Register SrcReg,
    MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(DstReg);
  if (DstTy.getSizeInBits() == 128)
    return selectExtractI128(I, DstReg, SrcReg, MRI);

  // The size checks are written directly inside the asserts so that no local
  // becomes an unused variable when asserts are compiled out (NDEBUG).
  assert(DstTy.getSizeInBits() == 256 &&
         "G_AIE_UNPAD_VECTOR: expected a 128-bit or 256-bit destination");
  assert(MRI.getType(SrcReg).getSizeInBits() == 512 &&
         "G_AIE_UNPAD_VECTOR: expected a 512-bit source for a 256-bit "
         "destination");

  // Select using a COPY to a 256-bit register.
  MachineInstr *CopyMI = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                             .addReg(SrcReg, 0, AIE2::sub_256_lo);
  constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *CopyMI,
                           AIE2::VEC256RegClass, CopyMI->getOperand(0));
  I.eraseFromParent();
  return true;
}

// Select set 128-bit Intrinsics
bool AIE2InstructionSelector::selectSetI128(MachineInstr &I,
MachineOperand &DstReg,
Expand Down Expand Up @@ -5015,6 +5040,29 @@ bool AIE2InstructionSelector::selectSetI128(MachineInstr &I,
return true;
}

/// Select G_AIE_PAD_VECTOR_UNDEF, which places the vector in \p SrcReg into
/// the wider vector \p DstReg.
///
/// A 128-bit source is forwarded to selectSetI128. Otherwise the source must
/// be 256 bits wide and the destination 512 bits wide, and the instruction is
/// selected as a REG_SEQUENCE that puts the source in the sub_256_lo
/// subregister of the destination.
///
/// \returns the result of selectSetI128 for 128-bit sources, true otherwise.
bool AIE2InstructionSelector::selectG_AIE_PAD_VECTOR_UNDEF(
    MachineInstr &I, MachineOperand &DstReg, MachineOperand &SrcReg,
    MachineRegisterInfo &MRI) {
  const LLT SrcTy = MRI.getType(SrcReg.getReg());
  if (SrcTy.getSizeInBits() == 128)
    return selectSetI128(I, DstReg, SrcReg, MRI);

  // The size checks are written directly inside the asserts so that no local
  // becomes an unused variable when asserts are compiled out (NDEBUG).
  assert(SrcTy.getSizeInBits() == 256 &&
         "G_AIE_PAD_VECTOR_UNDEF: expected a 128-bit or 256-bit source");
  assert(MRI.getType(DstReg.getReg()).getSizeInBits() == 512 &&
         "G_AIE_PAD_VECTOR_UNDEF: expected a 512-bit destination for a "
         "256-bit source");

  // Constrain input vector to VEC256 RC, and output to VEC512
  constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, I, AIE2::VEC256RegClass,
                           SrcReg);
  constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, I, AIE2::VEC512RegClass,
                           DstReg);
  MIB.buildInstr(AIE2::REG_SEQUENCE, {DstReg}, {SrcReg})
      .addImm(AIE2::sub_256_lo);
  I.eraseFromParent();
  return true;
}

bool AIE2InstructionSelector::selectGetSS(MachineInstr &I,
MachineRegisterInfo &MRI) {
Register ValReg = I.getOperand(0).getReg();
Expand Down
92 changes: 92 additions & 0 deletions llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {
mutable CombinerHelper Helper;
const AIE2PreLegalizerCombinerImplRuleConfig &RuleConfig;
const AIE2Subtarget &STI;
std::map<unsigned, Register>
getVectorInsertIndices(MachineInstr *CurMI, unsigned SclSrcBits,
MachineRegisterInfo &MRI) const;

public:
AIE2PreLegalizerCombinerImpl(
Expand All @@ -75,6 +78,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {

bool tryToCombineSetExtract(MachineInstr &MI) const;

bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const;

bool tryToCombineIntrinsic(MachineInstr &MI) const;

private:
Expand Down Expand Up @@ -181,6 +186,90 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineSetExtract(
return true;
}

// Returns a map with InsertIndices and registers holding the insert values.
//
// Starting at CurMI, follows a chain of aie2_vinsert8_I512 intrinsics through
// their vector source operand, recording for each one the constant insert
// index and the register holding the inserted scalar.
//
// An empty map is returned when the chain cannot be used:
//  - an insert index is not a constant, or the same index appears twice;
//  - the chain does not end in an aie2_set_I512_I128 / aie2_set_I512_I256
//    intrinsic;
//  - the number of collected lanes times SclSrcBits does not match the size
//    of the vector fed to that Set intrinsic.
//
// NOTE(review): tryToCombineSetExtract is invoked while walking the chain, so
// this function may change the MIR even when it ends up returning an empty
// map - confirm that this is intended.
std::map<unsigned, Register>
AIE2PreLegalizerCombinerImpl::getVectorInsertIndices(
    MachineInstr *CurMI, unsigned SclSrcBits, MachineRegisterInfo &MRI) const {
  std::map<unsigned, Register> RegMap;
  // Both predicates reject a null instruction: getDefIgnoringCopies may
  // return nullptr and isa<> must not be called on a null pointer.
  auto Is8BitVInsert = [](const MachineInstr *MI) {
    return MI && isa<GIntrinsic>(MI) &&
           cast<GIntrinsic>(*MI).getIntrinsicID() ==
               Intrinsic::aie2_vinsert8_I512;
  };
  auto IsSet = [](const MachineInstr *MI) {
    return MI && isa<GIntrinsic>(MI) &&
           (cast<GIntrinsic>(*MI).getIntrinsicID() ==
                Intrinsic::aie2_set_I512_I128 ||
            cast<GIntrinsic>(*MI).getIntrinsicID() ==
                Intrinsic::aie2_set_I512_I256);
  };

  while (Is8BitVInsert(CurMI)) {
    // For a G_INTRINSIC, operand 1 is the intrinsic ID, so the vector source,
    // the insert index and the inserted scalar are operands 2, 3 and 4.
    const Register SrcReg = CurMI->getOperand(2).getReg();
    const Register IdxReg = CurMI->getOperand(3).getReg();
    const Register SclSrcReg = CurMI->getOperand(4).getReg();

    // Collecting registers and their indices
    auto Cst = getIConstantVRegValWithLookThrough(IdxReg, MRI);
    if (!Cst ||
        !RegMap.try_emplace(Cst->Value.getZExtValue(), SclSrcReg).second)
      return {};
    CurMI = getDefIgnoringCopies(SrcReg, MRI);

    // Combining Set and Extract to fetch next VInsert
    if (IsSet(CurMI) && tryToCombineSetExtract(*CurMI))
      CurMI = getDefIgnoringCopies(SrcReg, MRI);
  }

  // For 128/256-bit vectors, not all lanes are explicitly defined. If the
  // source MI is identified as a Set intrinsic that sets the required lanes,
  // the transformation can proceed safely.
  if (!IsSet(CurMI))
    return {};
  unsigned DstRegBits =
      MRI.getType(CurMI->getOperand(2).getReg()).getSizeInBits();
  // Check for the right amount of lanes matching the size of input vector of
  // Set intrinsic.
  if (DstRegBits != RegMap.size() * SclSrcBits)
    return {};
  return RegMap;
}

/// Look for VINSERT sequence that can be rewritten as G_BUILD_VECTOR_TRUNC
///
/// The insert chain ending at \p MI is collected with getVectorInsertIndices.
/// When the collected indices are exactly 0 .. N-1, \p MI is replaced by a
/// G_BUILD_VECTOR_TRUNC of the N inserted values, padded to the width of the
/// original result with G_AIE_PAD_VECTOR_UNDEF and bitcast to the original
/// result register.
///
/// \param MI the last insert intrinsic of the chain.
/// \param SclSrcBits element width, in bits, of the vector being built.
/// \returns true if \p MI was replaced, false if nothing was rewritten here.
bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts(
    MachineInstr &MI, unsigned SclSrcBits) const {
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstRegBits = MRI.getType(DstReg).getSizeInBits();

  const std::map<unsigned, Register> InsertIndices =
      getVectorInsertIndices(&MI, SclSrcBits, MRI);
  const unsigned DstRegLen = InsertIndices.size();
  if (DstRegLen == 0)
    return false;

  // Collect registers in order for G_BUILD_VECTOR_TRUNC. std::map iterates in
  // ascending key order, so the keys are exactly 0 .. DstRegLen-1 if and only
  // if every key equals its position in the iteration.
  SmallVector<Register, 16> Regs;
  for (const auto &[LaneIdx, LaneReg] : InsertIndices) {
    if (LaneIdx != Regs.size())
      return false;
    Regs.push_back(LaneReg);
  }

  MachineIRBuilder MIRBuilder(MI);
  Register DstRegTrunc = MRI.createGenericVirtualRegister(
      LLT::fixed_vector(DstRegLen, SclSrcBits));
  Register DstRegPad = MRI.createGenericVirtualRegister(
      LLT::fixed_vector(DstRegBits / SclSrcBits, SclSrcBits));

  MIRBuilder.buildBuildVectorTrunc(DstRegTrunc, Regs);
  MIRBuilder.buildInstr(AIE2::G_AIE_PAD_VECTOR_UNDEF, {DstRegPad},
                        {DstRegTrunc});
  MIRBuilder.buildBitcast(DstReg, DstRegPad);

  MI.eraseFromParent();
  return true;
}

bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
MachineInstr &MI) const {

Expand All @@ -194,6 +283,9 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
case Intrinsic::aie2_set_I512_I256: {
return Combine256To512SetExtract && tryToCombineSetExtract(MI);
}
case Intrinsic::aie2_vinsert8_I512: {
return tryToCombineVectorInserts(MI, 8);
}
default:
break;
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ AIEBaseInstrInfo::getSchedClass(const MCInstrDesc &Desc,

bool AIEBaseInstrInfo::isLegalTypeToPad(const LLT &Ty,
StringRef *ErrInfo) const {
if (Ty.isVector() && Ty.getSizeInBits() == 128)
if (Ty.isVector() && (Ty.getSizeInBits() == 128 || Ty.getSizeInBits() == 256))
return true;
if (ErrInfo)
*ErrInfo = "Operand size is illegal";
Expand Down
Loading

0 comments on commit 4c793d4

Please sign in to comment.