Skip to content

Commit

Permalink
Support for allowing direct VEXTRACT to 20-bit registers
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinay-anubola committed Nov 20, 2024
1 parent 02d7e59 commit 6138a60
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 12 deletions.
5 changes: 3 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
switch (MI.getOpcode()) {
case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
ErrInfo = "Expected 32bit scalar destination";
return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
ErrInfo = "Expected 32bit or 20bit scalar destination";
return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
case AIE2::G_AIE_PAD_VECTOR_UNDEF:
return verifySameLaneTypes(MI, ErrInfo) &&
isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),
Expand Down
48 changes: 39 additions & 9 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,18 @@ void llvm::applyGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
B.buildConstant(LLT::scalar(20), -static_cast<int64_t>(Offset)));
}

/// Determine if the instruction is a generic extract vector element operation
static bool IsGenericExtractVectorElt(const MachineInstr &MI) {
const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI.getMF());
const AIEBaseInstrInfo *TII = STI.getInstrInfo();
const unsigned Opcode = MI.getOpcode();

if (Opcode == TII->getGenericExtractVectorEltOpcode(false) ||
Opcode == TII->getGenericExtractVectorEltOpcode(true))
return true;
return false;
}

/// Checks whether the instruction produces or can be adapted to produce
/// a single S20 output.
static bool canProduceS20(const MachineRegisterInfo &MRI,
Expand All @@ -581,9 +593,12 @@ static bool canProduceS20(const MachineRegisterInfo &MRI,
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_IMPLICIT_DEF:
return true;
default:
default: {
if (IsGenericExtractVectorElt(MI))
return true;
return false;
}
}
}

/// The function checks if the node can be adapted to produce an S20 value, and
Expand Down Expand Up @@ -901,15 +916,19 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
return true;
}
default: {
LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
llvm_unreachable("Unexpected OpCode, while modifying IR");
if (IsGenericExtractVectorElt(*StartNodeMI)) {
Observer.changingInstr(*StartNodeMI);
MRI.setType(StartNodeMI->getOperand(0).getReg(), S20);
Observer.changedInstr(*StartNodeMI);
} else {
LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
llvm_unreachable("Unexpected OpCode, while modifying IR");
}
}
}

switch (StartNodeMI->getOpcode()) {
case TargetOpcode::COPY:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_PHI: {
// Function to handle the modification of instructions
auto ModifyInstructionUses = [&](MachineInstr *StartNodeMI) {
const auto UseInstIter =
MRI.use_nodbg_instructions(StartNodeMI->getOperand(0).getReg());
std::vector<MachineInstr *> UseInstr;
Expand All @@ -924,11 +943,22 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
if (!modifyToS20(NextNodeToModify, MRI, B, Observer, Helper))
llvm_unreachable("All input nodes should have updated");
}
};

switch (StartNodeMI->getOpcode()) {
case TargetOpcode::COPY:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_PHI: {
ModifyInstructionUses(StartNodeMI);
break;
}
default: {
LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
llvm_unreachable("Unexpected OpCode, while modifying IR");
if (IsGenericExtractVectorElt(*StartNodeMI)) {
ModifyInstructionUses(StartNodeMI);
} else {
LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
llvm_unreachable("Unexpected OpCode, while modifying IR");
}
}
}
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,294 @@ body: |
G_STORE %50:_(s16), %6:_(p0) :: (store (s16))
G_BR %bb.2
...

---
name: valid_vextract8_add2d
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: valid_vextract8_add2d
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: $p0 = COPY [[INT]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

---
name: valid_vextract16_add2d
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: valid_vextract16_add2d
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: $p0 = COPY [[INT]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

---
name: valid_vextract32_add2d
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: valid_vextract32_add2d
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: $p0 = COPY [[INT]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<16 x s32>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

# G_PTR_ADD has a single source node (vextract8); it is narrowed to S20 and the intermediate G_TRUNC is removed
---
name: valid_vextract8_PTR_ADD
legalized: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: valid_vextract8_PTR_ADD
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<64 x s8>), [[C]](s32)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000); %bb.2(100.00%)
liveins: $p0, $x0
%0:_(p0) = COPY $p0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(<64 x s8>) = COPY $x0
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %3(<64 x s8>), %1(s32), %2(s32)
bb.2:
successors: %bb.2(0x80000000); %bb.2(100.00%)
%5:_(s20) = G_TRUNC %4:_(s32)
%6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
G_BR %bb.2
...

# G_PTR_ADD has a single source node (vextract16); it is narrowed to S20 and the intermediate G_TRUNC is removed
---
name: valid_vextract16_PTR_ADD
legalized: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: valid_vextract16_PTR_ADD
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000); %bb.2(100.00%)
liveins: $p0, $x0
%0:_(p0) = COPY $p0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(<32 x s16>) = COPY $x0
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %3(<32 x s16>), %1(s32), %2(s32)
bb.2:
successors: %bb.2(0x80000000); %bb.2(100.00%)
%5:_(s20) = G_TRUNC %4:_(s32)
%6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
G_BR %bb.2
...

# G_PTR_ADD has a single source node (vextract32); it is narrowed to S20 and the intermediate G_TRUNC is removed
---
name: valid_vextract32_PTR_ADD
legalized: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: valid_vextract32_PTR_ADD
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<16 x s32>), [[C]](s32)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000); %bb.2(100.00%)
liveins: $p0, $x0
%0:_(p0) = COPY $p0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(<16 x s32>) = COPY $x0
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %3(<16 x s32>), %1(s32), %2(s32)
bb.2:
successors: %bb.2(0x80000000); %bb.2(100.00%)
%5:_(s20) = G_TRUNC %4:_(s32)
%6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
G_BR %bb.2
...

# Negative Test Case: Narrowing to s20 is not possible because the vextract8 source node has a non-constant sign register
---
name: valid_vextract8_add2d_neg
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0, $r0
; CHECK-LABEL: name: valid_vextract8_add2d_neg
; CHECK: liveins: $x0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r0
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

# Negative Test Case: Narrowing to s20 is not possible because the vextract16 source node has a non-constant sign register
---
name: valid_vextract16_add2d_neg
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0, $r0
; CHECK-LABEL: name: valid_vextract16_add2d_neg
; CHECK: liveins: $x0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r0
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

# Negative Test Case: Narrowing to s20 is not possible because the vextract32 source node has a non-constant sign register
---
name: valid_vextract32_add2d_neg
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0, $r0
; CHECK-LABEL: name: valid_vextract32_add2d_neg
; CHECK: liveins: $x0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r0
%2:_(<16 x s32>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...
Loading

0 comments on commit 6138a60

Please sign in to comment.