Add support for bfloat in VInsert PreLegalizerCombiner
abhinay-anubola committed Nov 21, 2024
1 parent 0b3a94c commit d6afe1c
Showing 2 changed files with 232 additions and 1 deletion.
6 changes: 5 additions & 1 deletion llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
@@ -204,6 +204,7 @@ unsigned getVInsertScalarSize(unsigned IntrinsicID) {
   case Intrinsic::aie2_vinsert8_I512:
     return 8;
   case Intrinsic::aie2_vinsert16_I512:
+  case Intrinsic::aie2_vinsert16_bf512:
     return 16;
   case Intrinsic::aie2_vinsert32_I512:
     return 32;
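
A quick note on the new bucket: bfloat16 lanes are 16 bits wide, so aie2_vinsert16_bf512 shares the 16-bit case with its integer counterpart. A minimal sketch of how this size feeds the combine, with the LLT plumbing inferred from the CHECK lines in the test below rather than quoted from upstream:

```cpp
// Hedged sketch: the scalar size picks the element type of the
// G_BUILD_VECTOR_TRUNC that eventually replaces the insert chain.
unsigned SclSrcBits = getVInsertScalarSize(Intrinsic::aie2_vinsert16_bf512);
assert(SclSrcBits == 16 && "bf16 lanes are 16 bits wide");
LLT EltTy = LLT::scalar(SclSrcBits);                    // s16
LLT PadTy = LLT::fixed_vector(512 / SclSrcBits, EltTy); // <32 x s16>
```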
@@ -226,7 +227,9 @@ AIE2PreLegalizerCombinerImpl::getVectorInsertIndices(
     return isa<GIntrinsic>(MI) && (cast<GIntrinsic>(*MI).getIntrinsicID() ==
                                        Intrinsic::aie2_set_I512_I128 ||
                                    cast<GIntrinsic>(*MI).getIntrinsicID() ==
-                                       Intrinsic::aie2_set_I512_I256);
+                                       Intrinsic::aie2_set_I512_I256 ||
+                                   cast<GIntrinsic>(*MI).getIntrinsicID() ==
+                                       Intrinsic::aie2_set_bf512_bf256);
   };

   while (IsVInsert(CurMI, SclSrcBits)) {
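
The loop above walks the use-def chain of vinsert intrinsics back to the set.* seed that this hunk extends. A hedged sketch of that walk (names, helpers, and operand layout inferred from the visible context and the MIR tests below, not verbatim upstream code):

```cpp
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <map>
#include <optional>
using namespace llvm;

bool isVInsert(const MachineInstr &MI); // assumed predicate, cf. IsVInsert
bool isSet512(const MachineInstr &MI);  // assumed predicate, cf. the lambda above

std::optional<std::map<unsigned, Register>>
collectInsertIndices(MachineInstr *CurMI, MachineRegisterInfo &MRI) {
  std::map<unsigned, Register> Elems; // lane index -> scalar source vreg
  // G_INTRINSIC operands: 0 = dst, 1 = intrinsic id, 2 = src vector,
  // 3 = lane index, 4 = scalar value (per the tests in this commit).
  while (isVInsert(*CurMI)) {
    auto Idx = getIConstantVRegValWithLookThrough(
        CurMI->getOperand(3).getReg(), MRI);
    if (!Idx)
      return std::nullopt; // non-constant index: give up
    unsigned Lane = Idx->Value.getZExtValue();
    if (!Elems.try_emplace(Lane, CurMI->getOperand(4).getReg()).second)
      return std::nullopt; // duplicate lane (second negative test below)
    CurMI = MRI.getVRegDef(CurMI->getOperand(2).getReg());
  }
  // The chain must bottom out in a set.* intrinsic; this commit adds
  // aie2_set_bf512_bf256 to that check. A missing lane (first negative
  // test below) is rejected later, when the caller requires a complete
  // index set before rewriting.
  return isSet512(*CurMI) ? std::make_optional(Elems) : std::nullopt;
}
```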
@@ -317,6 +320,7 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
   }
   case Intrinsic::aie2_vinsert8_I512:
   case Intrinsic::aie2_vinsert16_I512:
+  case Intrinsic::aie2_vinsert16_bf512:
   case Intrinsic::aie2_vinsert32_I512: {
     return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
   }
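
On success, the combine replaces the whole chain with a build vector, padded and bitcast back to the register type the intrinsics used. A hedged sketch of that rewrite, reconstructed from the CHECK lines of the positive test below (the MachineIRBuilder calls are real API; the G_AIE_PAD_VECTOR_UNDEF opcode namespace is an assumption):

```cpp
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

void emitCombinedBuildVector(MachineInstr &MI, unsigned SclSrcBits,
                             MachineIRBuilder &B,
                             ArrayRef<Register> ElemsInLaneOrder) {
  LLT S16 = LLT::scalar(SclSrcBits);       // s16 for vinsert16.bf512
  LLT V16S16 = LLT::fixed_vector(16, S16);
  LLT V32S16 = LLT::fixed_vector(32, S16);
  // Truncate each s32 scalar into a 16-lane vector of s16 elements...
  auto BV = B.buildBuildVectorTrunc(V16S16, ElemsInLaneOrder);
  // ...pad to the full 512-bit register with undef lanes...
  auto Pad = B.buildInstr(AIE2::G_AIE_PAD_VECTOR_UNDEF, {V32S16}, {BV});
  // ...and bitcast back to the <16 x s32> type the intrinsic chain used.
  B.buildBitcast(MI.getOperand(0).getReg(), Pad);
  MI.eraseFromParent();
}
```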
227 changes: 227 additions & 0 deletions
@@ -707,3 +707,230 @@ body: |
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.I512), %23(<16 x s32>), %8(s32), %2(s32)
PseudoRET implicit $lr, implicit %24
...

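# Positive Test Case: a full chain of vinsert.16s over a set.bf512.bf256 combines into G_BUILD_VECTOR_TRUNC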
---
name: vinsert16-bf256
legalized: false
body: |
bb.1.entry:
; CHECK-LABEL: name: vinsert16-bf256
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR_TRUNC [[C15]](s32), [[C13]](s32), [[C11]](s32), [[C9]](s32), [[C7]](s32), [[C5]](s32), [[C3]](s32), [[C1]](s32), [[C]](s32), [[C2]](s32), [[C4]](s32), [[C6]](s32), [[C8]](s32), [[C10]](s32), [[C12]](s32), [[C14]](s32)
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<32 x s16>) = G_AIE_PAD_VECTOR_UNDEF [[BUILD_VECTOR_TRUNC]](<16 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[AIE_PAD_VECTOR_UNDEF]](<32 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s32>)
%0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
%100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(s32) = G_CONSTANT i32 2
%4:_(s32) = G_CONSTANT i32 3
%5:_(s32) = G_CONSTANT i32 4
%6:_(s32) = G_CONSTANT i32 5
%7:_(s32) = G_CONSTANT i32 6
%8:_(s32) = G_CONSTANT i32 7
%9:_(s32) = G_CONSTANT i32 8
%10:_(s32) = G_CONSTANT i32 9
%11:_(s32) = G_CONSTANT i32 10
%12:_(s32) = G_CONSTANT i32 11
%13:_(s32) = G_CONSTANT i32 12
%14:_(s32) = G_CONSTANT i32 13
%15:_(s32) = G_CONSTANT i32 14
%16:_(s32) = G_CONSTANT i32 15
%101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), %100(<8 x s32>), %1(s32)
%17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
%18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
%19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
%20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
%21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
%22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
%23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
%25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
%26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
%27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
%28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %12(s32), %7(s32)
%29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %13(s32), %9(s32)
%30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %14(s32), %11(s32)
%31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %15(s32), %13(s32)
%32:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %31(<16 x s32>), %16(s32), %15(s32)
PseudoRET implicit $lr, implicit %32
...

# Negative Test Case: the vinsert.16s do not combine into G_BUILD_VECTOR_TRUNC because the vinsert.16 for one index is missing
---
name: vinsert16-bf256_idx_miss
legalized: false
body: |
bb.1.entry:
; CHECK-LABEL: name: vinsert16-bf256_idx_miss
; CHECK: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[INT]](<16 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), [[BITCAST]](<8 x s32>), [[C]](s32)
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT1]](<16 x s32>), [[C]](s32), [[C15]](s32)
; CHECK-NEXT: [[INT3:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT2]](<16 x s32>), [[C1]](s32), [[C13]](s32)
; CHECK-NEXT: [[INT4:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT3]](<16 x s32>), [[C2]](s32), [[C11]](s32)
; CHECK-NEXT: [[INT5:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT4]](<16 x s32>), [[C3]](s32), [[C9]](s32)
; CHECK-NEXT: [[INT6:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT5]](<16 x s32>), [[C4]](s32), [[C7]](s32)
; CHECK-NEXT: [[INT7:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT6]](<16 x s32>), [[C5]](s32), [[C5]](s32)
; CHECK-NEXT: [[INT8:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT7]](<16 x s32>), [[C6]](s32), [[C3]](s32)
; CHECK-NEXT: [[INT9:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT8]](<16 x s32>), [[C7]](s32), [[C1]](s32)
; CHECK-NEXT: [[INT10:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT9]](<16 x s32>), [[C8]](s32), [[C]](s32)
; CHECK-NEXT: [[INT11:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT10]](<16 x s32>), [[C9]](s32), [[C2]](s32)
; CHECK-NEXT: [[INT12:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT11]](<16 x s32>), [[C10]](s32), [[C4]](s32)
; CHECK-NEXT: [[INT13:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT12]](<16 x s32>), [[C12]](s32), [[C8]](s32)
; CHECK-NEXT: [[INT14:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT13]](<16 x s32>), [[C13]](s32), [[C10]](s32)
; CHECK-NEXT: [[INT15:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT14]](<16 x s32>), [[C14]](s32), [[C12]](s32)
; CHECK-NEXT: [[INT16:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT15]](<16 x s32>), [[C15]](s32), [[C14]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT16]](<16 x s32>)
%0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
%100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(s32) = G_CONSTANT i32 2
%4:_(s32) = G_CONSTANT i32 3
%5:_(s32) = G_CONSTANT i32 4
%6:_(s32) = G_CONSTANT i32 5
%7:_(s32) = G_CONSTANT i32 6
%8:_(s32) = G_CONSTANT i32 7
%9:_(s32) = G_CONSTANT i32 8
%10:_(s32) = G_CONSTANT i32 9
%11:_(s32) = G_CONSTANT i32 10
%12:_(s32) = G_CONSTANT i32 11
%13:_(s32) = G_CONSTANT i32 12
%14:_(s32) = G_CONSTANT i32 13
%15:_(s32) = G_CONSTANT i32 14
%16:_(s32) = G_CONSTANT i32 15
%101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), %100(<8 x s32>), %1(s32)
%17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
%18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
%19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
%20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
%21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
%22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
%23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
%25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
%26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
%27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
%28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %13(s32), %9(s32)
%29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %14(s32), %11(s32)
%30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %15(s32), %13(s32)
%31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %16(s32), %15(s32)
PseudoRET implicit $lr, implicit %31
...

# Negative Test Case: the vinsert.16s do not combine into G_BUILD_VECTOR_TRUNC because one index has multiple vinsert.16 instructions
---
name: vinsert16-bf256_idx_multiple
legalized: false
body: |
bb.1.entry:
; CHECK-LABEL: name: vinsert16-bf256_idx_multiple
; CHECK: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[INT]](<16 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), [[BITCAST]](<8 x s32>), [[C]](s32)
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT1]](<16 x s32>), [[C]](s32), [[C15]](s32)
; CHECK-NEXT: [[INT3:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT2]](<16 x s32>), [[C1]](s32), [[C13]](s32)
; CHECK-NEXT: [[INT4:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT3]](<16 x s32>), [[C2]](s32), [[C11]](s32)
; CHECK-NEXT: [[INT5:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT4]](<16 x s32>), [[C3]](s32), [[C9]](s32)
; CHECK-NEXT: [[INT6:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT5]](<16 x s32>), [[C4]](s32), [[C7]](s32)
; CHECK-NEXT: [[INT7:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT6]](<16 x s32>), [[C5]](s32), [[C5]](s32)
; CHECK-NEXT: [[INT8:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT7]](<16 x s32>), [[C6]](s32), [[C3]](s32)
; CHECK-NEXT: [[INT9:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT8]](<16 x s32>), [[C7]](s32), [[C1]](s32)
; CHECK-NEXT: [[INT10:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT9]](<16 x s32>), [[C8]](s32), [[C]](s32)
; CHECK-NEXT: [[INT11:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT10]](<16 x s32>), [[C9]](s32), [[C2]](s32)
; CHECK-NEXT: [[INT12:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT11]](<16 x s32>), [[C10]](s32), [[C4]](s32)
; CHECK-NEXT: [[INT13:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT12]](<16 x s32>), [[C11]](s32), [[C6]](s32)
; CHECK-NEXT: [[INT14:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT13]](<16 x s32>), [[C12]](s32), [[C8]](s32)
; CHECK-NEXT: [[INT15:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT14]](<16 x s32>), [[C13]](s32), [[C10]](s32)
; CHECK-NEXT: [[INT16:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT15]](<16 x s32>), [[C13]](s32), [[C15]](s32)
; CHECK-NEXT: [[INT17:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT16]](<16 x s32>), [[C14]](s32), [[C12]](s32)
; CHECK-NEXT: [[INT18:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT17]](<16 x s32>), [[C15]](s32), [[C14]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT18]](<16 x s32>)
%0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
%100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(s32) = G_CONSTANT i32 2
%4:_(s32) = G_CONSTANT i32 3
%5:_(s32) = G_CONSTANT i32 4
%6:_(s32) = G_CONSTANT i32 5
%7:_(s32) = G_CONSTANT i32 6
%8:_(s32) = G_CONSTANT i32 7
%9:_(s32) = G_CONSTANT i32 8
%10:_(s32) = G_CONSTANT i32 9
%11:_(s32) = G_CONSTANT i32 10
%12:_(s32) = G_CONSTANT i32 11
%13:_(s32) = G_CONSTANT i32 12
%14:_(s32) = G_CONSTANT i32 13
%15:_(s32) = G_CONSTANT i32 14
%16:_(s32) = G_CONSTANT i32 15
%101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), %100(<8 x s32>), %1(s32)
%17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
%18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
%19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
%20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
%21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
%22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
%23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
%24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
%25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
%26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
%27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
%28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %12(s32), %7(s32)
%29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %13(s32), %9(s32)
%30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %14(s32), %11(s32)
%31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %14(s32), %16(s32)
%32:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %31(<16 x s32>), %15(s32), %13(s32)
%33:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %32(<16 x s32>), %16(s32), %15(s32)
PseudoRET implicit $lr, implicit %33
...
