diff --git a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
index 63058b2f5785..49c4f94cfa80 100644
--- a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
@@ -204,6 +204,7 @@ unsigned getVInsertScalarSize(unsigned IntrinsicID) {
   case Intrinsic::aie2_vinsert8_I512:
     return 8;
   case Intrinsic::aie2_vinsert16_I512:
+  case Intrinsic::aie2_vinsert16_bf512:
     return 16;
   case Intrinsic::aie2_vinsert32_I512:
     return 32;
@@ -226,7 +227,9 @@ AIE2PreLegalizerCombinerImpl::getVectorInsertIndices(
     return isa<GIntrinsic>(MI) &&
            (cast<GIntrinsic>(*MI).getIntrinsicID() ==
                 Intrinsic::aie2_set_I512_I128 ||
             cast<GIntrinsic>(*MI).getIntrinsicID() ==
-                Intrinsic::aie2_set_I512_I256);
+                Intrinsic::aie2_set_I512_I256 ||
+            cast<GIntrinsic>(*MI).getIntrinsicID() ==
+                Intrinsic::aie2_set_bf512_bf256);
   };
 
   while (IsVInsert(CurMI, SclSrcBits)) {
@@ -317,6 +320,7 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
   }
   case Intrinsic::aie2_vinsert8_I512:
   case Intrinsic::aie2_vinsert16_I512:
+  case Intrinsic::aie2_vinsert16_bf512:
   case Intrinsic::aie2_vinsert32_I512: {
     return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
   }
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vinsert-sequence-prelegalizer.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vinsert-sequence-prelegalizer.mir
index fd8989ce3ca6..a48f17501b7b 100644
--- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vinsert-sequence-prelegalizer.mir
+++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vinsert-sequence-prelegalizer.mir
@@ -707,3 +707,230 @@ body: |
     %24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.I512), %23(<16 x s32>), %8(s32), %2(s32)
     PseudoRET implicit $lr, implicit %24
 ...
+
+---
+name: vinsert16-bf256
+legalized: false
+body: |
+  bb.1.entry:
+    ; CHECK-LABEL: name: vinsert16-bf256
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR_TRUNC [[C15]](s32), [[C13]](s32), [[C11]](s32), [[C9]](s32), [[C7]](s32), [[C5]](s32), [[C3]](s32), [[C1]](s32), [[C]](s32), [[C2]](s32), [[C4]](s32), [[C6]](s32), [[C8]](s32), [[C10]](s32), [[C12]](s32), [[C14]](s32)
+    ; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<32 x s16>) = G_AIE_PAD_VECTOR_UNDEF [[BUILD_VECTOR_TRUNC]](<16 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[AIE_PAD_VECTOR_UNDEF]](<32 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<16 x s32>)
+    %0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
+    %100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_CONSTANT i32 2
+    %4:_(s32) = G_CONSTANT i32 3
+    %5:_(s32) = G_CONSTANT i32 4
+    %6:_(s32) = G_CONSTANT i32 5
+    %7:_(s32) = G_CONSTANT i32 6
+    %8:_(s32) = G_CONSTANT i32 7
+    %9:_(s32) = G_CONSTANT i32 8
+    %10:_(s32) = G_CONSTANT i32 9
+    %11:_(s32) = G_CONSTANT i32 10
+    %12:_(s32) = G_CONSTANT i32 11
+    %13:_(s32) = G_CONSTANT i32 12
+    %14:_(s32) = G_CONSTANT i32 13
+    %15:_(s32) = G_CONSTANT i32 14
+    %16:_(s32) = G_CONSTANT i32 15
+    %101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), %100(<8 x s32>), %1(s32)
+    %17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
+    %18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
+    %19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
+    %20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
+    %21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
+    %22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
+    %23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
+    %24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
+    %25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
+    %26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
+    %27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
+    %28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %12(s32), %7(s32)
+    %29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %13(s32), %9(s32)
+    %30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %14(s32), %11(s32)
+    %31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %15(s32), %13(s32)
+    %32:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %31(<16 x s32>), %16(s32), %15(s32)
+    PseudoRET implicit $lr, implicit %32
+...
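+
+# Positive Test Case: the sequence above writes every lane index 0-15 exactly
+# once, so the whole vinsert16.bf512 chain combines into a single
+# G_BUILD_VECTOR_TRUNC, padded to 512 bits and bitcast back to <16 x s32>.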
+
+# Negative Test Case: the vinsert.16 sequence does not combine into G_BUILD_VECTOR_TRUNC because one index is missing its vinsert.16
+---
+name: vinsert16-bf256_idx_miss
+legalized: false
+body: |
+  bb.1.entry:
+
+    ; CHECK-LABEL: name: vinsert16-bf256_idx_miss
+    ; CHECK: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[INT]](<16 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), [[BITCAST]](<8 x s32>), [[C]](s32)
+    ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT1]](<16 x s32>), [[C]](s32), [[C15]](s32)
+    ; CHECK-NEXT: [[INT3:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT2]](<16 x s32>), [[C1]](s32), [[C13]](s32)
+    ; CHECK-NEXT: [[INT4:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT3]](<16 x s32>), [[C2]](s32), [[C11]](s32)
+    ; CHECK-NEXT: [[INT5:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT4]](<16 x s32>), [[C3]](s32), [[C9]](s32)
+    ; CHECK-NEXT: [[INT6:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT5]](<16 x s32>), [[C4]](s32), [[C7]](s32)
+    ; CHECK-NEXT: [[INT7:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT6]](<16 x s32>), [[C5]](s32), [[C5]](s32)
+    ; CHECK-NEXT: [[INT8:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT7]](<16 x s32>), [[C6]](s32), [[C3]](s32)
+    ; CHECK-NEXT: [[INT9:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT8]](<16 x s32>), [[C7]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[INT10:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT9]](<16 x s32>), [[C8]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[INT11:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT10]](<16 x s32>), [[C9]](s32), [[C2]](s32)
+    ; CHECK-NEXT: [[INT12:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT11]](<16 x s32>), [[C10]](s32), [[C4]](s32)
+    ; CHECK-NEXT: [[INT13:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT12]](<16 x s32>), [[C12]](s32), [[C8]](s32)
+    ; CHECK-NEXT: [[INT14:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT13]](<16 x s32>), [[C13]](s32), [[C10]](s32)
+    ; CHECK-NEXT: [[INT15:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT14]](<16 x s32>), [[C14]](s32), [[C12]](s32)
+    ; CHECK-NEXT: [[INT16:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT15]](<16 x s32>), [[C15]](s32), [[C14]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT16]](<16 x s32>)
+    %0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
+    %100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_CONSTANT i32 2
+    %4:_(s32) = G_CONSTANT i32 3
+    %5:_(s32) = G_CONSTANT i32 4
+    %6:_(s32) = G_CONSTANT i32 5
+    %7:_(s32) = G_CONSTANT i32 6
+    %8:_(s32) = G_CONSTANT i32 7
+    %9:_(s32) = G_CONSTANT i32 8
+    %10:_(s32) = G_CONSTANT i32 9
+    %11:_(s32) = G_CONSTANT i32 10
+    %12:_(s32) = G_CONSTANT i32 11
+    %13:_(s32) = G_CONSTANT i32 12
+    %14:_(s32) = G_CONSTANT i32 13
+    %15:_(s32) = G_CONSTANT i32 14
+    %16:_(s32) = G_CONSTANT i32 15
+    %101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), %100(<8 x s32>), %1(s32)
+    %17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
+    %18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
+    %19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
+    %20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
+    %21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
+    %22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
+    %23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
+    %24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
+    %25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
+    %26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
+    %27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
+    %28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %13(s32), %9(s32)
+    %29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %14(s32), %11(s32)
+    %30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %15(s32), %13(s32)
+    %31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %16(s32), %15(s32)
+    PseudoRET implicit $lr, implicit %31
+...
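+
+# In vinsert16-bf256_idx_miss above, no vinsert16.bf512 writes index 11 (%12),
+# so lane coverage is incomplete and the chain is left untouched.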
+
+# Negative Test Case: the vinsert.16 sequence does not combine into G_BUILD_VECTOR_TRUNC because a single index has multiple vinsert.16 instructions
+---
+name: vinsert16-bf256_idx_multiple
+legalized: false
+body: |
+  bb.1.entry:
+    ; CHECK-LABEL: name: vinsert16-bf256_idx_multiple
+    ; CHECK: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[INT]](<16 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), [[BITCAST]](<8 x s32>), [[C]](s32)
+    ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT1]](<16 x s32>), [[C]](s32), [[C15]](s32)
+    ; CHECK-NEXT: [[INT3:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT2]](<16 x s32>), [[C1]](s32), [[C13]](s32)
+    ; CHECK-NEXT: [[INT4:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT3]](<16 x s32>), [[C2]](s32), [[C11]](s32)
+    ; CHECK-NEXT: [[INT5:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT4]](<16 x s32>), [[C3]](s32), [[C9]](s32)
+    ; CHECK-NEXT: [[INT6:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT5]](<16 x s32>), [[C4]](s32), [[C7]](s32)
+    ; CHECK-NEXT: [[INT7:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT6]](<16 x s32>), [[C5]](s32), [[C5]](s32)
+    ; CHECK-NEXT: [[INT8:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT7]](<16 x s32>), [[C6]](s32), [[C3]](s32)
+    ; CHECK-NEXT: [[INT9:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT8]](<16 x s32>), [[C7]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[INT10:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT9]](<16 x s32>), [[C8]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[INT11:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT10]](<16 x s32>), [[C9]](s32), [[C2]](s32)
+    ; CHECK-NEXT: [[INT12:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT11]](<16 x s32>), [[C10]](s32), [[C4]](s32)
+    ; CHECK-NEXT: [[INT13:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT12]](<16 x s32>), [[C11]](s32), [[C6]](s32)
+    ; CHECK-NEXT: [[INT14:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT13]](<16 x s32>), [[C12]](s32), [[C8]](s32)
+    ; CHECK-NEXT: [[INT15:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT14]](<16 x s32>), [[C13]](s32), [[C10]](s32)
+    ; CHECK-NEXT: [[INT16:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT15]](<16 x s32>), [[C13]](s32), [[C15]](s32)
+    ; CHECK-NEXT: [[INT17:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT16]](<16 x s32>), [[C14]](s32), [[C12]](s32)
+    ; CHECK-NEXT: [[INT18:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), [[INT17]](<16 x s32>), [[C15]](s32), [[C14]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT18]](<16 x s32>)
+    %0:_(<16 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bfloat16)
+    %100:_(<8 x s32>) = G_BITCAST %0(<16 x s16>)
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_CONSTANT i32 2
+    %4:_(s32) = G_CONSTANT i32 3
+    %5:_(s32) = G_CONSTANT i32 4
+    %6:_(s32) = G_CONSTANT i32 5
+    %7:_(s32) = G_CONSTANT i32 6
+    %8:_(s32) = G_CONSTANT i32 7
+    %9:_(s32) = G_CONSTANT i32 8
+    %10:_(s32) = G_CONSTANT i32 9
+    %11:_(s32) = G_CONSTANT i32 10
+    %12:_(s32) = G_CONSTANT i32 11
+    %13:_(s32) = G_CONSTANT i32 12
+    %14:_(s32) = G_CONSTANT i32 13
+    %15:_(s32) = G_CONSTANT i32 14
+    %16:_(s32) = G_CONSTANT i32 15
+    %101:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.set.bf512.bf256), %100(<8 x s32>), %1(s32)
+    %17:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %101(<16 x s32>), %1(s32), %16(s32)
+    %18:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %17(<16 x s32>), %2(s32), %14(s32)
+    %19:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %18(<16 x s32>), %3(s32), %12(s32)
+    %20:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %19(<16 x s32>), %4(s32), %10(s32)
+    %21:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %20(<16 x s32>), %5(s32), %8(s32)
+    %22:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %21(<16 x s32>), %6(s32), %6(s32)
+    %23:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %22(<16 x s32>), %7(s32), %4(s32)
+    %24:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %23(<16 x s32>), %8(s32), %2(s32)
+    %25:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %24(<16 x s32>), %9(s32), %1(s32)
+    %26:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %25(<16 x s32>), %10(s32), %3(s32)
+    %27:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %26(<16 x s32>), %11(s32), %5(s32)
+    %28:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %27(<16 x s32>), %12(s32), %7(s32)
+    %29:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %28(<16 x s32>), %13(s32), %9(s32)
+    %30:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %29(<16 x s32>), %14(s32), %11(s32)
+    %31:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %30(<16 x s32>), %14(s32), %16(s32)
+    %32:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %31(<16 x s32>), %15(s32), %13(s32)
+    %33:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vinsert16.bf512), %32(<16 x s32>), %16(s32), %15(s32)
+    PseudoRET implicit $lr, implicit %33
+...
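+
+# In vinsert16-bf256_idx_multiple above, index 13 (%14) is written twice (by
+# %30 and %31), so the lanes are not each defined exactly once and the combine
+# is skipped.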