Review notes (text before the first "diff --git" line is not part of any hunk):

* Line breaks and indentation were reconstructed from a copy in which this
  patch had been collapsed onto a few long lines. Indentation and the position
  of blank context lines are best effort; check them against the tree.
* Removed lines, context lines and both test hunks are reproduced token for
  token. Some look truncated ("SmallVector SubVecs;" has no element type, the
  shufflevector lines in shufflevec.ll have no mask operand); presumably
  "<...>" text was lost in transit -- confirm against the original patch.
* The added C++ lines differ from the received patch as follows:
  - buildUnmergeVector is documented, the cast of the already unsigned SubIdx
    is dropped, and the vectors are spelled SmallVector<Register> (element
    type inferred from the Register values stored in them).
  - matchShuffleToSubvecBroadcast creates ExtractSubvecDstReg inside each
    MatchInfo lambda, as the removed code did, instead of before the checks,
    so the "return false" path no longer creates a register.
  - Hunk headers are adjusted for the changed line counts.

diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
index 9731b7f517da..5c1b4d76042b 100644
--- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp
+++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
@@ -2004,6 +2004,26 @@ buildExtractSubvector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
   return B.buildInstr(Opc, {DstVecReg}, {NewSrcReg, Cst});
 }
 
+/// Build a G_UNMERGE_VALUES that splits \p SrcReg into \p NumSubVectors
+/// values, each of the same type as \p DstReg. The value at position
+/// \p SubIdx is defined directly into \p DstReg; every other result goes to
+/// a fresh generic virtual register.
+/// \p SubIdx must be less than \p NumSubVectors, otherwise \p DstReg is not
+/// defined by the emitted instruction.
+static void buildUnmergeVector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+                               Register DstReg, Register SrcReg,
+                               unsigned NumSubVectors, unsigned SubIdx) {
+  const LLT DstTy = MRI.getType(DstReg);
+  SmallVector<Register> SubVecs;
+  for (unsigned I = 0; I < NumSubVectors; ++I) {
+    if (I == SubIdx)
+      SubVecs.push_back(DstReg);
+    else
+      SubVecs.push_back(MRI.createGenericVirtualRegister(DstTy));
+  }
+  B.buildUnmerge(SubVecs, SrcReg);
+}
+
 /// Match something like this:
 ///   %1:_(<16 x s32>) = COPY $x0
 ///   %2:_(<16 x s32>) = COPY $x1
@@ -2020,18 +2040,10 @@ static bool matchShuffleToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   unsigned NumSubVectors) {
   const Register DstReg = MI.getOperand(0).getReg();
   const Register Src1Reg = MI.getOperand(1).getReg();
-  const LLT DstTy = MRI.getType(DstReg);
 
   // TODO: Select into G_EXTRACT_SUBVECTOR once it is more widely supported
   MatchInfo = [=, &MRI](MachineIRBuilder &B) {
-    SmallVector SubVecs;
-    for (unsigned I = 0; I < NumSubVectors; I++) {
-      if (I == (unsigned)SubIdx)
-        SubVecs.push_back(DstReg);
-      else
-        SubVecs.push_back(MRI.createGenericVirtualRegister(DstTy));
-    }
-    B.buildUnmerge(SubVecs, Src1Reg);
+    buildUnmergeVector(B, MRI, DstReg, Src1Reg, NumSubVectors, SubIdx);
   };
   return true;
 }
@@ -2247,20 +2259,45 @@ static bool matchShuffleToSubvecBroadcast(MachineInstr &MI,
   const LLT ElemTy = Src1Ty.getElementType();
   const LLT DstSubvecType =
       LLT::fixed_vector(SplatMaskLen, ElemTy.getSizeInBits());
+  const unsigned SubIdx = SplatMaskStart / SplatMaskLen;
 
   // Check whether we can extract the subvector
-  if (!checkExtractSubvectorPrerequisites(TII, DstSubvecType, Src1Ty))
-    return false;
+  const bool CanExtractSubvector =
+      checkExtractSubvectorPrerequisites(TII, DstSubvecType, Src1Ty);
+  if (CanExtractSubvector) {
+    MatchInfo = [=, &MRI, &TII](MachineIRBuilder &B) {
+      // Create the temporary here rather than while matching, so that a
+      // match that fails or is never applied leaves no unused register.
+      const Register ExtractSubvecDstReg =
+          MRI.createGenericVirtualRegister(DstSubvecType);
+      auto Extract = buildExtractSubvector(B, MRI, TII, ExtractSubvecDstReg,
+                                           Src1Reg, SubIdx);
+      buildBroadcastVector(B, MRI, Extract.getReg(0), DstReg);
+    };
+    return true;
+  }
 
-  MatchInfo = [=, &MRI, &TII](MachineIRBuilder &B) {
-    Register ExtractSubvecDstReg =
-        MRI.createGenericVirtualRegister(DstSubvecType);
-    auto Extract =
-        buildExtractSubvector(B, MRI, TII, ExtractSubvecDstReg, Src1Reg,
-                              SplatMaskStart / SplatMaskLen);
-    buildBroadcastVector(B, MRI, Extract.getReg(0), DstReg);
-  };
-  return true;
+  // If we cannot extract the subvector, we try to apply UNMERGE + CONCAT
+  const unsigned NumSubVectors = NumSrcElems / SplatMaskLen;
+  // Don't try to unmerge when there is only one subvector. A plain copy
+  // would handle that case, but other combiners do a better job of it.
+  if (NumSubVectors > 1 && NumDstElems == SplatMaskLen * 2) {
+    MatchInfo = [=, &MRI](MachineIRBuilder &B) {
+      // Created at apply time, for the same reason as above.
+      const Register ExtractSubvecDstReg =
+          MRI.createGenericVirtualRegister(DstSubvecType);
+      buildUnmergeVector(B, MRI, ExtractSubvecDstReg, Src1Reg, NumSubVectors,
+                         SubIdx);
+
+      // The result is the selected subvector repeated twice.
+      const SmallVector<Register> ConcatOps = {ExtractSubvecDstReg,
+                                               ExtractSubvecDstReg};
+      B.buildConcatVectors({DstReg}, ConcatOps);
+    };
+    return true;
+  }
+
+  return false;
 }
 
 /// Match something like this:
diff --git a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir
index 4b58797ba7f3..d524de203bf5 100644
--- a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir
@@ -990,3 +990,42 @@ body: |
     %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<8 x s32>), %2(<8 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 2)
     PseudoRET implicit $lr, implicit %0
 ...
+
+# Test G_SHUFFLE_VECTOR to UNMERGE+CONCAT
+---
+name: shuffle_vector_to_unmerge_concat_lo
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: shuffle_vector_to_unmerge_concat_lo
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
+    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<16 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[AIE_UNPAD_VECTOR]](<8 x s32>), [[AIE_UNPAD_VECTOR]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x0
+    %2:_(<16 x s32>) = COPY $x1
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)
+    PseudoRET implicit $lr, implicit %0
+...
+
+---
+name: shuffle_vector_to_unmerge_concat_hi
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: shuffle_vector_to_unmerge_concat_hi
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[UV1]](<8 x s32>), [[UV1]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x0
+    %2:_(<16 x s32>) = COPY $x1
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15)
+    PseudoRET implicit $lr, implicit %0
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll
index 4ce0eadc0c24..6af7abf935d5 100644
--- a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll
+++ b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll
@@ -580,3 +580,33 @@ entry:
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
   ret <8 x i32> %shuffle
 }
+
+define <16 x i32> @shuffle_vector_to_unmerge_concat_lo(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: shuffle_vector_to_unmerge_concat_lo:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: vmov x0, x2 // Delay Slot 3
+; CHECK-NEXT: vmov wh0, wl0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @shuffle_vector_to_unmerge_concat_hi(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: shuffle_vector_to_unmerge_concat_hi:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: vmov x0, x2 // Delay Slot 3
+; CHECK-NEXT: vmov wl0, wh0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}