From 6138a604b9afa88c318b7206210a7db20e9927ed Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Fri, 8 Nov 2024 15:01:46 +0530 Subject: [PATCH] Support for allowing direct VEXTRACT to 20-bit registers --- llvm/lib/Target/AIE/AIE2InstrInfo.cpp | 5 +- llvm/lib/Target/AIE/AIECombinerHelper.cpp | 48 ++- .../prelegalizercombiner-s20-narrowing.mir | 291 ++++++++++++++++++ .../verifier/verify-szext-extract-vec-elt.mir | 4 +- 4 files changed, 336 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index cdd39e6732fa..4e6df730b9a1 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI, switch (MI.getOpcode()) { case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT: case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT: - ErrInfo = "Expected 32bit scalar destination"; - return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32); + ErrInfo = "Expected 32bit or 20bit scalar destination"; + return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) || + MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20)); case AIE2::G_AIE_PAD_VECTOR_UNDEF: return verifySameLaneTypes(MI, ErrInfo) && isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()), diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 2272c13d047d..a22d4cab7d27 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -567,6 +567,18 @@ void llvm::applyGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI, B.buildConstant(LLT::scalar(20), -static_cast<int64_t>(Offset))); } +/// Determine if the instruction is a generic extract vector element operation +static bool IsGenericExtractVectorElt(const MachineInstr &MI) { + const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI.getMF()); + const AIEBaseInstrInfo *TII = STI.getInstrInfo(); 
+ const unsigned Opcode = MI.getOpcode(); + + if (Opcode == TII->getGenericExtractVectorEltOpcode(false) || + Opcode == TII->getGenericExtractVectorEltOpcode(true)) + return true; + return false; +} + /// Checks whether the instruction produces or can be adapted to produce /// a single S20 output. static bool canProduceS20(const MachineRegisterInfo &MRI, @@ -581,9 +593,12 @@ static bool canProduceS20(const MachineRegisterInfo &MRI, case TargetOpcode::G_CONSTANT: case TargetOpcode::G_IMPLICIT_DEF: return true; - default: + default: { + if (IsGenericExtractVectorElt(MI)) + return true; return false; } + } } /// The function checks if the node can be adapted to produce an S20 value, and @@ -901,15 +916,19 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B, return true; } default: { - LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); - llvm_unreachable("Unexpected OpCode, while modifying IR"); + if (IsGenericExtractVectorElt(*StartNodeMI)) { + Observer.changingInstr(*StartNodeMI); + MRI.setType(StartNodeMI->getOperand(0).getReg(), S20); + Observer.changedInstr(*StartNodeMI); + } else { + LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); + llvm_unreachable("Unexpected OpCode, while modifying IR"); + } } } - switch (StartNodeMI->getOpcode()) { - case TargetOpcode::COPY: - case TargetOpcode::G_LOAD: - case TargetOpcode::G_PHI: { + // Function to handle the modification of instructions + auto ModifyInstructionUses = [&](MachineInstr *StartNodeMI) { const auto UseInstIter = MRI.use_nodbg_instructions(StartNodeMI->getOperand(0).getReg()); std::vector<MachineInstr *> UseInstr; @@ -924,11 +943,22 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B, if (!modifyToS20(NextNodeToModify, MRI, B, Observer, Helper)) llvm_unreachable("All input nodes should have updated"); } + }; + + switch (StartNodeMI->getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::G_LOAD: + case TargetOpcode::G_PHI: { + ModifyInstructionUses(StartNodeMI); 
break; } default: { - LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); - llvm_unreachable("Unexpected OpCode, while modifying IR"); + if (IsGenericExtractVectorElt(*StartNodeMI)) { + ModifyInstructionUses(StartNodeMI); + } else { + LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); + llvm_unreachable("Unexpected OpCode, while modifying IR"); + } } } return true; diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir index cadd495db6af..834717d24ad4 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir @@ -870,3 +870,294 @@ body: | G_STORE %50:_(s16), %6:_(p0) :: (store (s16)) G_BR %bb.2 ... + +--- +name: valid_vextract8_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract8_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), 
%4:_(s20), %4:_(s20) + $p0 = COPY %6 +... + +--- +name: valid_vextract16_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract16_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +--- +name: valid_vextract32_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract32_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Only one Src Node (vextract8) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract8_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract8_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<64 x s8>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %3(<64 x s8>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Only one Src Node (vextract16) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract16_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract16_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<32 x s16>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %3(<32 x s16>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Only one Src Node (vextract32) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract32_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract32_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<16 x s32>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %3(<16 x s32>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract8 source node has a non-constant sign register +--- +name: valid_vextract8_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract8_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract16 source node has a non-constant sign register +--- +name: valid_vextract16_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract16_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract32 source node has a non-constant sign register +--- +name: valid_vextract32_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract32_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
diff --git a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir index b9f489914d56..b56fa14667f8 100644 --- a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir @@ -18,6 +18,8 @@ body: | %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) %3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) ... --- @@ -25,7 +27,7 @@ name: nok alignment: 16 body: | bb.0 (align 16): - ; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination + ; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination ; CHECK-NOT: Bad machine code %0:_(<16 x s16>) = COPY $wl0 %1:_(s32) = G_CONSTANT i32 1