From 14ccc32b7a3462ff42bba1e0ff745ee7e028157b Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 15 Oct 2024 20:54:55 +0200 Subject: [PATCH] [H2BLB][SDISel] Start implementing the lowering of calls This patch teaches SDISel how to lower a call on the H2BLB target. This patch is currently limited to calls that expect all their arguments in registers. Note that we did not add any end-to-end test yet because we have not taught the lowering of the stack how to handle callee-saved registers. We need this support to save R0 (our link register) through a call. --- llvm/lib/Target/H2BLB/H2BLBFrameLowering.cpp | 20 +++ llvm/lib/Target/H2BLB/H2BLBFrameLowering.h | 4 + llvm/lib/Target/H2BLB/H2BLBISelLowering.cpp | 146 ++++++++++++++++++ llvm/lib/Target/H2BLB/H2BLBISelLowering.h | 12 ++ llvm/lib/Target/H2BLB/H2BLBInstrInfo.cpp | 3 +- llvm/lib/Target/H2BLB/H2BLBRegisterInfo.cpp | 6 + llvm/lib/Target/H2BLB/H2BLBRegisterInfo.h | 2 + .../CodeGen/H2BLB/SDISel/isel-only-abi.ll | 69 +++++++++ 8 files changed, 261 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/H2BLB/H2BLBFrameLowering.cpp b/llvm/lib/Target/H2BLB/H2BLBFrameLowering.cpp index 70955e15cacc..a3dd041d85da 100644 --- a/llvm/lib/Target/H2BLB/H2BLBFrameLowering.cpp +++ b/llvm/lib/Target/H2BLB/H2BLBFrameLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Error.h" using namespace llvm; @@ -52,3 +53,22 @@ void H2BLBFrameLowering::emitEpilogue(MachineFunction &MF, .addImm(NumBytes); } } + +MachineBasicBlock::iterator H2BLBFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + unsigned Opc = MI->getOpcode(); + + if (Opc != TII->getCallFrameSetupOpcode() && + Opc != TII->getCallFrameDestroyOpcode()) + report_fatal_error("Unexpected frame 
pseudo instruction"); + + if (MI->getOperand(0).getImm() != 0) + report_fatal_error("Proper frame lowering not yet implemented"); + + if (MI->getOperand(1).getImm() != 0) + report_fatal_error("Callee pop count not supported"); + + return MBB.erase(MI); +} diff --git a/llvm/lib/Target/H2BLB/H2BLBFrameLowering.h b/llvm/lib/Target/H2BLB/H2BLBFrameLowering.h index 41097c0cf65c..805ae6a3f55e 100644 --- a/llvm/lib/Target/H2BLB/H2BLBFrameLowering.h +++ b/llvm/lib/Target/H2BLB/H2BLBFrameLowering.h @@ -27,6 +27,10 @@ class H2BLBFrameLowering : public TargetFrameLowering { void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; + + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/H2BLB/H2BLBISelLowering.cpp b/llvm/lib/Target/H2BLB/H2BLBISelLowering.cpp index ff5f71872aa9..996a27286680 100644 --- a/llvm/lib/Target/H2BLB/H2BLBISelLowering.cpp +++ b/llvm/lib/Target/H2BLB/H2BLBISelLowering.cpp @@ -159,6 +159,152 @@ SDValue H2BLBTargetLowering::LowerFormalArguments( return Chain; } +SDValue H2BLBTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + CallingConv::ID CallConv = CLI.CallConv; + MachineFunction &MF = DAG.getMachineFunction(); + + // H2BLB target does not support tail call optimization. + CLI.IsTailCall = false; + + // Ditto for variadic arguments, though unlike tail calls, this is not + // an optimization, therefore if it is requested, we must bail out. 
+ if (CLI.IsVarArg) + report_fatal_error("Var args not yet implemented"); + bool IsVarArg = false; + + switch (CallConv) { + default: + report_fatal_error("unsupported calling convention: " + Twine(CallConv)); + case CallingConv::Fast: + case CallingConv::C: + break; + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CC_H2BLB_Common); + + unsigned NumBytes = CCInfo.getStackSize(); + + // FIXME: Technically we would need to check that we support the + // flags requested in Outs; for now only byval is rejected. + for (const ISD::OutputArg &Out : Outs) { + ISD::ArgFlagsTy OutFlags = Out.Flags; + if (OutFlags.isByVal()) + report_fatal_error("Unsupported attribute"); + } + + SDValue InGlue; + + SmallVector> RegsToPass; + + // Walk arg assignments + for (size_t i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue &Arg = OutVals[i]; + + if (VA.getLocInfo() != CCValAssign::Full) + report_fatal_error("extensions not yet implemented: " + + Twine(VA.getLocInfo())); + + // Push arguments into RegsToPass vector + if (VA.isRegLoc()) { + RegsToPass.emplace_back(VA.getLocReg(), Arg); + continue; + } + assert(VA.isMemLoc() && "Expected stack argument"); + report_fatal_error("stack arguments not yet implemented"); + } + + // Now that we collected all the registers, start the call sequence. + auto PtrVT = getPointerTy(MF.getDataLayout()); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + // We do this and not in the previous loop to chain the registers as close + // as possible to the actual call. 
+ for (auto &RegToPass : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, CLI.DL, RegToPass.first, RegToPass.second, + InGlue); + InGlue = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that all the generic code cannot + // mess with it. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT, + G->getOffset(), 0); + else + report_fatal_error("non-direct calls not implemented"); + + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known to be live into the call. + for (auto &Reg : RegsToPass) + Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + + const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI.getCallPreservedMask(MF, CallConv); + + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // The call will return a chain & a flag for retval copies to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(H2BLBISD::CALL, CLI.DL, NodeTys, Ops); + InGlue = Chain.getValue(1); + + // Propagate any NoMerge attribute that we may have. + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + + // Finish the call sequence. + Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL); + InGlue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector RetValLocs; + CCState CCRetInfo(CallConv, IsVarArg, MF, RetValLocs, *DAG.getContext()); + + CCRetInfo.AnalyzeCallResult(Ins, RetCC_H2BLB_Common); + + // Copy all of the result registers out of their specified physreg. 
+ for (size_t i = 0, e = RetValLocs.size(); i != e; ++i) { + CCValAssign &VA = RetValLocs[i]; + assert(VA.isRegLoc() && "stack return not yet implemented"); + assert(VA.getLocInfo() == CCValAssign::Full && + "extension/truncation of any sort, not yet implemented"); + + Chain = + DAG.getCopyFromReg(Chain, CLI.DL, VA.getLocReg(), VA.getValVT(), InGlue) + .getValue(1); + + // Feed the glue of this copy into the next one so that all the + // emitted copies stay stuck together. + InGlue = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + void H2BLBTargetLowering::finalizeLowering(MachineFunction &MF) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/llvm/lib/Target/H2BLB/H2BLBISelLowering.h b/llvm/lib/Target/H2BLB/H2BLBISelLowering.h index c2b86eee9add..889ef38cf13b 100644 --- a/llvm/lib/Target/H2BLB/H2BLBISelLowering.h +++ b/llvm/lib/Target/H2BLB/H2BLBISelLowering.h @@ -56,6 +56,18 @@ class H2BLBTargetLowering : public TargetLowering { const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + /// Lower the arguments and results of a call (from the caller's perspective) + /// as described by \p CLI. + /// The resulting values from the callee are fed into \p InVals. + /// The lowering consists of the classic sequence: + /// CALLSEQ_START + /// arg setup (as described by CLI.Outs and CLI.OutVals) + /// CALL to callee (as described by CLI.Callee) + /// CALLSEQ_END + /// grab callee resulting values. + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + /// Perform the last clean-ups after finishing instruction selection. 
void finalizeLowering(MachineFunction &MF) const override; diff --git a/llvm/lib/Target/H2BLB/H2BLBInstrInfo.cpp b/llvm/lib/Target/H2BLB/H2BLBInstrInfo.cpp index 67bb98498e0b..ca571c7ed8f1 100644 --- a/llvm/lib/Target/H2BLB/H2BLBInstrInfo.cpp +++ b/llvm/lib/Target/H2BLB/H2BLBInstrInfo.cpp @@ -26,7 +26,8 @@ using namespace llvm; -H2BLBInstrInfo::H2BLBInstrInfo() : H2BLBGenInstrInfo() {} +H2BLBInstrInfo::H2BLBInstrInfo() + : H2BLBGenInstrInfo(H2BLB::ADJCALLSTACKDOWN, H2BLB::ADJCALLSTACKUP) {} void H2BLBInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.cpp b/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.cpp index d87f53d9e56e..db1d1374a33f 100644 --- a/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.cpp +++ b/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.cpp @@ -29,6 +29,12 @@ H2BLBRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SaveList; } +const uint32_t * +H2BLBRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + return CSR_RegMask; +} + BitVector H2BLBRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // Reserve the stack register so that the register allocator doesn't diff --git a/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.h b/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.h index f8927caf4454..2219fa6977ca 100644 --- a/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.h +++ b/llvm/lib/Target/H2BLB/H2BLBRegisterInfo.h @@ -25,6 +25,8 @@ struct H2BLBRegisterInfo : public H2BLBGenRegisterInfo { H2BLBRegisterInfo(); const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/llvm/test/CodeGen/H2BLB/SDISel/isel-only-abi.ll b/llvm/test/CodeGen/H2BLB/SDISel/isel-only-abi.ll index 6593ed6f9983..2782d74fdf29 
100644 --- a/llvm/test/CodeGen/H2BLB/SDISel/isel-only-abi.ll +++ b/llvm/test/CodeGen/H2BLB/SDISel/isel-only-abi.ll @@ -128,3 +128,72 @@ define i16 @fourArgsi16(i16 %arg, i16 %arg1, i16 %arg2, i16 %arg3) { ; CHECK-NEXT: RETURN implicit $r0, implicit $r1 ret i16 %arg3 } + +; Check that we properly set r1 as the input argument for the call. +define i16 @callAFctWithOneArg(i16 %arg) { + ; CHECK-LABEL: name: callAFctWithOneArg + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $r1, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr16 = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr16 = COPY $r1 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $r1 = COPY [[COPY1]] + ; CHECK-NEXT: CALL @oneArgi16, csr, implicit-def dead $r0, implicit $sp, implicit $r1, implicit-def $sp, implicit-def $r1 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr16 = COPY $r1 + ; CHECK-NEXT: $r1 = COPY [[COPY2]] + ; CHECK-NEXT: $r0 = COPY [[COPY]] + ; CHECK-NEXT: RETURN implicit $r0, implicit $r1 + %res = call i16 @oneArgi16(i16 %arg) + ret i16 %res +} + +declare i16 @arg16_32(i16, i32) + +; Check that we set r1 and d1 as the input arguments for the call. 
+define i16 @callAFctWithArg16_32(i16 %arg, i32 %arg1) { + ; CHECK-LABEL: name: callAFctWithArg16_32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $r1, $d1, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr16 = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr16 = COPY $r1 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $r1 = COPY [[COPY2]] + ; CHECK-NEXT: $d1 = COPY [[COPY1]] + ; CHECK-NEXT: CALL @arg16_32, csr, implicit-def dead $r0, implicit $sp, implicit $r1, implicit $d1, implicit-def $sp, implicit-def $r1 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr16 = COPY $r1 + ; CHECK-NEXT: $r1 = COPY [[COPY3]] + ; CHECK-NEXT: $r0 = COPY [[COPY]] + ; CHECK-NEXT: RETURN implicit $r0, implicit $r1 + %res = call i16 @arg16_32(i16 %arg, i32 %arg1) + ret i16 %res +} + +declare i16 @arg16_16(i16, i16) + +; Check that we swap the input args (r1, r2) into (r2, r1) to match the +; argument order expected by the call. 
+define i16 @callAFctWithTwoI16Arg(i16 %arg, i16 %arg1) { + ; CHECK-LABEL: name: callAFctWithTwoI16Arg + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $r1, $r2, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr16 = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr16 = COPY $r2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr16 = COPY $r1 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $r1 = COPY [[COPY1]] + ; CHECK-NEXT: $r2 = COPY [[COPY2]] + ; CHECK-NEXT: CALL @arg16_16, csr, implicit-def dead $r0, implicit $sp, implicit $r1, implicit $r2, implicit-def $sp, implicit-def $r1 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr16 = COPY $r1 + ; CHECK-NEXT: $r1 = COPY [[COPY3]] + ; CHECK-NEXT: $r0 = COPY [[COPY]] + ; CHECK-NEXT: RETURN implicit $r0, implicit $r1 + %res = call i16 @arg16_16(i16 %arg1, i16 %arg) + ret i16 %res +}