Skip to content

Commit

Permalink
[H2BLB][SDISel] Start implementing the lowering of calls
Browse files Browse the repository at this point in the history
This patch teaches SDISel how to lower a call on the H2BLB target.
This patch is currently limited to calls that expect all their arguments
in registers.

Note that we have not added any end-to-end test yet, because we have not
yet taught the stack (frame) lowering how to handle callee-saved registers.

We need this support to preserve R0 (our link register) across a call.
  • Loading branch information
qcolombet committed Oct 20, 2024
1 parent aeae504 commit 14ccc32
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 1 deletion.
20 changes: 20 additions & 0 deletions llvm/lib/Target/H2BLB/H2BLBFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Error.h"

using namespace llvm;

Expand Down Expand Up @@ -52,3 +53,22 @@ void H2BLBFrameLowering::emitEpilogue(MachineFunction &MF,
.addImm(NumBytes);
}
}

/// Erase the call-frame pseudo instruction \p MI from \p MBB.
///
/// Only the call-frame setup/destroy pseudos reported by the target's
/// instruction info are accepted, and only when their first two immediate
/// operands are both zero; every other case is reported as a fatal error.
///
/// \returns the iterator produced by erasing \p MI from \p MBB.
MachineBasicBlock::iterator H2BLBFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MI) const {
  const TargetInstrInfo *InstrInfo = MF.getSubtarget().getInstrInfo();
  const unsigned Opcode = MI->getOpcode();

  const bool IsCallFramePseudo =
      Opcode == InstrInfo->getCallFrameSetupOpcode() ||
      Opcode == InstrInfo->getCallFrameDestroyOpcode();
  if (!IsCallFramePseudo)
    report_fatal_error("Unexpected frame pseudo instruction");

  // A non-zero first immediate means real frame adjustment would be needed,
  // which this target does not implement yet.
  const int64_t FrameAdjustment = MI->getOperand(0).getImm();
  if (FrameAdjustment != 0)
    report_fatal_error("Proper frame lowering not yet implemented");

  // Likewise, a non-zero second immediate is rejected.
  const int64_t CalleePopCount = MI->getOperand(1).getImm();
  if (CalleePopCount != 0)
    report_fatal_error("Callee pop count not supported");

  // Nothing to materialize: the pseudo simply goes away.
  return MBB.erase(MI);
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/H2BLB/H2BLBFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class H2BLBFrameLowering : public TargetFrameLowering {
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;

bool hasFP(const MachineFunction &MF) const override;

/// Remove the call-frame setup/destroy pseudo instruction \p MI from
/// \p MBB and return the iterator resulting from the erase.
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
};
} // namespace llvm
#endif
146 changes: 146 additions & 0 deletions llvm/lib/Target/H2BLB/H2BLBISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,152 @@ SDValue H2BLBTargetLowering::LowerFormalArguments(
return Chain;
}

/// Lower the outgoing call described by \p CLI into SelectionDAG nodes.
///
/// The emitted sequence is:
///   CALLSEQ_START
///   one glued CopyToReg per register argument
///   H2BLBISD::CALL
///   CALLSEQ_END
///   one glued CopyFromReg per returned value (appended to \p InVals)
///
/// Everything that is not supported yet is reported through
/// report_fatal_error so that it is diagnosed in release builds as well:
/// variadic calls, calling conventions other than C/Fast, byval arguments,
/// argument or result extensions, stack arguments or results, and callees
/// that are not GlobalAddress nodes.
///
/// \returns the token chain coming out of the call sequence and of the
/// copies of the returned values, if any.
SDValue H2BLBTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;

  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  MachineFunction &MF = DAG.getMachineFunction();

  // H2BLB target does not support tail call optimization.
  CLI.IsTailCall = false;

  // Ditto for variadic arguments, though unlike tail calls, this is not
  // an optimization, therefore if it is requested, we must bail out.
  if (CLI.IsVarArg)
    report_fatal_error("Var args not yet implemented");
  bool IsVarArg = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, CC_H2BLB_Common);

  unsigned NumBytes = CCInfo.getStackSize();

  // FIXME: Technically we would need to check that we support the
  // flags requested in Outs.
  for (const ISD::OutputArg &Out : Outs) {
    ISD::ArgFlagsTy OutFlags = Out.Flags;
    if (OutFlags.isByVal())
      report_fatal_error("Unsupported attribute");
  }

  SDValue InGlue;

  SmallVector<std::pair<Register, SDValue>> RegsToPass;

  // Walk arg assignments
  for (size_t i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    if (VA.getLocInfo() != CCValAssign::Full)
      report_fatal_error("extensions not yet implemented: " +
                         Twine(VA.getLocInfo()));

    // Push arguments into RegsToPass vector
    if (VA.isRegLoc()) {
      RegsToPass.emplace_back(VA.getLocReg(), Arg);
      continue;
    }
    assert(VA.isMemLoc() && "Expected stack argument");
    report_fatal_error("stack arguments not yet implemented");
  }

  // Now that we collected all the registers, start the call sequence.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  // We do this and not in the previous loop to chain the registers as close
  // as possible to the actual call.
  for (auto &RegToPass : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, RegToPass.first, RegToPass.second,
                             InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that all the generic code cannot
  // mess with it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  else
    report_fatal_error("non-direct calls not implemented");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known to be live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI.getCallPreservedMask(MF, CallConv);

  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // The call will return a chain & a flag for retval copies to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(H2BLBISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Propagate any NoMerge attribute that we may have.
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Finish the call sequence.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetValLocs;
  CCState CCRetInfo(CallConv, IsVarArg, MF, RetValLocs, *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(Ins, RetCC_H2BLB_Common);

  // Copy all of the result registers out of their specified physreg.
  for (const CCValAssign &VA : RetValLocs) {
    // These are missing features that depend on the input, not internal
    // invariants: diagnose them like the argument side does instead of
    // relying on asserts that vanish in release builds.
    if (!VA.isRegLoc())
      report_fatal_error("stack return not yet implemented");
    if (VA.getLocInfo() != CCValAssign::Full)
      report_fatal_error(
          "extension/truncation of any sort, not yet implemented");

    // Chain now refers to result #1 (the chain) of the CopyFromReg node;
    // result #0 is the copied value and result #2 is the glue.
    Chain =
        DAG.getCopyFromReg(Chain, CLI.DL, VA.getLocReg(), VA.getValVT(), InGlue)
            .getValue(1);

    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

void H2BLBTargetLowering::finalizeLowering(MachineFunction &MF) const {
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/H2BLB/H2BLBISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ class H2BLBTargetLowering : public TargetLowering {
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;

/// Lower the arguments and results of a call (from the caller's perspective)
/// as described by \p CLI.
/// The values returned by the callee are appended to \p InVals.
/// The lowering consists of the classic sequence:
/// CALLSEQ_START
/// arg setup (as described by CLI.Outs and CLI.OutVals)
/// CALL to callee (as described by CLI.Callee)
/// CALLSEQ_END
/// copies of the values returned by the callee.
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;

/// Perform the last clean-ups after finishing instruction selection.
void finalizeLowering(MachineFunction &MF) const override;

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/H2BLB/H2BLBInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@

using namespace llvm;

H2BLBInstrInfo::H2BLBInstrInfo() : H2BLBGenInstrInfo() {}
// Pass the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo opcodes to the generated
// base class; these are the pseudos that bracket a call sequence.
H2BLBInstrInfo::H2BLBInstrInfo()
: H2BLBGenInstrInfo(H2BLB::ADJCALLSTACKDOWN, H2BLB::ADJCALLSTACKUP) {}

void H2BLBInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/H2BLB/H2BLBRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ H2BLBRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
}

// Return the register mask attached to calls to describe which registers are
// preserved. The same mask (CSR_RegMask) is returned whatever \p MF and
// \p CC are.
const uint32_t *
H2BLBRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
return CSR_RegMask;
}

BitVector H2BLBRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
// Reserve the stack register so that the register allocator doesn't
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/H2BLB/H2BLBRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ struct H2BLBRegisterInfo : public H2BLBGenRegisterInfo {
H2BLBRegisterInfo();

const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
/// Return the mask of registers preserved across a call using calling
/// convention \p CC in \p MF.
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;

BitVector getReservedRegs(const MachineFunction &MF) const override;

Expand Down
69 changes: 69 additions & 0 deletions llvm/test/CodeGen/H2BLB/SDISel/isel-only-abi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,72 @@ define i16 @fourArgsi16(i16 %arg, i16 %arg1, i16 %arg2, i16 %arg3) {
; CHECK-NEXT: RETURN implicit $r0, implicit $r1
ret i16 %arg3
}

; Check that we properly set r1 as the input argument for the call.
; The incoming r0 is kept in a virtual register across the call (the CALL
; marks r0 as implicit-def dead) and copied back before the RETURN.
; The call result is read from r1 after the call sequence ends.
define i16 @callAFctWithOneArg(i16 %arg) {
; CHECK-LABEL: name: callAFctWithOneArg
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $r1, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr16 = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr16 = COPY $r1
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $r1 = COPY [[COPY1]]
; CHECK-NEXT: CALL @oneArgi16, csr, implicit-def dead $r0, implicit $sp, implicit $r1, implicit-def $sp, implicit-def $r1
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr16 = COPY $r1
; CHECK-NEXT: $r1 = COPY [[COPY2]]
; CHECK-NEXT: $r0 = COPY [[COPY]]
; CHECK-NEXT: RETURN implicit $r0, implicit $r1
%res = call i16 @oneArgi16(i16 %arg)
ret i16 %res
}

declare i16 @arg16_32(i16, i32)

; Check that we set r1 and d1 as the input arguments for the call.
; The i16 argument travels in r1 and the i32 argument in d1; both are
; listed as implicit uses of the CALL so they are live into it.
define i16 @callAFctWithArg16_32(i16 %arg, i32 %arg1) {
; CHECK-LABEL: name: callAFctWithArg16_32
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $r1, $d1, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr16 = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $d1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr16 = COPY $r1
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $r1 = COPY [[COPY2]]
; CHECK-NEXT: $d1 = COPY [[COPY1]]
; CHECK-NEXT: CALL @arg16_32, csr, implicit-def dead $r0, implicit $sp, implicit $r1, implicit $d1, implicit-def $sp, implicit-def $r1
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr16 = COPY $r1
; CHECK-NEXT: $r1 = COPY [[COPY3]]
; CHECK-NEXT: $r0 = COPY [[COPY]]
; CHECK-NEXT: RETURN implicit $r0, implicit $r1
%res = call i16 @arg16_32(i16 %arg, i32 %arg1)
ret i16 %res
}

declare i16 @arg16_16(i16, i16)

; Check that the incoming arguments in r1 and r2 are swapped when passed to
; the callee: the call uses (%arg1, %arg), so the value that came in through
; r2 must go out in r1 and the value that came in through r1 must go out
; in r2.
define i16 @callAFctWithTwoI16Arg(i16 %arg, i16 %arg1) {
; CHECK-LABEL: name: callAFctWithTwoI16Arg
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $r1, $r2, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr16 = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr16 = COPY $r2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr16 = COPY $r1
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $r1 = COPY [[COPY1]]
; CHECK-NEXT: $r2 = COPY [[COPY2]]
; CHECK-NEXT: CALL @arg16_16, csr, implicit-def dead $r0, implicit $sp, implicit $r1, implicit $r2, implicit-def $sp, implicit-def $r1
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr16 = COPY $r1
; CHECK-NEXT: $r1 = COPY [[COPY3]]
; CHECK-NEXT: $r0 = COPY [[COPY]]
; CHECK-NEXT: RETURN implicit $r0, implicit $r1
%res = call i16 @arg16_16(i16 %arg1, i16 %arg)
ret i16 %res
}

0 comments on commit 14ccc32

Please sign in to comment.