diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c4458b14f36ece..b6e9d51709b577 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -19696,11 +19696,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't // split it and then direct call can be matched by PseudoCALL. + bool CalleeIsLargeExternalSymbol = false; if (getTargetMachine().getCodeModel() == CodeModel::Large) { if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee)) Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG); - else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG); + CalleeIsLargeExternalSymbol = true; + } } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = S->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); @@ -19736,16 +19739,28 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + // Use software guarded branch for large code model non-indirect calls + // Tail call to external symbol will have a null CLI.CB and we need another + // way to determine the callsite type + bool NeedSWGuarded = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large && + Subtarget.hasStdExtZicfilp() && + ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol)) + NeedSWGuarded = true; + if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); + unsigned CallOpc = + NeedSWGuarded ? 
RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL; + SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops); if (CLI.CFIType) Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); return Ret; } - Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); + unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL; + Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); if (CLI.CFIType) Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); @@ -20193,6 +20208,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CZERO_EQZ) NODE_NAME_CASE(CZERO_NEZ) NODE_NAME_CASE(SW_GUARDED_BRIND) + NODE_NAME_CASE(SW_GUARDED_CALL) + NODE_NAME_CASE(SW_GUARDED_TAIL) NODE_NAME_CASE(TUPLE_INSERT) NODE_NAME_CASE(TUPLE_EXTRACT) NODE_NAME_CASE(SF_VC_XV_SE) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index ceb9d499002846..05581552ab6041 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -411,9 +411,12 @@ enum NodeType : unsigned { CZERO_EQZ, // vt.maskc for XVentanaCondOps. CZERO_NEZ, // vt.maskcn for XVentanaCondOps. - /// Software guarded BRIND node. Operand 0 is the chain operand and - /// operand 1 is the target address. + // Software guarded BRIND node. Operand 0 is the chain operand and + // operand 1 is the target address. 
SW_GUARDED_BRIND, + // Software guarded calls for large code model + SW_GUARDED_CALL, + SW_GUARDED_TAIL, SF_VC_XV_SE, SF_VC_IV_SE, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index fe5623e2920e22..ed1b3227748a14 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -57,6 +57,9 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, def riscv_call : SDNode<"RISCVISD::CALL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def riscv_sw_guarded_call : SDNode<"RISCVISD::SW_GUARDED_CALL", SDT_RISCVCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def riscv_ret_glue : SDNode<"RISCVISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def riscv_sret_glue : SDNode<"RISCVISD::SRET_GLUE", SDTNone, @@ -69,6 +72,9 @@ def riscv_brcc : SDNode<"RISCVISD::BR_CC", SDT_RISCVBrCC, def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def riscv_sw_guarded_tail : SDNode<"RISCVISD::SW_GUARDED_TAIL", SDT_RISCVCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def riscv_sw_guarded_brind : SDNode<"RISCVISD::SW_GUARDED_BRIND", SDTBrind, [SDNPHasChain]>; def riscv_sllw : SDNode<"RISCVISD::SLLW", SDT_RISCVIntBinOpW>; @@ -1555,10 +1561,15 @@ let Predicates = [NoStdExtZicfilp] in def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1), [(riscv_call GPRJALR:$rs1)]>, PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; -let Predicates = [HasStdExtZicfilp] in +let Predicates = [HasStdExtZicfilp] in { def PseudoCALLIndirectNonX7 : Pseudo<(outs), (ins GPRJALRNonX7:$rs1), [(riscv_call GPRJALRNonX7:$rs1)]>, PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; +// For large code model, non-indirect calls could be software-guarded +def PseudoCALLIndirectX7 : Pseudo<(outs), (ins GPRX7:$rs1), + [(riscv_sw_guarded_call GPRX7:$rs1)]>, + PseudoInstExpansion<(JALR X1, 
GPR:$rs1, 0)>; +} } let isBarrier = 1, isReturn = 1, isTerminator = 1 in @@ -1579,10 +1590,15 @@ let Predicates = [NoStdExtZicfilp] in def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1), [(riscv_tail GPRTC:$rs1)]>, PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; -let Predicates = [HasStdExtZicfilp] in +let Predicates = [HasStdExtZicfilp] in { def PseudoTAILIndirectNonX7 : Pseudo<(outs), (ins GPRTCNonX7:$rs1), [(riscv_tail GPRTCNonX7:$rs1)]>, PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; +// For large code model, non-indirect calls could be software-guarded +def PseudoTAILIndirectX7 : Pseudo<(outs), (ins GPRX7:$rs1), + [(riscv_sw_guarded_tail GPRX7:$rs1)]>, + PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; +} } def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)), diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll index 598a026fb95526..f18bbb4ed84ee3 100644 --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -11,6 +11,8 @@ ; RUN: | FileCheck -check-prefix=RV64I-MEDIUM %s ; RUN: llc -code-model=large -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I-LARGE %s +; RUN: llc -code-model=large -mtriple=riscv64 -mattr=experimental-zicfilp -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-LARGE-ZICFILP %s declare i32 @external_function(i32) @@ -62,6 +64,19 @@ define i32 @test_call_external(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_external: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi0: +; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi0)(a1) +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; 
RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @external_function(i32 %a) ret i32 %1 } @@ -116,6 +131,19 @@ define i32 @test_call_dso_local(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_dso_local: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi1: +; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi1)(a1) +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @dso_local_function(i32 %a) ret i32 %1 } @@ -145,6 +173,12 @@ define i32 @defined_function(i32 %a) nounwind { ; RV64I-LARGE: # %bb.0: ; RV64I-LARGE-NEXT: addiw a0, a0, 1 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: defined_function: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addiw a0, a0, 1 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = add i32 %a, 1 ret i32 %1 } @@ -197,6 +231,19 @@ define i32 @test_call_defined(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_defined: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi2: +; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI3_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi2)(a1) +; 
RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @defined_function(i32 %a) ret i32 %1 } @@ -256,6 +303,18 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_indirect: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, a1 +; RV64I-LARGE-ZICFILP-NEXT: jalr a2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 %a(i32 %b) ret i32 %1 } @@ -347,6 +406,24 @@ define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_indirect_no_t0: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv t1, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, a1 +; RV64I-LARGE-ZICFILP-NEXT: mv a1, a2 +; RV64I-LARGE-ZICFILP-NEXT: mv a2, a3 +; RV64I-LARGE-ZICFILP-NEXT: mv a3, a4 +; RV64I-LARGE-ZICFILP-NEXT: mv a4, a5 +; RV64I-LARGE-ZICFILP-NEXT: mv a5, a6 +; RV64I-LARGE-ZICFILP-NEXT: mv a6, a7 +; RV64I-LARGE-ZICFILP-NEXT: jalr t1 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) ret i32 %1 } @@ -379,6 
+456,12 @@ define fastcc i32 @fastcc_function(i32 %a, i32 %b) nounwind { ; RV64I-LARGE: # %bb.0: ; RV64I-LARGE-NEXT: addw a0, a0, a1 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: fastcc_function: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addw a0, a0, a1 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = add i32 %a, %b ret i32 %1 } @@ -452,6 +535,24 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind { ; RV64I-LARGE-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_fastcc: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv s0, a0 +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi3: +; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call fastcc i32 @fastcc_function(i32 %a, i32 %b) ret i32 %a } @@ -572,6 +673,33 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 32 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_external_many_args: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; 
RV64I-LARGE-ZICFILP-NEXT: mv s0, a0 +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: +; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a1, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a2, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a3, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a4, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a5, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a6, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a7, s0 +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @external_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) ret i32 %a @@ -607,6 +735,13 @@ define i32 @defined_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 % ; RV64I-LARGE-NEXT: lw a0, 8(sp) ; RV64I-LARGE-NEXT: addiw a0, a0, 1 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: defined_many_args: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: lw a0, 8(sp) +; RV64I-LARGE-ZICFILP-NEXT: addiw a0, a0, 1 +; RV64I-LARGE-ZICFILP-NEXT: ret %added = add i32 %j, 1 ret i32 %added } @@ -704,6 +839,28 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 32 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_defined_many_args: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi5: +; RV64I-LARGE-ZICFILP-NEXT: auipc 
a1, %pcrel_hi(.LCPI10_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: mv a1, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a3, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a4, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a5, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a6, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a7, a0 +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @defined_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index d3e495bb723ad8..4dd6ed68ff9811 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple riscv32-unknown-linux-gnu -mattr=experimental-zicfilp \ +; RUN: -code-model=large -o - %s \ +; RUN: | FileCheck %s -check-prefix=CHECK-LARGE-ZICFILP ; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s ; Perform tail call optimization for global address. 
@@ -8,6 +11,14 @@ define i32 @caller_tail(i32 %i) nounwind { ; CHECK-LABEL: caller_tail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: tail callee_tail +; +; CHECK-LARGE-ZICFILP-LABEL: caller_tail: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi0: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi0)(a1) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: %r = tail call i32 @callee_tail(i32 %i) ret i32 %r @@ -26,6 +37,21 @@ define void @caller_extern(ptr %src) optsize { ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: tail memcpy +; +; CHECK-LARGE-ZICFILP-LABEL: caller_extern: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi1: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) +; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi1)(a1) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi2: +; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI1_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi2)(a2) +; CHECK-LARGE-ZICFILP-NEXT: li a2, 7 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3 +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false) ret void @@ -43,6 +69,21 @@ define void @caller_extern_pgso(ptr %src) !prof !14 { ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: tail memcpy +; +; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi3: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI2_0) +; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi3)(a1) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: +; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI2_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 
%pcrel_lo(.Lpcrel_hi4)(a2) +; CHECK-LARGE-ZICFILP-NEXT: li a2, 7 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3 +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false) ret void @@ -63,8 +104,21 @@ define void @caller_indirect_tail(i32 %a) nounwind { ; CHECK-NEXT: lui t1, %hi(callee_indirect1) ; CHECK-NEXT: addi t1, t1, %lo(callee_indirect1) ; CHECK-NEXT: jr t1 - - +; +; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_tail: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: beqz a0, .LBB3_2 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.1: # %entry +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi6: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, %pcrel_lo(.Lpcrel_hi6)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t1 +; CHECK-LARGE-ZICFILP-NEXT: .LBB3_2: +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi5: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI3_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, %pcrel_lo(.Lpcrel_hi5)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t1 entry: %tobool = icmp eq i32 %a, 0 %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2 @@ -86,6 +140,19 @@ define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5 ; CHECK-NEXT: mv a5, a6 ; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: jr t1 +; +; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_no_t0: +; CHECK-LARGE-ZICFILP: # %bb.0: +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: mv t1, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a2 +; CHECK-LARGE-ZICFILP-NEXT: mv a2, a3 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a4 +; CHECK-LARGE-ZICFILP-NEXT: mv a4, a5 +; CHECK-LARGE-ZICFILP-NEXT: mv a5, a6 +; CHECK-LARGE-ZICFILP-NEXT: mv a6, a7 +; CHECK-LARGE-ZICFILP-NEXT: jr t1 %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) ret 
i32 %9 } @@ -108,6 +175,26 @@ define void @caller_varargs(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_varargs: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi7: +; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI5_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi7)(a2) +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: mv a2, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a4, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a5, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a6, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a7, a0 +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %call = tail call i32 (i32, ...) 
@callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a) ret void @@ -136,6 +223,31 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_args: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8: +; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2) +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ret i32 %r @@ -158,6 +270,25 @@ define void @caller_indirect_args() nounwind { ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lui 
a0, 262128 +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a0) +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000) ret void @@ -169,6 +300,14 @@ define void @caller_weak() nounwind { ; CHECK-LABEL: caller_weak: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: tail callee_weak +; +; CHECK-LARGE-ZICFILP-LABEL: caller_weak: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi10: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi10)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @callee_weak() ret void @@ -217,6 +356,48 @@ define void @caller_irq() nounwind "interrupt"="machine" { ; CHECK-NEXT: lw t6, 0(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: mret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_irq: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -64 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a3, 32(sp) 
# 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi11: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI9_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi11)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 64 +; CHECK-LARGE-ZICFILP-NEXT: mret entry: tail call void @callee_irq() ret void @@ -238,6 +419,22 @@ define i32 @caller_byval() nounwind { ; CHECK-NEXT: lw ra, 
12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_byval: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lw a0, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi12: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI10_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi12)(a0) +; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 4 +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %a = alloca ptr %r = tail call i32 @callee_byval(ptr byval(ptr) %a) @@ -260,6 +457,22 @@ define void @caller_nostruct() nounwind { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_nostruct: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi13: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI11_0) +; CHECK-LARGE-ZICFILP-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi13)(a0) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi14: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI11_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi14)(a1) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: tail call void @callee_struct(ptr sret(%struct.A) @a) ret void @@ -276,6 +489,19 @@ define void @caller_struct(ptr sret(%struct.A) %a) nounwind { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: 
addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_struct: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi15: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI12_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi15)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: tail call void @callee_nostruct() ret void @@ -291,6 +517,19 @@ define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: disable_tail_calls: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi16: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI13_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi16)(a1) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %rv = tail call i32 @callee_tail(i32 %i) ret i32 %rv @@ -317,6 +556,35 @@ define i32 @duplicate_returns(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: tail test1 ; CHECK-NEXT: .LBB14_6: # %if.else8 ; CHECK-NEXT: tail test3 +; +; CHECK-LARGE-ZICFILP-LABEL: duplicate_returns: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: beqz a0, .LBB14_4 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.1: # %if.else +; CHECK-LARGE-ZICFILP-NEXT: beqz a1, .LBB14_5 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.2: # 
%if.else4 +; CHECK-LARGE-ZICFILP-NEXT: bge a1, a0, .LBB14_6 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.3: # %if.then6 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi19: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi19)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: .LBB14_4: # %if.then +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi17: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_3) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi17)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: .LBB14_5: # %if.then2 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi18: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_2) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi18)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: .LBB14_6: # %if.else8 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi20: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi20)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: %cmp = icmp eq i32 %a, 0 br i1 %cmp, label %if.then, label %if.else