From ece8814f7b637e66c0d472b213feb9eccd6a1aea Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 26 Feb 2024 15:06:31 -0500 Subject: [PATCH 1/2] codegen: optimize const fields of mutable objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example, we seek to eliminate the gc frame from this function, as observed here: ```julia julia> code_llvm((BitSet,), raw=true) do x; r = x.bits; GC.safepoint(); @inbounds r[1]; end ; Function Signature: var"#3"(Base.BitSet) ; @ REPL[1]:1 within `#3` define swiftcc i64 @"julia_#3_494"(ptr nonnull swiftself %pgcstack, ptr noundef nonnull align 8 dereferenceable(16) %"x::BitSet") #0 !dbg !5 { top: call void @llvm.dbg.declare(metadata ptr %"x::BitSet", metadata !21, metadata !DIExpression()), !dbg !22 %ptls_field = getelementptr inbounds ptr, ptr %pgcstack, i64 2 %ptls_load = load ptr, ptr %ptls_field, align 8, !tbaa !23 %0 = getelementptr inbounds ptr, ptr %ptls_load, i64 2 %safepoint = load ptr, ptr %0, align 8, !tbaa !27 fence syncscope("singlethread") seq_cst %1 = load volatile i64, ptr %safepoint, align 8, !dbg !22 fence syncscope("singlethread") seq_cst ; ┌ @ Base.jl:49 within `getproperty` %"x::BitSet.bits" = load atomic ptr, ptr %"x::BitSet" unordered, align 8, !dbg !29, !tbaa !27, !alias.scope !33, !noalias !36, !nonnull !11, !dereferenceable !41, !align !42 ; └ ; ┌ @ gcutils.jl:253 within `safepoint` %ptls_load4 = load ptr, ptr %ptls_field, align 8, !dbg !43, !tbaa !23 %2 = getelementptr inbounds ptr, ptr %ptls_load4, i64 2, !dbg !43 %safepoint5 = load ptr, ptr %2, align 8, !dbg !43, !tbaa !27 fence syncscope("singlethread") seq_cst, !dbg !43 %3 = load volatile i64, ptr %safepoint5, align 8, !dbg !43 fence syncscope("singlethread") seq_cst, !dbg !43 ; └ ; ┌ @ essentials.jl:892 within `getindex` %4 = load ptr, ptr %"x::BitSet.bits", align 8, !dbg !46, !tbaa !49, !alias.scope !52, !noalias !53 %5 = load i64, ptr %4, align 8, !dbg !46, !tbaa !54, !alias.scope !57, !noalias !58 ret i64 %5, !dbg !46 ; └ } ``` --- src/cgutils.cpp | 2 ++ src/codegen.cpp | 72 +++++++++++++++++++++---------------------------- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7bbf7a84a0385..11bf835e30020 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2664,6 +2664,8 @@ static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_da return ctx.tbaa().tbaa_arraysize; } } + if (strct.V && jl_field_isconst(jt, idx) && isa(strct.V->stripInBoundsOffsets())) // TODO: we use our isLoadFromConstGV helper here instead instead of just isa (but without PhiNode handling) + return ctx.tbaa().tbaa_const; return tbaa; } diff --git a/src/codegen.cpp b/src/codegen.cpp index 59c7763350fff..faa9de2762471 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8267,7 +8267,7 @@ static jl_llvm_functions_t // step 7. allocate local variables slots // must be in the first basic block for the llvm mem2reg pass to work - auto allocate_local = [&](jl_varinfo_t &varinfo, jl_sym_t *s) { + auto allocate_local = [&ctx, &dbuilder, &debuginfo, topdebugloc, va, debug_enabled, M](jl_varinfo_t &varinfo, jl_sym_t *s, int i) { jl_value_t *jt = varinfo.value.typ; assert(!varinfo.boxroot); // variables shouldn't have memory locs already if (varinfo.value.constant) { @@ -8275,10 +8275,10 @@ static jl_llvm_functions_t alloc_def_flag(ctx, varinfo); return; } - else if (varinfo.isArgument && !(specsig && i == (size_t)ctx.vaSlot)) { - // if we can unbox it, just use the input pointer - if (i != (size_t)ctx.vaSlot && jl_is_concrete_immutable(jt)) - return; + else if (varinfo.isArgument && (!va || ctx.vaSlot == -1 || i != ctx.vaSlot)) { + // just use the input pointer, if we have it + // (we will need to attach debuginfo later to it) + return; } else if (jl_is_uniontype(jt)) { bool allunbox; @@ -8289,6 +8289,7 @@ static jl_llvm_functions_t varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); setName(ctx.emission_context, varinfo.pTIndex, "tindex"); + // TODO: attach debug metadata to this variable } else if (allunbox) { // all ghost values just need a selector allocated @@ -8297,6 +8298,7 @@ static jl_llvm_functions_t varinfo.pTIndex = lv; varinfo.value.tbaa = NULL; varinfo.value.isboxed = false; + // TODO: attach debug metadata to this variable } if (lv || allunbox) alloc_def_flag(ctx, varinfo); @@ -8323,29 +8325,21 @@ static jl_llvm_functions_t } return; } - if (!varinfo.isArgument || // always need a slot if the variable is assigned - specsig || // for arguments, give them stack slots if they aren't in `argArray` (otherwise, will use that pointer) - (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple - i == 0) { // or it is the first argument (which isn't in `argArray`) - AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(), - nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); - StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); - SI->insertAfter(ctx.topalloca); - varinfo.boxroot = av; - if (debug_enabled && varinfo.dinfo) { - DIExpression *expr; - if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) { - expr = dbuilder.createExpression(); - } - else { - SmallVector addr; - addr.push_back(llvm::dwarf::DW_OP_deref); - expr = dbuilder.createExpression(addr); - } - dbuilder.insertDeclare(av, varinfo.dinfo, expr, - topdebugloc, - ctx.builder.GetInsertBlock()); - } + // otherwise give it a boxroot in this function + AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(), + nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); + StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); + SI->insertAfter(ctx.topalloca); + varinfo.boxroot = av; + if (debug_enabled && varinfo.dinfo) { + SmallVector addr; + DIExpression *expr; + if ((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt) + addr.push_back(llvm::dwarf::DW_OP_deref); + expr = dbuilder.createExpression(addr); + dbuilder.insertDeclare(av, varinfo.dinfo, expr, + topdebugloc, + ctx.builder.GetInsertBlock()); } }; @@ -8359,7 +8353,7 @@ static jl_llvm_functions_t varinfo.usedUndef = false; continue; } - allocate_local(varinfo, s); + allocate_local(varinfo, s, (int)i); } std::map upsilon_to_phic; @@ -8402,7 +8396,7 @@ static jl_llvm_functions_t vi.used = true; vi.isVolatile = true; vi.value = mark_julia_type(ctx, (Value*)NULL, false, typ); - allocate_local(vi, jl_symbol("phic")); + allocate_local(vi, jl_symbol("phic"), -1); } } } @@ -8542,7 +8536,7 @@ static jl_llvm_functions_t ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, vi.value.typ)); theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { + if (debug_enabled && vi.dinfo && !vi.boxroot) { SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); addr.push_back(llvm::dwarf::DW_OP_plus_uconst); @@ -8561,21 +8555,15 @@ static jl_llvm_functions_t assert(vi.value.V == NULL && "unexpected variable slot created for argument"); // keep track of original (possibly boxed) value to avoid re-boxing or moving vi.value = theArg; - if (specsig && theArg.V && debug_enabled && vi.dinfo) { - SmallVector addr; - Value *parg; + if (debug_enabled && vi.dinfo && theArg.V) { if (theArg.ispointer()) { - parg = theArg.V; - if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt) - addr.push_back(llvm::dwarf::DW_OP_deref); + dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(), + topdebugloc, ctx.builder.GetInsertBlock()); } else { - parg = ctx.builder.CreateAlloca(theArg.V->getType(), NULL, jl_symbol_name(s)); - ctx.builder.CreateStore(theArg.V, parg); + dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(), + topdebugloc, ctx.builder.GetInsertBlock()); } - dbuilder.insertDeclare(parg, vi.dinfo, dbuilder.createExpression(addr), - topdebugloc, - ctx.builder.GetInsertBlock()); } } else { From 50114d7a1fce1701cf53dafcfc6bafb193afb2e5 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 26 Feb 2024 15:49:39 -0500 Subject: [PATCH 2/2] codegen: optimize const fields of mutable objects even more Make this analysis even stronger, using a function from llvm-late-gc-lowering.cpp that implements it more aggressively --- src/cgutils.cpp | 100 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 11bf835e30020..b17b174dd8a8d 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2644,6 +2644,104 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa); } +static bool isTBAA(MDNode *TBAA, std::initializer_list const strset) +{ + if (!TBAA) + return false; + while (TBAA->getNumOperands() > 1) { + TBAA = cast(TBAA->getOperand(1).get()); + auto str = cast(TBAA->getOperand(0))->getString(); + for (auto str2 : strset) { + if (str == str2) { + return true; + } + } + } + return false; +} + +// Check if this is a load from an immutable value. The easiest +// way to do so is to look at the tbaa and see if it derives from +// jtbaa_immut. +static bool isLoadFromImmut(LoadInst *LI) +{ + if (LI->getMetadata(LLVMContext::MD_invariant_load)) + return true; + MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa); + if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"})) + return true; + return false; +} + +static bool isConstGV(GlobalVariable *gv) +{ + return gv->isConstant() || gv->getMetadata("julia.constgv"); +} + +// Check if this is can be traced through constant loads to an constant global +// or otherwise globally rooted value. +// Almost all `tbaa_const` loads satisfies this with the exception of +// task local constants which are constant as far as the code is concerned but aren't +// global constants. For task local constant `task_local` will be true when this function +// returns. +// Unlike this function in llvm-late-gc-lowering, we do not examine PhiNode, as those are not emitted yet +static bool isLoadFromConstGV(LoadInst *LI); +static bool isLoadFromConstGV(Value *v) +{ + v = v->stripInBoundsOffsets(); + if (auto LI = dyn_cast(v)) + return isLoadFromConstGV(LI); + if (auto gv = dyn_cast(v)) + return isConstGV(gv); + // null pointer + if (isa(v)) + return true; + // literal pointers + if (auto CE = dyn_cast(v)) + return (CE->getOpcode() == Instruction::IntToPtr && + isa(CE->getOperand(0))); + if (auto SL = dyn_cast(v)) + return (isLoadFromConstGV(SL->getTrueValue()) && + isLoadFromConstGV(SL->getFalseValue())); + if (auto call = dyn_cast(v)) { + auto callee = call->getCalledFunction(); + if (callee && callee->getName() == "julia.typeof") { + return true; + } + if (callee && callee->getName() == "julia.get_pgcstack") { + return true; + } + if (callee && callee->getName() == "julia.gc_loaded") { + return isLoadFromConstGV(call->getArgOperand(0)) && + isLoadFromConstGV(call->getArgOperand(1)); + } + } + if (isa(v)) { + return true; + } + return false; +} + +// The white list implemented here and above in `isLoadFromConstGV(Value*)` should +// cover all the cases we and LLVM generates. +static bool isLoadFromConstGV(LoadInst *LI) +{ + // We only emit single slot GV in codegen + // but LLVM global merging can change the pointer operands to GEPs/bitcasts + auto load_base = LI->getPointerOperand()->stripInBoundsOffsets(); + assert(load_base); // Static analyzer + auto gv = dyn_cast(load_base); + if (isLoadFromImmut(LI)) { + if (gv) + return true; + return isLoadFromConstGV(load_base); + } + if (gv) + return isConstGV(gv); + return false; +} + + static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_datatype_t *jt, unsigned idx, size_t byte_offset) { auto tbaa = strct.tbaa; @@ -2664,7 +2762,7 @@ static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_da return ctx.tbaa().tbaa_arraysize; } } - if (strct.V && jl_field_isconst(jt, idx) && isa(strct.V->stripInBoundsOffsets())) // TODO: we use our isLoadFromConstGV helper here instead instead of just isa (but without PhiNode handling) + if (strct.V && jl_field_isconst(jt, idx) && isLoadFromConstGV(strct.V)) return ctx.tbaa().tbaa_const; return tbaa; }