From 1e1e710ddd7a7c0fec3c79b69024fa3b7cb2a14c Mon Sep 17 00:00:00 2001
From: William Moses
Date: Tue, 6 Aug 2024 05:01:59 -0400
Subject: [PATCH] AllocOpt: Fix stack lowering where alloca contains boxed and
 unboxed data (#55306)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Valentin Churavy
Co-authored-by: Mosè Giordano
Co-authored-by: Gabriel Baraldi
---
 src/llvm-alloc-helpers.cpp        | 10 ++++++++-
 src/llvm-alloc-helpers.h          |  7 ++++++
 src/llvm-alloc-opt.cpp            | 15 +++++++++++++
 test/llvmpasses/alloc-opt-bits.ll | 37 +++++++++++++++++++++++++++++++
 4 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 test/llvmpasses/alloc-opt-bits.ll

diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index 953ecc1830142..9d2fba832839c 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -88,6 +88,8 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
     memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
     memop.isobjref = hasObjref(elty);
     auto &field = getField(offset, size, elty);
+    field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
+
     if (field.second.hasobjref != memop.isobjref)
         field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits
     if (!isstore)
@@ -198,6 +200,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                 auto elty = inst->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, 0, cur.offset,
                                                    inst->getType(),
@@ -289,6 +292,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                     auto elty = storev->getType();
                     required.use_info.has_unknown_objref |= hasObjref(elty);
                     required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                    required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                     required.use_info.hasunknownmem = true;
                 } else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
                                                        cur.offset, storev->getType(),
@@ -310,10 +314,14 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
             }
             required.use_info.hasload = true;
             auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            Type *elty = storev->getType();
             if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
-                cur.offset, storev->getType(),
+                cur.offset, elty,
                 true, required.DL)) {
                 LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n");
+                required.use_info.has_unknown_objref |= hasObjref(elty);
+                required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             }
             required.use_info.refload = true;
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
index 49c3b15332a56..20e9132d10b4c 100644
--- a/src/llvm-alloc-helpers.h
+++ b/src/llvm-alloc-helpers.h
@@ -46,6 +46,8 @@ namespace jl_alloc {
         bool hasaggr:1;
         bool multiloc:1;
         bool hasload:1;
+        // The alloc has an unboxed object at this offset.
+        bool hasunboxed:1;
         llvm::Type *elty;
         llvm::SmallVector<MemOp*, 4> accesses;
         Field(uint32_t size, llvm::Type *elty)
@@ -54,6 +56,7 @@ namespace jl_alloc {
               hasaggr(false),
               multiloc(false),
               hasload(false),
+              hasunboxed(false),
               elty(elty)
         {
         }
@@ -95,6 +98,9 @@ namespace jl_alloc {
         // The alloc has an aggregate Julia object reference not in an explicit field.
         bool has_unknown_objrefaggr:1;
 
+        // The alloc has an unboxed object at an unknown offset.
+        bool has_unknown_unboxed:1;
+
         void reset()
         {
             escaped = false;
@@ -110,6 +116,7 @@ namespace jl_alloc {
             allockind = llvm::AllocFnKind::Unknown;
             has_unknown_objref = false;
             has_unknown_objrefaggr = false;
+            has_unknown_unboxed = false;
             uses.clear();
             preserves.clear();
             memops.clear();
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index e0cde7206b6b9..5984ad55d221c 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -252,10 +252,12 @@ void Optimizer::optimizeAll()
             removeAlloc(orig);
             continue;
         }
+        bool has_unboxed = use_info.has_unknown_unboxed;
         bool has_ref = use_info.has_unknown_objref;
         bool has_refaggr = use_info.has_unknown_objrefaggr;
         for (auto memop: use_info.memops) {
             auto &field = memop.second;
+            has_unboxed |= field.hasunboxed;
             if (field.hasobjref) {
                 has_ref = true;
                 // This can be relaxed a little based on hasload
@@ -284,6 +286,19 @@ void Optimizer::optimizeAll()
             splitOnStack(orig);
             continue;
         }
+        // The move-to-stack code below, if has_ref is set, changes the allocation to an array of jl_value_t's. This is fine
+        // if all of the stored objects are jl_value_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jl_value_t }),
+        // then moveToStack will create a [2 x jl_value_t] alloca that is bitcast to { float, jl_value_t }.
+        // This later causes the GC rooting pass to mischaracterize the float as a pointer to a GC value.
+        if (has_unboxed && has_ref) {
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", orig);
+            });
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
         REMARK([&](){
             return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig)
                 << "GC allocation moved to stack " << ore::NV("GC Allocation", orig);
diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll
new file mode 100644
index 0000000000000..e19093f46f815
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-bits.ll
@@ -0,0 +1,37 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s
+
+
+@tag = external addrspace(10) global {}
+
+@glob = external addrspace(10) global {}
+
+; Test that an allocation containing both unboxed bits and boxed references is not moved to the stack.
+
+; CHECK-LABEL: @ptr_and_bits
+; CHECK-NOT: alloca
+; CHECK: call noalias ptr addrspace(10) @julia.gc_alloc_obj
+
+define void @ptr_and_bits(ptr %fptr, i1 %b, i1 %b2, i32 %idx) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 16, ptr addrspace(10) @tag)
+
+  %g0 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 1
+  store ptr addrspace(10) @glob, ptr addrspace(10) %g0
+
+  %g1 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 0
+  store i64 7, ptr addrspace(10) %g1
+
+  %res = load ptr addrspace(10), ptr addrspace(10) %g0
+  %res2 = load i64, ptr addrspace(10) %g1
+  ret void
+}
+
+declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
+
+declare ptr @julia.ptls_states()
+
+declare ptr @julia.get_pgcstack()
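
For illustration, the LLVM IR below is a minimal sketch, not taken from the patch, of the kind of pre-patch stack lowering that the new has_unboxed && has_ref guard now refuses to perform for an allocation shaped like the { i64, ptr addrspace(10) } object in the test above. The function and value names are hypothetical; the point is that the alloca advertises two GC-tracked pointer slots, so the GC rooting pass would scan both of them and misread the unboxed i64 stored in slot 0 as a reference.

@glob = external addrspace(10) global {}

define void @stack_lowering_sketch() {
; Hypothetical pre-patch result of moveToStack with has_ref set: the object becomes an
; alloca of two GC-tracked pointer slots, so the GC rooting pass treats both slots as
; live references, including the one that actually holds raw i64 bits.
  %stack = alloca [2 x ptr addrspace(10)], align 8
  %slot0 = getelementptr [2 x ptr addrspace(10)], ptr %stack, i32 0, i32 0
  store i64 7, ptr %slot0
  %slot1 = getelementptr [2 x ptr addrspace(10)], ptr %stack, i32 0, i32 1
  store ptr addrspace(10) @glob, ptr %slot1
  ret void
}

With the guard in place, such mixed allocations keep the original julia.gc_alloc_obj call instead, which is what the CHECK-NOT: alloca and CHECK: call lines in the test assert.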