diff --git a/src/gc.c b/src/gc.c index f39769b1c9213..8a856c9a003fb 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3343,18 +3343,27 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT { + jl_unreachable(); } JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT { + jl_unreachable(); +} + +JL_DLLEXPORT void jl_gc_wb_binding_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_unreachable(); } JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT { + jl_unreachable(); } JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT { + jl_unreachable(); } #ifdef __cplusplus diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index d97838c2c738f..32ec4866792f8 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -193,6 +193,7 @@ XX(jl_gc_queue_root) \ XX(jl_gc_wb1_noinline) \ XX(jl_gc_wb2_noinline) \ + XX(jl_gc_wb_binding_noinline) \ XX(jl_gc_wb1_slow) \ XX(jl_gc_wb2_slow) \ XX(jl_gc_safepoint) \ diff --git a/src/julia.h b/src/julia.h index d1a5332b53796..a93ad4877f3da 100644 --- a/src/julia.h +++ b/src/julia.h @@ -965,6 +965,7 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ } #else // MMTK_GC STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT; +STATIC_INLINE void mmtk_gc_wb_binding(const void *parent, const void *ptr) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { @@ -2317,25 +2318,43 @@ STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSA mmtk_object_reference_write_post(&ptls->mmtk_mutator, parent, ptr); } -// Inlined fastpath -STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT +// Fastpath. 
Return 1 if we should go to slowpath +STATIC_INLINE int mmtk_gc_wb_fast_check(const void *parent, const void *ptr) JL_NOTSAFEPOINT { if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { intptr_t addr = (intptr_t) (void*) parent; uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); intptr_t shift = (addr >> 3) & 0b111; uint8_t byte_val = *meta_addr; - if (((byte_val >> shift) & 1) == 1) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); - } + return ((byte_val >> shift) & 1) == 1; + } else { + return 0; + } +} + +// Slowpath. +STATIC_INLINE void mmtk_gc_wb_slow(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); } } STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { - mmtk_gc_wb_fast(parent, ptr); + if (mmtk_gc_wb_fast_check(parent, ptr)) { + mmtk_gc_wb_slow(parent, ptr); + } +} + +STATIC_INLINE void mmtk_gc_wb_binding(const void *bnd, const void *val) JL_NOTSAFEPOINT +{ + if (mmtk_gc_wb_fast_check(bnd, val)) { + jl_astaggedvalue(bnd)->bits.gc = 2; // to indicate that the buffer is a binding + mmtk_gc_wb_slow(bnd, val); + } } #define MMTK_MIN_ALIGNMENT 4 diff --git a/src/julia_internal.h b/src/julia_internal.h index 89fef75723a98..81867f244408e 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -624,8 +624,7 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { - jl_astaggedvalue(bnd)->bits.gc = 2; // to indicate that the buffer is a binding - mmtk_gc_wb(bnd, val); + mmtk_gc_wb_binding(bnd, val); } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) 
JL_NOTSAFEPOINT // parent isa jl_value_t* diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index b96aac41a7023..990bd92f3b499 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -54,6 +54,7 @@ struct FinalLowerGC: private JuliaPassContext { #ifdef MMTK_GC Function *writeBarrier1Func; Function *writeBarrier2Func; + Function *writeBarrierBindingFunc; Function *writeBarrier1SlowFunc; Function *writeBarrier2SlowFunc; #endif @@ -86,6 +87,7 @@ struct FinalLowerGC: private JuliaPassContext { #ifdef MMTK_GC Value *lowerWriteBarrier1(CallInst *target, Function &F); Value *lowerWriteBarrier2(CallInst *target, Function &F); + Value *lowerWriteBarrierBinding(CallInst *target, Function &F); Value *lowerWriteBarrier1Slow(CallInst *target, Function &F); Value *lowerWriteBarrier2Slow(CallInst *target, Function &F); #endif @@ -246,6 +248,13 @@ Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F) return target; } +Value *FinalLowerGC::lowerWriteBarrierBinding(CallInst *target, Function &F) +{ + assert(target->arg_size() == 2); + target->setCalledFunction(writeBarrierBindingFunc); + return target; +} + Value *FinalLowerGC::lowerWriteBarrier1Slow(CallInst *target, Function &F) { assert(target->arg_size() == 1); @@ -400,9 +409,10 @@ bool FinalLowerGC::doInitialization(Module &M) { #ifdef MMTK_GC writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1); writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2); + writeBarrierBindingFunc = getOrDeclare(jl_well_known::GCWriteBarrierBinding); writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow); writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow); - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, 
writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; #else GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; #endif @@ -422,8 +432,8 @@ bool FinalLowerGC::doInitialization(Module &M) { bool FinalLowerGC::doFinalization(Module &M) { #ifdef MMTK_GC - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; - queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr; + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc}; + queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr; #else GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr; @@ -501,6 +511,7 @@ bool FinalLowerGC::runOnFunction(Function &F) #ifdef MMTK_GC auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1); auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2); + auto writeBarrierBindingFunc = getOrNull(jl_intrinsics::writeBarrierBinding); auto writeBarrier1SlowFunc = getOrNull(jl_intrinsics::writeBarrier1Slow); auto writeBarrier2SlowFunc = getOrNull(jl_intrinsics::writeBarrier2Slow); #endif @@ -545,6 +556,9 @@ bool FinalLowerGC::runOnFunction(Function &F) else if (callee == writeBarrier2Func) { replaceInstruction(CI, lowerWriteBarrier2(CI, F), it); } + else if (callee == writeBarrierBindingFunc) { + replaceInstruction(CI, lowerWriteBarrierBinding(CI, F), it); + } else if (callee == writeBarrier1SlowFunc) { 
replaceInstruction(CI, lowerWriteBarrier1Slow(CI, F), it); } diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 56c4de99f202d..b76f4c38227f2 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2535,9 +2535,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { assert(false); } #else - // FIXME: Currently we call write barrier with the src object (parent). - // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all. - // But for other MMTk plans, we need to be careful. const bool INLINE_WRITE_BARRIER = true; if (CI->getCalledOperand() == write_barrier_func || CI->getCalledOperand() == write_barrier_binding_func) { if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { @@ -2572,10 +2569,37 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { SmallVector Weights{1, 9}; auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights)); builder.SetInsertPoint(mayTriggerSlowpath); + + // for binding write barrier, we also set gc bits to 2 (see mmtk_gc_wb_binding) + if (CI->getCalledOperand() == write_barrier_binding_func) { + auto tag = EmitLoadTag(builder, parent); + auto cleared_bits = builder.CreateAnd(tag, ConstantInt::get(T_size, ~0x3)); + auto new_tag = builder.CreateOr(cleared_bits, ConstantInt::get(T_size, 2)); + auto store = builder.CreateAlignedStore(new_tag, EmitTagPtr(builder, T_size, parent), Align(sizeof(size_t))); + store->setOrdering(AtomicOrdering::Unordered); + store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); + } + + // We just need the src object (parent) builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent }); } else { - Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); - builder.CreateCall(wb_func, { parent }); + // Do not inline write barrier -- just call into each function. 
+ // For object remembering barrier, we just need the src object (parent) + if (CI->getCalledOperand() == write_barrier_func) { + Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + builder.CreateCall(wb_func, { parent }); + } else { + assert(CI->getCalledOperand() == write_barrier_binding_func); + assert(CI->arg_size() == 2); + auto val = CI->getArgOperand(1); + Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrierBinding); + builder.CreateCall(wb_func, { parent, val }); + } + } + } else { + if (MMTK_NEEDS_WRITE_BARRIER != 0) { + jl_printf(JL_STDERR, "ERROR: only object barrier fastpath is implemented"); + assert(false); } } } else { diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 3b75c5b78cbb4..73d765f61e856 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -123,6 +123,7 @@ namespace jl_intrinsics { #ifdef MMTK_GC static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline"; static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline"; + static const char *WRITE_BARRIER_BINDING_NAME = "julia.write_barrier_binding_noinline"; static const char *WRITE_BARRIER_1_SLOW_NAME = "julia.write_barrier_1_slow"; static const char *WRITE_BARRIER_2_SLOW_NAME = "julia.write_barrier_2_slow"; #endif @@ -281,6 +282,20 @@ namespace jl_intrinsics { intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return intrinsic; }); + const IntrinsicDescription writeBarrierBinding( + WRITE_BARRIER_BINDING_NAME, + [](const JuliaPassContext &context) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(context.getLLVMContext()); + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { T_prjlvalue, T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_BINDING_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); const IntrinsicDescription writeBarrier1Slow( WRITE_BARRIER_1_SLOW_NAME, 
[](const JuliaPassContext &context) { @@ -321,6 +336,7 @@ namespace jl_well_known { #ifdef MMTK_GC static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline); static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline); + static const char *GC_WB_BINDING_NAME = XSTR(jl_gc_wb_binding_noinline); static const char *GC_WB_1_SLOW_NAME = XSTR(jl_gc_wb1_slow); static const char *GC_WB_2_SLOW_NAME = XSTR(jl_gc_wb2_slow); #endif @@ -438,6 +454,21 @@ namespace jl_well_known { return func; }); + const WellKnownFunctionDescription GCWriteBarrierBinding( + GC_WB_BINDING_NAME, + [](const JuliaPassContext &context) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(context.getLLVMContext()); + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { T_prjlvalue, T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_BINDING_NAME); + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); + const WellKnownFunctionDescription GCWriteBarrier1Slow( GC_WB_1_SLOW_NAME, [](const JuliaPassContext &context) { diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 155629c9cab63..21aaed8e9ba75 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -136,6 +136,7 @@ namespace jl_intrinsics { #ifdef MMTK_GC extern const IntrinsicDescription writeBarrier1; extern const IntrinsicDescription writeBarrier2; + extern const IntrinsicDescription writeBarrierBinding; extern const IntrinsicDescription writeBarrier1Slow; extern const IntrinsicDescription writeBarrier2Slow; #endif @@ -169,6 +170,7 @@ namespace jl_well_known { #ifdef MMTK_GC extern const WellKnownFunctionDescription GCWriteBarrier1; extern const WellKnownFunctionDescription GCWriteBarrier2; + extern const WellKnownFunctionDescription GCWriteBarrierBinding; extern const WellKnownFunctionDescription GCWriteBarrier1Slow; extern const WellKnownFunctionDescription GCWriteBarrier2Slow; #endif diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 
8e888f57cf449..87da49cc2fa4d 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -240,6 +240,7 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) { + mmtk_unreachable(); } @@ -565,6 +566,11 @@ JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOT jl_gc_wb(parent, ptr); } +JL_DLLEXPORT void jl_gc_wb_binding_noinline(const void *bnd, const void *val) JL_NOTSAFEPOINT +{ + jl_gc_wb_binding((jl_binding_t*)bnd, (void*)val); +} + JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task;