Skip to content

Commit

Permalink
Fix binding write barrier for code gen (#34)
Browse files Browse the repository at this point in the history
This PR makes a few changes to the write barrier for bindings.
* Move the code that sets gc bits to 2 in the binding write barrier into the slowpath
* Generate code to set gc bits for binding write barrier.
* Add `unreachable()` in a few `jl_gc_wb` methods in Julia to make sure they won't be accidentally called (Julia's GC does not inline write barrier, and won't need them).
  • Loading branch information
qinsoon authored Jan 31, 2024
1 parent dc7b07e commit 874179e
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 18 deletions.
9 changes: 9 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3343,18 +3343,27 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)

// Exported out-of-line write-barrier entry point taking only the parent object.
// In this file the body is a trap: reaching it hits jl_unreachable(), so an
// accidental call fails loudly instead of silently skipping the barrier.
JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
{
jl_unreachable();
}

// Exported out-of-line write-barrier entry point taking the parent object and
// the stored pointer. In this file the body is a trap: reaching it hits
// jl_unreachable(), so an accidental call fails loudly.
JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
jl_unreachable();
}

// Exported out-of-line entry point for the binding write barrier (parent is the
// binding, ptr the value being stored -- NOTE(review): inferred from the name
// and the sibling definition in mmtk-gc.c; confirm). In this file the body is
// a trap: reaching it hits jl_unreachable().
JL_DLLEXPORT void jl_gc_wb_binding_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
jl_unreachable();
}

// Exported write-barrier slow-path entry point taking only the parent object.
// In this file the body is a trap: reaching it hits jl_unreachable(), so an
// accidental call fails loudly.
JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
{
jl_unreachable();
}

// Exported write-barrier slow-path entry point taking the parent object and
// the stored pointer. In this file the body is a trap: reaching it hits
// jl_unreachable(), so an accidental call fails loudly.
JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT
{
jl_unreachable();
}

#ifdef __cplusplus
Expand Down
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@
XX(jl_gc_queue_root) \
XX(jl_gc_wb1_noinline) \
XX(jl_gc_wb2_noinline) \
XX(jl_gc_wb_binding_noinline) \
XX(jl_gc_wb1_slow) \
XX(jl_gc_wb2_slow) \
XX(jl_gc_safepoint) \
Expand Down
35 changes: 27 additions & 8 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,7 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
}
#else // MMTK_GC
STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT;
STATIC_INLINE void mmtk_gc_wb_binding(const void *parent, const void *ptr) JL_NOTSAFEPOINT;

STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -2317,25 +2318,43 @@ STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSA
mmtk_object_reference_write_post(&ptls->mmtk_mutator, parent, ptr);
}

// Inlined fastpath
STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT
// Fastpath. Return 1 if we should go to slowpath
STATIC_INLINE int mmtk_gc_wb_fast_check(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
intptr_t addr = (intptr_t) (void*) parent;
uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
intptr_t shift = (addr >> 3) & 0b111;
uint8_t byte_val = *meta_addr;
if (((byte_val >> shift) & 1) == 1) {
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr);
}
return ((byte_val >> shift) & 1) == 1;
} else {
return 0;
}
}

// Slowpath: forward the write to mmtk_object_reference_write_slow using the
// mutator that belongs to the current task's thread-local state. This is a
// no-op unless the object barrier is the configured write barrier.
STATIC_INLINE void mmtk_gc_wb_slow(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    if (MMTK_NEEDS_WRITE_BARRIER != MMTK_OBJECT_BARRIER)
        return;

    jl_task_t *task = jl_current_task;
    jl_ptls_t tls = task->ptls;
    mmtk_object_reference_write_slow(&tls->mmtk_mutator, parent, ptr);
}

// Object write barrier for a store of `ptr` into `parent`: run the inlined
// fast-path check and enter the slow path only when the check asks for it.
// (The former combined helper mmtk_gc_wb_fast has been split into
// mmtk_gc_wb_fast_check + mmtk_gc_wb_slow, so it is no longer called here.)
STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    if (mmtk_gc_wb_fast_check(parent, ptr)) {
        mmtk_gc_wb_slow(parent, ptr);
    }
}

// Write barrier for a store of `val` into the binding `bnd`. Nothing happens
// when the fast-path check says no slow path is needed; otherwise the binding's
// header is tagged before the slow path runs.
STATIC_INLINE void mmtk_gc_wb_binding(const void *bnd, const void *val) JL_NOTSAFEPOINT
{
    if (!mmtk_gc_wb_fast_check(bnd, val))
        return;

    // gc bits == 2 marks the buffer as a binding; must be set before the slow path.
    jl_astaggedvalue(bnd)->bits.gc = 2;
    mmtk_gc_wb_slow(bnd, val);
}

#define MMTK_MIN_ALIGNMENT 4
Expand Down
3 changes: 1 addition & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -624,8 +624,7 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT

// Write barrier for storing `val` into the binding `bnd`.
// Delegates entirely to mmtk_gc_wb_binding, which sets the binding's gc bits
// to 2 only on the slow path; the tag must not be written unconditionally
// here, and the plain object barrier must not be applied a second time.
STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
{
    mmtk_gc_wb_binding(bnd, val);
}

STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
Expand Down
20 changes: 17 additions & 3 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct FinalLowerGC: private JuliaPassContext {
#ifdef MMTK_GC
Function *writeBarrier1Func;
Function *writeBarrier2Func;
Function *writeBarrierBindingFunc;
Function *writeBarrier1SlowFunc;
Function *writeBarrier2SlowFunc;
#endif
Expand Down Expand Up @@ -86,6 +87,7 @@ struct FinalLowerGC: private JuliaPassContext {
#ifdef MMTK_GC
Value *lowerWriteBarrier1(CallInst *target, Function &F);
Value *lowerWriteBarrier2(CallInst *target, Function &F);
Value *lowerWriteBarrierBinding(CallInst *target, Function &F);
Value *lowerWriteBarrier1Slow(CallInst *target, Function &F);
Value *lowerWriteBarrier2Slow(CallInst *target, Function &F);
#endif
Expand Down Expand Up @@ -246,6 +248,13 @@ Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F)
return target;
}

// Lowers a binding write-barrier intrinsic call by retargeting the existing
// call instruction at the function held in writeBarrierBindingFunc. The call
// must carry exactly two arguments; they are passed through unchanged.
// Returns the same (now retargeted) instruction. `F` is not used.
Value *FinalLowerGC::lowerWriteBarrierBinding(CallInst *target, Function &F)
{
assert(target->arg_size() == 2);
target->setCalledFunction(writeBarrierBindingFunc);
return target;
}

Value *FinalLowerGC::lowerWriteBarrier1Slow(CallInst *target, Function &F)
{
assert(target->arg_size() == 1);
Expand Down Expand Up @@ -400,9 +409,10 @@ bool FinalLowerGC::doInitialization(Module &M) {
#ifdef MMTK_GC
writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1);
writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2);
writeBarrierBindingFunc = getOrDeclare(jl_well_known::GCWriteBarrierBinding);
writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow);
writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
#else
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
#endif
Expand All @@ -422,8 +432,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
bool FinalLowerGC::doFinalization(Module &M)
{
#ifdef MMTK_GC
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr;
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr;
#else
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
Expand Down Expand Up @@ -501,6 +511,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
#ifdef MMTK_GC
auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1);
auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2);
auto writeBarrierBindingFunc = getOrNull(jl_intrinsics::writeBarrierBinding);
auto writeBarrier1SlowFunc = getOrNull(jl_intrinsics::writeBarrier1Slow);
auto writeBarrier2SlowFunc = getOrNull(jl_intrinsics::writeBarrier2Slow);
#endif
Expand Down Expand Up @@ -545,6 +556,9 @@ bool FinalLowerGC::runOnFunction(Function &F)
else if (callee == writeBarrier2Func) {
replaceInstruction(CI, lowerWriteBarrier2(CI, F), it);
}
else if (callee == writeBarrierBindingFunc) {
replaceInstruction(CI, lowerWriteBarrierBinding(CI, F), it);
}
else if (callee == writeBarrier1SlowFunc) {
replaceInstruction(CI, lowerWriteBarrier1Slow(CI, F), it);
}
Expand Down
34 changes: 29 additions & 5 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2535,9 +2535,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
assert(false);
}
#else
// FIXME: Currently we call write barrier with the src object (parent).
// This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all.
// But for other MMTk plans, we need to be careful.
const bool INLINE_WRITE_BARRIER = true;
if (CI->getCalledOperand() == write_barrier_func || CI->getCalledOperand() == write_barrier_binding_func) {
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
Expand Down Expand Up @@ -2572,10 +2569,37 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
SmallVector<uint32_t, 2> Weights{1, 9};
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights));
builder.SetInsertPoint(mayTriggerSlowpath);

// for binding write barrier, we also set gc bits to 2 (see mmtk_gc_wb_binding)
if (CI->getCalledOperand() == write_barrier_binding_func) {
auto tag = EmitLoadTag(builder, parent);
auto cleared_bits = builder.CreateAnd(tag, ConstantInt::get(T_size, ~0x3));
auto new_tag = builder.CreateOr(cleared_bits, ConstantInt::get(T_size, 2));
auto store = builder.CreateAlignedStore(new_tag, EmitTagPtr(builder, T_size, parent), Align(sizeof(size_t)));
store->setOrdering(AtomicOrdering::Unordered);
store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
}

// We just need the src object (parent)
builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent });
} else {
Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
builder.CreateCall(wb_func, { parent });
// Do not inline the write barrier -- just call into each function.
// For object remembering barrier, we just need the src object (parent)
if (CI->getCalledOperand() == write_barrier_func) {
Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
builder.CreateCall(wb_func, { parent });
} else {
assert(CI->getCalledOperand() == write_barrier_binding_func);
assert(CI->arg_size() == 2);
auto val = CI->getArgOperand(1);
Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrierBinding);
builder.CreateCall(wb_func, { parent, val });
}
}
} else {
if (MMTK_NEEDS_WRITE_BARRIER != 0) {
jl_printf(JL_STDERR, "ERROR: only object barrier fastpath is implemented");
assert(false);
}
}
} else {
Expand Down
31 changes: 31 additions & 0 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ namespace jl_intrinsics {
#ifdef MMTK_GC
static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline";
static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline";
static const char *WRITE_BARRIER_BINDING_NAME = "julia.write_barrier_binding_noinline";
static const char *WRITE_BARRIER_1_SLOW_NAME = "julia.write_barrier_1_slow";
static const char *WRITE_BARRIER_2_SLOW_NAME = "julia.write_barrier_2_slow";
#endif
Expand Down Expand Up @@ -281,6 +282,20 @@ namespace jl_intrinsics {
intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return intrinsic;
});
// Intrinsic for the non-inlined binding write barrier. The declaration built
// here has signature `void(prjlvalue, prjlvalue)`, external linkage, and is
// marked as touching only inaccessible or argument memory.
const IntrinsicDescription writeBarrierBinding(
    WRITE_BARRIER_BINDING_NAME,
    [](const JuliaPassContext &context) {
        auto &llvmCtx = context.getLLVMContext();
        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(llvmCtx);
        auto *signature = FunctionType::get(
            Type::getVoidTy(llvmCtx),
            { T_prjlvalue, T_prjlvalue },
            false);
        auto *decl = Function::Create(
            signature,
            Function::ExternalLinkage,
            WRITE_BARRIER_BINDING_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });
const IntrinsicDescription writeBarrier1Slow(
WRITE_BARRIER_1_SLOW_NAME,
[](const JuliaPassContext &context) {
Expand Down Expand Up @@ -321,6 +336,7 @@ namespace jl_well_known {
#ifdef MMTK_GC
static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline);
static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline);
static const char *GC_WB_BINDING_NAME = XSTR(jl_gc_wb_binding_noinline);
static const char *GC_WB_1_SLOW_NAME = XSTR(jl_gc_wb1_slow);
static const char *GC_WB_2_SLOW_NAME = XSTR(jl_gc_wb2_slow);
#endif
Expand Down Expand Up @@ -438,6 +454,21 @@ namespace jl_well_known {
return func;
});

// Well-known runtime function for the binding write barrier, looked up by
// GC_WB_BINDING_NAME. The declaration built here has signature
// `void(prjlvalue, prjlvalue)`, external linkage, and is marked as touching
// only inaccessible or argument memory.
const WellKnownFunctionDescription GCWriteBarrierBinding(
    GC_WB_BINDING_NAME,
    [](const JuliaPassContext &context) {
        auto &llvmCtx = context.getLLVMContext();
        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(llvmCtx);
        auto *signature = FunctionType::get(
            Type::getVoidTy(llvmCtx),
            { T_prjlvalue, T_prjlvalue },
            false);
        auto *decl = Function::Create(
            signature,
            Function::ExternalLinkage,
            GC_WB_BINDING_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });

const WellKnownFunctionDescription GCWriteBarrier1Slow(
GC_WB_1_SLOW_NAME,
[](const JuliaPassContext &context) {
Expand Down
2 changes: 2 additions & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ namespace jl_intrinsics {
#ifdef MMTK_GC
extern const IntrinsicDescription writeBarrier1;
extern const IntrinsicDescription writeBarrier2;
extern const IntrinsicDescription writeBarrierBinding;
extern const IntrinsicDescription writeBarrier1Slow;
extern const IntrinsicDescription writeBarrier2Slow;
#endif
Expand Down Expand Up @@ -169,6 +170,7 @@ namespace jl_well_known {
#ifdef MMTK_GC
extern const WellKnownFunctionDescription GCWriteBarrier1;
extern const WellKnownFunctionDescription GCWriteBarrier2;
extern const WellKnownFunctionDescription GCWriteBarrierBinding;
extern const WellKnownFunctionDescription GCWriteBarrier1Slow;
extern const WellKnownFunctionDescription GCWriteBarrier2Slow;
#endif
Expand Down
6 changes: 6 additions & 0 deletions src/mmtk-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value

// Exported for API compatibility but not expected to be called in this build:
// the body traps via mmtk_unreachable().
// NOTE(review): presumably binding stores go through jl_gc_wb_binding instead -- confirm.
JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd)
{
mmtk_unreachable();
}


Expand Down Expand Up @@ -565,6 +566,11 @@ JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOT
jl_gc_wb(parent, ptr);
}

// Exported out-of-line entry point for the binding write barrier.
// Forwards to jl_gc_wb_binding, casting the const void* arguments to the
// types that helper expects (bnd -> jl_binding_t*, val -> void*).
JL_DLLEXPORT void jl_gc_wb_binding_noinline(const void *bnd, const void *val) JL_NOTSAFEPOINT
{
jl_gc_wb_binding((jl_binding_t*)bnd, (void*)val);
}

JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
{
jl_task_t *ct = jl_current_task;
Expand Down

0 comments on commit 874179e

Please sign in to comment.