Skip to content

Setting up post alloc function to set VO bit #59

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,10 @@ ifeq (${MMTK_PLAN},StickyImmix)
JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX
JCFLAGS += -DMMTK_PLAN_STICKYIMMIX
endif
# When MMTK_CONSERVATIVE is set to 1, define MMTK_CONSERVATIVE_SCAN for both the
# C++ (JCXXFLAGS) and C (JCFLAGS) compilations.
ifeq (${MMTK_CONSERVATIVE},1)
JCXXFLAGS += -DMMTK_CONSERVATIVE_SCAN
JCFLAGS += -DMMTK_CONSERVATIVE_SCAN
endif
MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk
MMTK_API_INC = $(MMTK_DIR)/api
MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia
Expand Down
8 changes: 8 additions & 0 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
if ((void*)ret == HT_NOTFOUND) {
if (!should_malloc) {
char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0);
#ifdef MMTK_GC
jl_ptls_t ptls = jl_current_task->ptls;
mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(perm_mem), flddesc_sz);
#endif
assert(perm_mem);
ret = (jl_datatype_layout_t *)perm_mem;
memcpy(perm_mem, flddesc, flddesc_sz);
Expand Down Expand Up @@ -815,6 +819,10 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
jl_datatype_layout_t *layout = (jl_datatype_layout_t *)
jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t),
0, 4, 0);
#ifdef MMTK_GC
jl_ptls_t ptls = jl_current_task->ptls;
mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(layout), sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t));
#endif
layout->size = large ? GC_MAX_SZCLASS+1 : 0;
layout->nfields = 0;
layout->alignment = sizeof(void *);
Expand Down
5 changes: 5 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3578,6 +3578,11 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
return NULL;
}

// Placeholder definition of the exported post-allocation slow-path hook.
// This version must never be called: its body is jl_unreachable(). A working
// implementation with the same signature is defined in src/mmtk-gc.c.
JL_DLLEXPORT void jl_gc_post_alloc_slow(void* obj, int size) JL_NOTSAFEPOINT
{
jl_unreachable();
}

JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
{
jl_unreachable();
Expand Down
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
XX(jl_gc_wb_binding_noinline) \
XX(jl_gc_wb1_slow) \
XX(jl_gc_wb2_slow) \
XX(jl_gc_post_alloc_slow) \
XX(jl_gc_safepoint) \
XX(jl_gc_schedule_foreign_sweepfunc) \
XX(jl_gc_set_cb_notify_external_alloc) \
Expand Down
33 changes: 31 additions & 2 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2438,8 +2438,11 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind;
extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator);
extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator);


extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS;

// These need to be constants.

Expand All @@ -2452,6 +2455,12 @@ extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
#define MMTK_NEEDS_WRITE_BARRIER (1)
#endif

// MMTK_NEEDS_VO_BIT is 1 when MMTK_CONSERVATIVE_SCAN is defined and 0 otherwise.
// It is always defined, so it can be tested with a plain `if (...)`.
#ifdef MMTK_CONSERVATIVE_SCAN
#define MMTK_NEEDS_VO_BIT (1)
#else
#define MMTK_NEEDS_VO_BIT (0)
#endif

#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0)
#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0)

Expand Down Expand Up @@ -2526,8 +2535,23 @@ STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t si
return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0);
}

// Out-of-line post-allocation path for immix objects: forwards `mutator`, `obj`
// and `size` to mmtk_post_alloc() with 0 as the `allocator` argument.
// NOTE(review): 0 presumably selects the default allocator/semantics on the
// MMTk side -- confirm against the binding before changing it.
STATIC_INLINE void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) {
mmtk_post_alloc(mutator, obj, size, 0);
}

// Set the VO bit for `obj` in the side metadata table that starts at
// MMTK_SIDE_VO_BIT_BASE_ADDRESS.
//
// Mapping used by the arithmetic below (one bit per 8 bytes of address space,
// i.e. one metadata byte per 64 bytes):
//   metadata byte = base + (addr >> 6)
//   bit in byte   = (addr >> 3) & 7
//
// The address is handled as an unsigned integer so that both shifts are
// logical shifts, matching the LShr-based sequence emitted for the inlined
// fast path in llvm-final-gc-lowering.cpp. Right-shifting a signed value would
// be implementation-defined for addresses with the top bit set.
//
// The update is a plain (non-atomic) read-modify-write of the metadata byte.
STATIC_INLINE void mmtk_set_vo_bit(void* obj) {
    uintptr_t addr = (uintptr_t) obj;
    // Index of the bit inside its metadata byte; always in the range 0..7.
    unsigned shift = (unsigned) ((addr >> 3) & 0x7);
    uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6);
    *vo_meta_addr = (uint8_t) (*vo_meta_addr | (1u << shift));
}

// Inline post-allocation hook for immix objects.
// `mutator` and `size` are accepted but not used by this body.
STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
// The only post-allocation work done here is setting the VO bit, and only when
// MMTK_NEEDS_VO_BIT is non-zero; otherwise this function does nothing.
if (MMTK_NEEDS_VO_BIT) {
// set VO bit
mmtk_set_vo_bit(obj);
}
}

STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
Expand All @@ -2536,10 +2560,15 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t
}

STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
if (MMTK_NEEDS_VO_BIT) {
// set VO bit
mmtk_set_vo_bit(obj);
}

if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
intptr_t addr = (intptr_t) obj;
uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
intptr_t shift = (addr >> 3) & 0b111;
uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
while(1) {
uint8_t old_val = *meta_addr;
uint8_t new_val = old_val | (1 << shift);
Expand Down
47 changes: 44 additions & 3 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct FinalLowerGC: private JuliaPassContext {
Function *writeBarrierBindingFunc;
Function *writeBarrier1SlowFunc;
Function *writeBarrier2SlowFunc;
Function *postAllocSlowFunc;
#endif
Instruction *pgcstack;

Expand Down Expand Up @@ -373,6 +374,45 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)

auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());

if (MMTK_NEEDS_VO_BIT) {
// Should we generate fastpath post alloc sequence here?
// Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
const bool INLINE_FASTPATH_POST_ALLOCATION = true;

// set VO bit
if (INLINE_FASTPATH_POST_ALLOCATION) {
auto intptr_ty = Type::getInt64Ty(target->getContext());
auto i8_ty = Type::getInt8Ty(F.getContext());
intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_VO_BIT_BASE_ADDRESS);
auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));

// intptr_t addr = (intptr_t) v;
auto addr = v_raw;

// uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6);
auto shr = builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 6));
auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);

// intptr_t shift = (addr >> 3) & 0b111;
auto shift = builder.CreateAnd(builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));

// uint8_t byte_val = *vo_meta_addr;
auto byte_val = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());

// uint8_t new_val = byte_val | (1 << shift);
auto shifted_val = builder.CreateShl(ConstantInt::get(intptr_ty, 1), shift);
auto shifted_val_i8 = builder.CreateTruncOrBitCast(shifted_val, i8_ty);
auto new_val = builder.CreateOr(byte_val, shifted_val_i8);

// (*vo_meta_addr) = new_val;
builder.CreateStore(new_val, metadata_ptr);
} else {
builder.CreateCall(postAllocSlowFunc, { v_as_ptr, pool_osize_i32 });
}
}

builder.CreateBr(top_cont);

phiNode->addIncoming(new_call, slowpath);
Expand Down Expand Up @@ -416,7 +456,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
writeBarrierBindingFunc = getOrDeclare(jl_well_known::GCWriteBarrierBinding);
writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow);
writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
postAllocSlowFunc = getOrDeclare(jl_well_known::GCPostAllocSlow);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc, postAllocSlowFunc};
#else
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
#endif
Expand All @@ -436,8 +477,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
bool FinalLowerGC::doFinalization(Module &M)
{
#ifdef MMTK_GC
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr;
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc, postAllocSlowFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = postAllocSlowFunc = nullptr;
#else
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
Expand Down
15 changes: 15 additions & 0 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ namespace jl_well_known {
static const char *GC_WB_BINDING_NAME = XSTR(jl_gc_wb_binding_noinline);
static const char *GC_WB_1_SLOW_NAME = XSTR(jl_gc_wb1_slow);
static const char *GC_WB_2_SLOW_NAME = XSTR(jl_gc_wb2_slow);
static const char *GC_POST_ALLOC_SLOW_NAME = XSTR(jl_gc_post_alloc_slow);
#endif

static auto T_size_t(const JuliaPassContext &context) {
Expand Down Expand Up @@ -528,5 +529,19 @@ namespace jl_well_known {
func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return func;
});

// Well-known declaration for the post-allocation slow-path runtime function
// named by GC_POST_ALLOC_SLOW_NAME: void(T_prjlvalue, i32), external linkage,
// tagged InaccessibleMemOrArgMemOnly.
const WellKnownFunctionDescription GCPostAllocSlow (
    GC_POST_ALLOC_SLOW_NAME,
    [](const JuliaPassContext &context) {
        auto &llvm_ctx = context.getLLVMContext();
        // Signature: (object pointer, 32-bit size) -> void, not variadic.
        auto *signature = FunctionType::get(
            Type::getVoidTy(llvm_ctx),
            { context.T_prjlvalue, Type::getInt32Ty(llvm_ctx) },
            false);
        auto *decl = Function::Create(
            signature,
            Function::ExternalLinkage,
            GC_POST_ALLOC_SLOW_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });
#endif
}
1 change: 1 addition & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ namespace jl_well_known {
extern const WellKnownFunctionDescription GCWriteBarrierBinding;
extern const WellKnownFunctionDescription GCWriteBarrier1Slow;
extern const WellKnownFunctionDescription GCWriteBarrier2Slow;
extern const WellKnownFunctionDescription GCPostAllocSlow;
#endif
}

Expand Down
6 changes: 6 additions & 0 deletions src/mmtk-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,12 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE
mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr);
}

// Exported post-allocation slow path: looks up the current task's thread-local
// state and hands `obj`/`size` to mmtk_immix_post_alloc_slow() together with
// that thread's MMTk mutator.
JL_DLLEXPORT void jl_gc_post_alloc_slow(void* obj, int size) JL_NOTSAFEPOINT
{
    jl_task_t *current = jl_current_task;
    jl_ptls_t tls = current->ptls;
    mmtk_immix_post_alloc_slow(&tls->mmtk_mutator, obj, size);
}

void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
{
jl_ptls_t ptls = jl_current_task->ptls;
Expand Down