From c6a61f7aed8d9776999b4eeb4720f254031c2af8 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 11 Feb 2022 11:04:07 -0500 Subject: [PATCH 001/114] mixing in-place & copyin --- runtime/gc/hierarchical-heap-collection.c | 770 +++++++++++++--------- 1 file changed, 467 insertions(+), 303 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 78587e02d..72d96322f 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -40,21 +40,26 @@ void unfreezeDisentangledDepthAfter( #endif void tryUnpinOrKeepPinned( - GC_state s, - HM_remembered remElem, - void* rawArgs); + GC_state s, + HM_remembered remElem, + void *rawArgs); + +void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs); +void unmark(GC_state s, objptr *opp, void *rawArgs); void copySuspect(GC_state s, objptr *opp, void *rawArghh); void forwardObjptrsOfRemembered( - GC_state s, - HM_remembered remElem, - void* rawArgs); + GC_state s, + HM_remembered remElem, + void *rawArgs); + +void unmarkWrapper(GC_state s, HM_remembered remElem, void *args); // void scavengeChunkOfPinnedObject(GC_state s, objptr op, void* rawArgs); #if ASSERT -void checkRememberedEntry(GC_state s, HM_remembered remElem, void* args); +void checkRememberedEntry(GC_state s, HM_remembered remElem, void *args); bool hhContainsChunk(HM_HierarchicalHeap hh, HM_chunk theChunk); #endif @@ -87,68 +92,69 @@ pointer copyObject(pointer p, * @note This function takes as additional arguments the * struct SSATOPredicateArgs */ -struct SSATOPredicateArgs { +struct SSATOPredicateArgs +{ pointer expectedStackPointer; pointer expectedThreadPointer; }; bool skipStackAndThreadObjptrPredicate(GC_state s, pointer p, - void* rawArgs); + void *rawArgs); /************************/ /* Function Definitions */ /************************/ -#if (defined (MLTON_GC_INTERNAL_BASIS)) +#if (defined(MLTON_GC_INTERNAL_BASIS)) #endif /* MLTON_GC_INTERNAL_BASIS */ -#if (defined (MLTON_GC_INTERNAL_FUNCS)) +#if (defined(MLTON_GC_INTERNAL_FUNCS)) -enum LGC_freedChunkType { +enum LGC_freedChunkType +{ LGC_FREED_REMSET_CHUNK, LGC_FREED_STACK_CHUNK, LGC_FREED_NORMAL_CHUNK, LGC_FREED_SUSPECT_CHUNK }; -static const char* LGC_freedChunkTypeToString[] = { - "LGC_FREED_REMSET_CHUNK", - "LGC_FREED_STACK_CHUNK", - "LGC_FREED_NORMAL_CHUNK", - "LGC_FREED_SUSPECT_CHUNK" -}; +static const char *LGC_freedChunkTypeToString[] = { + "LGC_FREED_REMSET_CHUNK", + "LGC_FREED_STACK_CHUNK", + "LGC_FREED_NORMAL_CHUNK", + "LGC_FREED_SUSPECT_CHUNK"}; -struct LGC_chunkInfo { +struct LGC_chunkInfo +{ uint32_t depth; int32_t procNum; uintmax_t collectionNumber; enum LGC_freedChunkType freedType; }; - void LGC_writeFreeChunkInfo( - __attribute__((unused)) GC_state s, - char* infoBuffer, - size_t bufferLen, - void* env) + __attribute__((unused)) GC_state s, + char *infoBuffer, + size_t bufferLen, + void *env) { struct LGC_chunkInfo *info = env; snprintf(infoBuffer, bufferLen, - "freed %s at depth %u by LGC %d:%zu", - LGC_freedChunkTypeToString[info->freedType], - info->depth, - info->procNum, - info->collectionNumber); + "freed %s at depth %u by LGC %d:%zu", + LGC_freedChunkTypeToString[info->freedType], + info->depth, + info->procNum, + info->collectionNumber); } - -uint32_t minDepthWithoutCC(GC_thread thread) { +uint32_t minDepthWithoutCC(GC_thread thread) +{ assert(thread != NULL); assert(thread->hierarchicalHeap != NULL); HM_HierarchicalHeap cursor = thread->hierarchicalHeap; if (cursor->subHeapForCC != NULL) - return 
thread->currentDepth+1; + return thread->currentDepth + 1; while (cursor->nextAncestor != NULL && cursor->nextAncestor->subHeapForCC == NULL) @@ -166,17 +172,19 @@ uint32_t minDepthWithoutCC(GC_thread thread) { return HM_HH_getDepth(cursor); } -void HM_HHC_collectLocal(uint32_t desiredScope) { +void HM_HHC_collectLocal(uint32_t desiredScope) +{ GC_state s = pthread_getspecific(gcstate_key); GC_thread thread = getThreadCurrent(s); - struct HM_HierarchicalHeap* hh = thread->hierarchicalHeap; + struct HM_HierarchicalHeap *hh = thread->hierarchicalHeap; struct rusage ru_start; struct timespec startTime; struct timespec stopTime; uint64_t oldObjectCopied; - if (NONE == s->controls->collectionType) { + if (NONE == s->controls->collectionType) + { /* collection disabled */ return; } @@ -188,12 +196,13 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { // return; // } - if (s->wsQueueTop == BOGUS_OBJPTR || s->wsQueueBot == BOGUS_OBJPTR) { + if (s->wsQueueTop == BOGUS_OBJPTR || s->wsQueueBot == BOGUS_OBJPTR) + { LOG(LM_HH_COLLECTION, LL_DEBUG, "Skipping collection, deque not registered yet"); return; } - uint64_t topval = *(uint64_t*)objptrToPointer(s->wsQueueTop, NULL); + uint64_t topval = *(uint64_t *)objptrToPointer(s->wsQueueTop, NULL); uint32_t potentialLocalScope = UNPACK_IDX(topval); uint32_t originalLocalScope = pollCurrentLocalScope(s); @@ -212,28 +221,29 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { minOkay = max(minOkay, thread->minLocalCollectionDepth); minOkay = max(minOkay, minNoCC); uint32_t minDepth = originalLocalScope; - while (minDepth > minOkay && tryClaimLocalScope(s)) { + while (minDepth > minOkay && tryClaimLocalScope(s)) + { minDepth--; assert(minDepth == pollCurrentLocalScope(s)); } assert(minDepth == pollCurrentLocalScope(s)); - if ( minDepth == 0 || - minOkay > minDepth || - minDepth > thread->currentDepth ) + if (minDepth == 0 || + minOkay > minDepth || + minDepth > thread->currentDepth) { LOG(LM_HH_COLLECTION, LL_DEBUG, - "Skipping collection:\n" - " minDepth %u\n" - " currentDepth %u\n" - " minNoCC %u\n" - " desiredScope %u\n" - " potentialLocalScope %u\n", - minDepth, - thread->currentDepth, - minNoCC, - desiredScope, - potentialLocalScope); + "Skipping collection:\n" + " minDepth %u\n" + " currentDepth %u\n" + " minNoCC %u\n" + " desiredScope %u\n" + " potentialLocalScope %u\n", + minDepth, + thread->currentDepth, + minNoCC, + desiredScope, + potentialLocalScope); releaseLocalScope(s, originalLocalScope); return; @@ -250,33 +260,33 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { s->cumulativeStatistics->numHHLocalGCs++; /* used needs to be set because the mutator has changed s->stackTop. 
*/ - getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed (s); + getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); getThreadCurrent(s)->exnStack = s->exnStack; assertInvariants(thread); - if (SUPERLOCAL == s->controls->collectionType) { + if (SUPERLOCAL == s->controls->collectionType) + { minDepth = maxDepth; } /* copy roots */ struct ForwardHHObjptrArgs forwardHHObjptrArgs = { - .hh = hh, - .minDepth = minDepth, - .maxDepth = maxDepth, - .toDepth = HM_HH_INVALID_DEPTH, - .fromSpace = NULL, - .toSpace = NULL, - .pinned = NULL, - .containingObject = BOGUS_OBJPTR, - .bytesCopied = 0, - .objectsCopied = 0, - .stacksCopied = 0, - .bytesMoved = 0, - .objectsMoved = 0 - }; + .hh = hh, + .minDepth = minDepth, + .maxDepth = maxDepth, + .toDepth = HM_HH_INVALID_DEPTH, + .fromSpace = NULL, + .toSpace = NULL, + .pinned = NULL, + .containingObject = BOGUS_OBJPTR, + .bytesCopied = 0, + .objectsCopied = 0, + .stacksCopied = 0, + .bytesMoved = 0, + .objectsMoved = 0}; struct GC_foreachObjptrClosure forwardHHObjptrClosure = - {.fun = forwardHHObjptr, .env = &forwardHHObjptrArgs}; + {.fun = forwardHHObjptr, .env = &forwardHHObjptrArgs}; LOG(LM_HH_COLLECTION, LL_INFO, "collecting hh %p (L: %u):\n" @@ -288,7 +298,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { " potential local scope is %u -> %u\n" " collection scope is %u -> %u\n", // " lchs %"PRIu64" lcs %"PRIu64, - ((void*)(hh)), + ((void *)(hh)), thread->currentDepth, s->procNumber, s->cumulativeStatistics->numHHLocalGCs, @@ -301,26 +311,30 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { forwardHHObjptrArgs.minDepth, forwardHHObjptrArgs.maxDepth); - struct HM_chunkList pinned[maxDepth+1]; + struct HM_chunkList pinned[maxDepth + 1]; forwardHHObjptrArgs.pinned = &(pinned[0]); - for (uint32_t i = 0; i <= maxDepth; i++) HM_initChunkList(&(pinned[i])); + for (uint32_t i = 0; i <= maxDepth; i++) + HM_initChunkList(&(pinned[i])); - HM_HierarchicalHeap toSpace[maxDepth+1]; + HM_HierarchicalHeap toSpace[maxDepth + 1]; forwardHHObjptrArgs.toSpace = &(toSpace[0]); - for (uint32_t i = 0; i <= maxDepth; i++) toSpace[i] = NULL; + for (uint32_t i = 0; i <= maxDepth; i++) + toSpace[i] = NULL; - HM_HierarchicalHeap fromSpace[maxDepth+1]; + HM_HierarchicalHeap fromSpace[maxDepth + 1]; forwardHHObjptrArgs.fromSpace = &(fromSpace[0]); - for (uint32_t i = 0; i <= maxDepth; i++) fromSpace[i] = NULL; + for (uint32_t i = 0; i <= maxDepth; i++) + fromSpace[i] = NULL; for (HM_HierarchicalHeap cursor = hh; NULL != cursor; - cursor = cursor->nextAncestor) { + cursor = cursor->nextAncestor) + { fromSpace[HM_HH_getDepth(cursor)] = cursor; } /* ===================================================================== * logging */ - size_t sizesBefore[maxDepth+1]; + size_t sizesBefore[maxDepth + 1]; for (uint32_t i = 0; i <= maxDepth; i++) sizesBefore[i] = 0; size_t totalSizeBefore = 0; @@ -405,7 +419,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { forwardHHObjptrArgs.toDepth = HM_HH_getDepth(cursor); struct HM_foreachDownptrClosure closure = - {.fun = tryUnpinOrKeepPinned, .env = (void*)&forwardHHObjptrArgs}; + {.fun = tryUnpinOrKeepPinned, .env = (void *)&forwardHHObjptrArgs}; HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); } forwardHHObjptrArgs.toDepth = HM_HH_INVALID_DEPTH; @@ -428,8 +442,9 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { /* ===================================================================== */ - if (needGCTime(s)) { - startTiming (RUSAGE_THREAD, &ru_start); + if (needGCTime(s)) + { + startTiming(RUSAGE_THREAD, 
&ru_start); } timespec_now(&startTime); @@ -450,12 +465,12 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { &forwardHHObjptrClosure, FALSE); LOG(LM_HH_COLLECTION, LL_DEBUG, - "Copied %"PRIu64" objects from stack", + "Copied %" PRIu64 " objects from stack", forwardHHObjptrArgs.objectsCopied - oldObjectCopied); Trace3(EVENT_COPY, - forwardHHObjptrArgs.bytesCopied, - forwardHHObjptrArgs.objectsCopied, - forwardHHObjptrArgs.stacksCopied); + forwardHHObjptrArgs.bytesCopied, + forwardHHObjptrArgs.objectsCopied, + forwardHHObjptrArgs.stacksCopied); /* forward contents of thread (hence including stack) */ oldObjectCopied = forwardHHObjptrArgs.objectsCopied; @@ -467,26 +482,25 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { &forwardHHObjptrClosure, FALSE); LOG(LM_HH_COLLECTION, LL_DEBUG, - "Copied %"PRIu64" objects from thread", + "Copied %" PRIu64 " objects from thread", forwardHHObjptrArgs.objectsCopied - oldObjectCopied); Trace3(EVENT_COPY, - forwardHHObjptrArgs.bytesCopied, - forwardHHObjptrArgs.objectsCopied, - forwardHHObjptrArgs.stacksCopied); + forwardHHObjptrArgs.bytesCopied, + forwardHHObjptrArgs.objectsCopied, + forwardHHObjptrArgs.stacksCopied); /* forward thread itself */ LOG(LM_HH_COLLECTION, LL_DEBUG, - "Trying to forward current thread %p", - (void*)s->currentThread); + "Trying to forward current thread %p", + (void *)s->currentThread); oldObjectCopied = forwardHHObjptrArgs.objectsCopied; forwardHHObjptr(s, &(s->currentThread), &forwardHHObjptrArgs); LOG(LM_HH_COLLECTION, LL_DEBUG, - (1 == (forwardHHObjptrArgs.objectsCopied - oldObjectCopied)) ? - "Copied thread from GC_state" : "Did not copy thread from GC_state"); + (1 == (forwardHHObjptrArgs.objectsCopied - oldObjectCopied)) ? "Copied thread from GC_state" : "Did not copy thread from GC_state"); Trace3(EVENT_COPY, - forwardHHObjptrArgs.bytesCopied, - forwardHHObjptrArgs.objectsCopied, - forwardHHObjptrArgs.stacksCopied); + forwardHHObjptrArgs.bytesCopied, + forwardHHObjptrArgs.objectsCopied, + forwardHHObjptrArgs.stacksCopied); /* forward contents of deque */ oldObjectCopied = forwardHHObjptrArgs.objectsCopied; @@ -497,12 +511,12 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { &forwardHHObjptrClosure, FALSE); LOG(LM_HH_COLLECTION, LL_DEBUG, - "Copied %"PRIu64" objects from deque", + "Copied %" PRIu64 " objects from deque", forwardHHObjptrArgs.objectsCopied - oldObjectCopied); Trace3(EVENT_COPY, - forwardHHObjptrArgs.bytesCopied, - forwardHHObjptrArgs.objectsCopied, - forwardHHObjptrArgs.stacksCopied); + forwardHHObjptrArgs.bytesCopied, + forwardHHObjptrArgs.objectsCopied, + forwardHHObjptrArgs.stacksCopied); LOG(LM_HH_COLLECTION, LL_DEBUG, "END root copy"); @@ -520,46 +534,52 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { // }; /* off-by-one to prevent underflow */ - uint32_t depth = thread->currentDepth+1; - while (depth > forwardHHObjptrArgs.minDepth) { + uint32_t depth = thread->currentDepth + 1; + while (depth > forwardHHObjptrArgs.minDepth) + { depth--; HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; - if (NULL == toSpaceLevel) { + if (NULL == toSpaceLevel) + { continue; } LOG(LM_HH_COLLECTION, LL_INFO, - "level %"PRIu32": num pinned: %zu", - depth, - HM_numRemembered(HM_HH_getRemSet(toSpaceLevel))); + "level %" PRIu32 ": num pinned: %zu", + depth, + HM_numRemembered(HM_HH_getRemSet(toSpaceLevel))); /* use the remembered (pinned) entries at this level as extra roots */ + /* forward the from-elements of the down-ptrs */ struct HM_foreachDownptrClosure closure = - {.fun = forwardObjptrsOfRemembered, .env 
= (void*)&forwardHHObjptrArgs}; + {.fun = forwardObjptrsOfRemembered, .env = (void *)&forwardHHObjptrArgs}; HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); if (NULL != HM_HH_getChunkList(toSpaceLevel)->firstChunk) { HM_chunkList toSpaceList = HM_HH_getChunkList(toSpaceLevel); HM_forwardHHObjptrsInChunkList( - s, - toSpaceList->firstChunk, - HM_getChunkStart(toSpaceList->firstChunk), - // &skipStackAndThreadObjptrPredicate, - // &ssatoPredicateArgs, - &trueObjptrPredicate, - NULL, - &forwardHHObjptr, - &forwardHHObjptrArgs); + s, + toSpaceList->firstChunk, + HM_getChunkStart(toSpaceList->firstChunk), + // &skipStackAndThreadObjptrPredicate, + // &ssatoPredicateArgs, + &trueObjptrPredicate, + NULL, + &forwardHHObjptr, + &forwardHHObjptrArgs); } } - /* after everything has been scavenged, we have to move the pinned chunks */ - depth = thread->currentDepth+1; - while (depth > forwardHHObjptrArgs.minDepth) { + /* after everything has been scavenged, we have to move the pinned chunks and unmark + * objects moved by mark and scan */ + depth = thread->currentDepth + 1; + while (depth > forwardHHObjptrArgs.minDepth) + { depth--; HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; - if (NULL == toSpaceLevel) { + if (NULL == toSpaceLevel) + { /* check that there are also no pinned chunks at this level * (if there was pinned chunk, then we would have also created a * toSpace HH at this depth, because we would have scavenged the @@ -572,7 +592,13 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { // SAM_NOTE: safe to check here, because pinned chunks are separate. traverseEachObjInChunkList(s, HM_HH_getChunkList(toSpaceLevel)); #endif + // struct HM_foreachObjClosure unmarkClosure = + struct HM_foreachDownptrClosure closure = + {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; + HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + // {.fun = unmark, .env = (void *)&forwardHHObjptrArgs}; + // HM_foreachObjInChunkList(s, &(pinned[depth]), &unmarkClosure); /* unset the flags on pinned chunks and update their HH pointer */ for (HM_chunk chunkCursor = pinned[depth].firstChunk; chunkCursor != NULL; @@ -588,35 +614,37 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { } LOG(LM_HH_COLLECTION, LL_DEBUG, - "Copied %"PRIu64" objects in copy-collection", + "Copied %" PRIu64 " objects in copy-collection", forwardHHObjptrArgs.objectsCopied - oldObjectCopied); LOG(LM_HH_COLLECTION, LL_DEBUG, - "Copied %"PRIu64" stacks in copy-collection", + "Copied %" PRIu64 " stacks in copy-collection", forwardHHObjptrArgs.stacksCopied); Trace3(EVENT_COPY, - forwardHHObjptrArgs.bytesCopied, - forwardHHObjptrArgs.objectsCopied, - forwardHHObjptrArgs.stacksCopied); + forwardHHObjptrArgs.bytesCopied, + forwardHHObjptrArgs.objectsCopied, + forwardHHObjptrArgs.stacksCopied); /* ===================================================================== */ struct LGC_chunkInfo info = - {.depth = 0, - .procNum = s->procNumber, - .collectionNumber = s->cumulativeStatistics->numHHLocalGCs, - .freedType = LGC_FREED_NORMAL_CHUNK}; + {.depth = 0, + .procNum = s->procNumber, + .collectionNumber = s->cumulativeStatistics->numHHLocalGCs, + .freedType = LGC_FREED_NORMAL_CHUNK}; struct writeFreedBlockInfoFnClosure infoc = - {.fun = LGC_writeFreeChunkInfo, .env = &info}; + {.fun = LGC_writeFreeChunkInfo, .env = &info}; for (HM_HierarchicalHeap cursor = hh; NULL != cursor && HM_HH_getDepth(cursor) >= minDepth; - cursor = cursor->nextAncestor) { + cursor = cursor->nextAncestor) + { HM_chunkList suspects = 
HM_HH_getSuspects(cursor); - if (suspects->size != 0) { + if (suspects->size != 0) + { uint32_t depth = HM_HH_getDepth(cursor); forwardHHObjptrArgs.toDepth = depth; struct GC_foreachObjptrClosure fObjptrClosure = - {.fun = copySuspect, .env = &forwardHHObjptrArgs}; + {.fun = copySuspect, .env = &forwardHHObjptrArgs}; ES_foreachSuspect(s, suspects, &fObjptrClosure); info.depth = depth; info.freedType = LGC_FREED_SUSPECT_CHUNK; @@ -635,7 +663,8 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { HM_chunkList level = HM_HH_getChunkList(hhTail); HM_chunkList remset = HM_HH_getRemSet(hhTail); - if (NULL != remset) { + if (NULL != remset) + { #if ASSERT /* clear out memory to quickly catch some memory safety errors */ // HM_chunk chunkCursor = remset->firstChunk; @@ -653,7 +682,8 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { #if ASSERT HM_chunk chunkCursor = level->firstChunk; - while (chunkCursor != NULL) { + while (chunkCursor != NULL) + { assert(!chunkCursor->pinnedDuringCollection); chunkCursor = chunkCursor->nextChunk; } @@ -681,14 +711,16 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { } /* merge in toSpace */ - if (NULL == hhTail && NULL == hhToSpace) { + if (NULL == hhTail && NULL == hhToSpace) + { /** SAM_NOTE: If we collected everything, I suppose this is possible. * But shouldn't the stack and thread at least be in the root-to-leaf * path? Should look into this... */ hh = HM_HH_new(s, thread->currentDepth); } - else { + else + { hh = HM_HH_zip(s, hhTail, hhToSpace); } @@ -706,15 +738,18 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { NULL != cursor; cursor = cursor->nextAncestor) { - if (HM_getChunkListLastChunk(HM_HH_getChunkList(cursor)) != NULL) { + if (HM_getChunkListLastChunk(HM_HH_getChunkList(cursor)) != NULL) + { lastChunk = HM_getChunkListLastChunk(HM_HH_getChunkList(cursor)); break; } } thread->currentChunk = lastChunk; - if (lastChunk != NULL && !lastChunk->mightContainMultipleObjects) { - if (!HM_HH_extend(s, thread, GC_HEAP_LIMIT_SLOP)) { + if (lastChunk != NULL && !lastChunk->mightContainMultipleObjects) + { + if (!HM_HH_extend(s, thread, GC_HEAP_LIMIT_SLOP)) + { DIE("Ran out of space for hierarchical heap!\n"); } } @@ -728,7 +763,6 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { * assert(lastChunk->frontier < (pointer)lastChunk + HM_BLOCK_SIZE); */ - #if 0 /** Finally, unfreeze chunks if we need to. */ if (s->controls->manageEntanglement) { @@ -759,7 +793,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { cursor = cursor->nextAncestor) { struct HM_foreachDownptrClosure closure = - {.fun = checkRememberedEntry, .env = (void*)cursor}; + {.fun = checkRememberedEntry, .env = (void *)cursor}; HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); } #endif @@ -773,7 +807,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { * TODO: IS THIS A PROBLEM? 
*/ thread->bytesSurvivedLastCollection = - forwardHHObjptrArgs.bytesMoved + forwardHHObjptrArgs.bytesCopied; + forwardHHObjptrArgs.bytesMoved + forwardHHObjptrArgs.bytesCopied; thread->bytesAllocatedSinceLastCollection = 0; @@ -803,10 +837,13 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { size_t sizeBefore = sizesBefore[i]; const char *sign; size_t diff; - if (sizeBefore > sizeAfter) { + if (sizeBefore > sizeAfter) + { sign = "-"; diff = sizeBefore - sizeAfter; - } else { + } + else + { sign = "+"; diff = sizeAfter - sizeBefore; } @@ -821,11 +858,14 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { } } - if (totalSizeAfter > totalSizeBefore) { + if (totalSizeAfter > totalSizeBefore) + { // whoops? - } else { + } + else + { s->cumulativeStatistics->bytesReclaimedByLocal += - (totalSizeBefore - totalSizeAfter); + (totalSizeBefore - totalSizeAfter); } /* enter statistics if necessary */ @@ -843,8 +883,10 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { // (int)thread->minLocalCollectionDepth); // } - if (needGCTime(s)) { - if (detailedGCTime(s)) { + if (needGCTime(s)) + { + if (detailedGCTime(s)) + { stopTiming(RUSAGE_THREAD, &ru_start, &s->cumulativeStatistics->ru_gcHHLocal); } /* @@ -880,10 +922,10 @@ bool isObjptrInToSpace(objptr op, struct ForwardHHObjptrArgs *args) /* ========================================================================= */ objptr relocateObject( - GC_state s, - objptr op, - HM_HierarchicalHeap tgtHeap, - struct ForwardHHObjptrArgs *args) + GC_state s, + objptr op, + HM_HierarchicalHeap tgtHeap, + struct ForwardHHObjptrArgs *args) { pointer p = objptrToPointer(op, NULL); @@ -903,7 +945,8 @@ objptr relocateObject( ©Bytes, &metaDataBytes); - if (!HM_getChunkOf(p)->mightContainMultipleObjects) { + if (!HM_getChunkOf(p)->mightContainMultipleObjects) + { /* This chunk contains *only* this object, so no need to copy. Instead, * just move the chunk. Don't forget to update the levelHead, too! */ HM_chunk chunk = HM_getChunkOf(p); @@ -912,9 +955,9 @@ objptr relocateObject( chunk->levelHead = HM_HH_getUFNode(tgtHeap); LOG(LM_HH_COLLECTION, LL_DEBUGMORE, - "Moved single-object chunk %p of size %zu", - (void*)chunk, - HM_getChunkSize(chunk)); + "Moved single-object chunk %p of size %zu", + (void *)chunk, + HM_getChunkSize(chunk)); args->bytesMoved += copyBytes; args->objectsMoved++; return op; @@ -926,9 +969,9 @@ objptr relocateObject( tgtHeap); /* Store the forwarding pointer in the old object metadata. 
*/ - *(getFwdPtrp(p)) = pointerToObjptr (copyPointer + metaDataBytes, - NULL); - assert (hasFwdPtr(p)); + *(getFwdPtrp(p)) = pointerToObjptr(copyPointer + metaDataBytes, + NULL); + assert(hasFwdPtr(p)); args->bytesCopied += copyBytes; args->objectsCopied++; @@ -1045,16 +1088,19 @@ void unfreezeDisentangledDepthAfter( #endif /* ========================================================================= */ -void copySuspect(GC_state s, objptr *opp, void *rawArghh) { +void copySuspect(GC_state s, objptr *opp, void *rawArghh) +{ struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArghh; objptr op = *opp; assert(isObjptr(op)); pointer p = objptrToPointer(op, NULL); objptr new_ptr = op; - if (hasFwdPtr(p)) { + if (hasFwdPtr(p)) + { new_ptr = getFwdPtr(p); } - else if (!isPinned(op)) { + else if (!isPinned(op)) + { /* the suspect does not have a fwd-ptr and is not pinned * ==> its garbage, so skip it */ @@ -1068,8 +1114,68 @@ void copySuspect(GC_state s, objptr *opp, void *rawArghh) { HM_storeInchunkList(HM_HH_getSuspects(args->toSpace[opDepth]), &new_ptr, sizeof(objptr)); } -void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { - struct ForwardHHObjptrArgs* args = (struct ForwardHHObjptrArgs*)rawArgs; +void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) +{ + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + pointer p = objptrToPointer(*opp, NULL); + HM_chunk chunk = HM_getChunkOf(p); + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) + { + // DOUBLE CHECK + return; + } + else if (!CC_isPointerMarked(p)) + { + markObj(p); + if (!chunk->pinnedDuringCollection) + { + chunk->pinnedDuringCollection = TRUE; + HM_unlinkChunkPreserveLevelHead( + HM_HH_getChunkList(args->fromSpace[opDepth]), + chunk); + HM_appendChunk(&(args->pinned[opDepth]), chunk); + } + struct GC_foreachObjptrClosure msClosure = + {.fun = LGC_markAndScan, .env = rawArgs}; + foreachObjptrInObject(s, p, &trueObjptrPredicateClosure, &msClosure, FALSE); + } + else + { + assert(chunk->pinnedDuringCollection); + } +} + +void unmark( + __attribute__((unused)) GC_state s, + objptr *opp, + void *rawArgs) +{ + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + pointer p = objptrToPointer(*opp, NULL); + HM_chunk chunk = HM_getChunkOf(p); + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + assert(!hasFwdPtr(p)); + if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) + { + return; + } + if (CC_isPointerMarked(p)) + { + markObj(p); + struct GC_foreachObjptrClosure unmarkClosure = {.fun = unmark, .env = rawArgs}; + foreachObjptrInObject(s, p, &trueObjptrPredicateClosure, &unmarkClosure, FALSE); + } +} + +void unmarkWrapper(GC_state s, HM_remembered remElem, void *args) +{ + unmark(s, &(remElem->object), args); +} + +void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) +{ + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; objptr op = remElem->object; #if ASSERT @@ -1078,13 +1184,14 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { assert(HM_HH_getDepth(fromHH) <= args->toDepth); #endif - if (!isPinned(op)) { + if (!isPinned(op)) + { // If previously unpinned, then no need to remember this object. 
assert(HM_getLevelHead(fromChunk) == args->fromSpace[args->toDepth]); LOG(LM_HH_PROMOTION, LL_INFO, - "forgetting remset entry from "FMTOBJPTR" to "FMTOBJPTR, - remElem->from, op); + "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, + remElem->from, op); return; } @@ -1095,13 +1202,14 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { * (getLevelHead, etc.), but this should be faster. The toDepth field * is set by the loop that calls this function */ uint32_t opDepth = args->toDepth; - HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); - if (NULL == args->toSpace[opDepth]) { + if (NULL == args->toSpace[opDepth]) + { args->toSpace[opDepth] = HM_HH_new(s, opDepth); } #if ASSERT + HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); assert(opDepth <= args->maxDepth); HM_HierarchicalHeap hh = HM_getLevelHead(chunk); assert(args->fromSpace[opDepth] == hh); @@ -1135,18 +1243,20 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { assert(fromDepth <= opDepth); - if (opDepth <= unpinDepth) { + if (opDepth <= unpinDepth) + { unpinObject(op); assert(fromDepth == opDepth); LOG(LM_HH_PROMOTION, LL_INFO, - "forgetting remset entry from "FMTOBJPTR" to "FMTOBJPTR, - remElem->from, op); + "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, + remElem->from, op); return; } - if (fromDepth > unpinDepth) { + if (fromDepth > unpinDepth) + { /** If this particular remembered entry came from deeper than some other * down-pointer, then we don't need to keep it around. There will be some * other remembered entry coming from the unpinDepth level. @@ -1157,8 +1267,8 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { */ LOG(LM_HH_PROMOTION, LL_INFO, - "forgetting remset entry from "FMTOBJPTR" to "FMTOBJPTR, - remElem->from, op); + "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, + remElem->from, op); return; } @@ -1169,74 +1279,101 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { * entry into the toSpace. 
*/ HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem); - - if (chunk->pinnedDuringCollection) { - return; + if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { + HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + /* if this is a down-ptr completely inside the scope, + * no need to in-place things reachable from it + */ + printf("this is happening\n"); + if (!chunk->pinnedDuringCollection) + { + chunk->pinnedDuringCollection = TRUE; + HM_unlinkChunkPreserveLevelHead( + HM_HH_getChunkList(args->fromSpace[opDepth]), + chunk); + HM_appendChunk(&(args->pinned[opDepth]), chunk); + } + } + else { + // printf("fromDepth =%d, max = %d, min = %d", fromDepth, args->maxDepth, args->minDepth); + LGC_markAndScan(s, &op, rawArgs); } - chunk->pinnedDuringCollection = TRUE; - assert(hhContainsChunk(args->fromSpace[opDepth], chunk)); - assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); + // LGC_markAndScan(s, &(remElem->from), rawArgs); - HM_unlinkChunkPreserveLevelHead( - HM_HH_getChunkList(args->fromSpace[opDepth]), - chunk); - HM_appendChunk(&(args->pinned[opDepth]), chunk); + // if (chunk->pinnedDuringCollection) { + // return; + // } - assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); + // chunk->pinnedDuringCollection = TRUE; + // assert(hhContainsChunk(args->fromSpace[opDepth], chunk)); + // assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); + + // HM_unlinkChunkPreserveLevelHead( + // HM_HH_getChunkList(args->fromSpace[opDepth]), + // chunk); + // HM_appendChunk(&(args->pinned[opDepth]), chunk); + + // assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); } /* ========================================================================= */ -void forwardObjptrsOfRemembered(GC_state s, HM_remembered remElem, void* rawArgs) { +// JATIN_TODO: CHANGE NAME TO FORWARD FROM +/// COULD BE HEKLPFUL FOR DEGBUGGING TO FORWARD THE OBJECTS ANYWAY +void forwardObjptrsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs) +{ objptr op = remElem->object; assert(isPinned(op)); struct GC_foreachObjptrClosure closure = - {.fun = forwardHHObjptr, .env = rawArgs}; + {.fun = forwardHHObjptr, .env = rawArgs}; foreachObjptrInObject( - s, - objptrToPointer(op, NULL), - &trueObjptrPredicateClosure, - &closure, - FALSE - ); + s, + objptrToPointer(op, NULL), + &trueObjptrPredicateClosure, + &closure, + FALSE); forwardHHObjptr(s, &(remElem->from), rawArgs); } /* ========================================================================= */ -void forwardHHObjptr (GC_state s, - objptr* opp, - void* rawArgs) { - struct ForwardHHObjptrArgs* args = ((struct ForwardHHObjptrArgs*)(rawArgs)); +void forwardHHObjptr(GC_state s, + objptr *opp, + void *rawArgs) +{ + struct ForwardHHObjptrArgs *args = ((struct ForwardHHObjptrArgs *)(rawArgs)); objptr op = *opp; - pointer p = objptrToPointer (op, NULL); + pointer p = objptrToPointer(op, NULL); assert(args->toDepth == HM_HH_INVALID_DEPTH); - if (DEBUG_DETAILED) { - fprintf (stderr, - "forwardHHObjptr opp = "FMTPTR" op = "FMTOBJPTR" p = " - ""FMTPTR"\n", - (uintptr_t)opp, - op, - (uintptr_t)p); + if (DEBUG_DETAILED) + { + fprintf(stderr, + "forwardHHObjptr opp = " FMTPTR " op = " FMTOBJPTR " p = " + "" FMTPTR "\n", + (uintptr_t)opp, + op, + (uintptr_t)p); } LOG(LM_HH_COLLECTION, LL_DEBUGMORE, - "opp = "FMTPTR" op = "FMTOBJPTR" p = "FMTPTR, + "opp = " FMTPTR " op = " FMTOBJPTR " p = " FMTPTR, (uintptr_t)opp, op, (uintptr_t)p); - if (!isObjptr(op) || 
isObjptrInRootHeap(s, op)) { + if (!isObjptr(op) || isObjptrInRootHeap(s, op)) + { /* does not point to an HH objptr, so not in scope for collection */ LOG(LM_HH_COLLECTION, LL_DEBUGMORE, - "skipping opp = "FMTPTR" op = "FMTOBJPTR" p = "FMTPTR": not in HH.", + "skipping opp = " FMTPTR " op = " FMTOBJPTR " p = " FMTPTR ": not in HH.", (uintptr_t)opp, op, (uintptr_t)p); @@ -1245,7 +1382,8 @@ void forwardHHObjptr (GC_state s, uint32_t opDepth = HM_getObjptrDepthPathCompress(op); - if (opDepth > args->maxDepth) { + if (opDepth > args->maxDepth) + { DIE("entanglement detected during collection: %p is at depth %u, below %u", (void *)p, opDepth, @@ -1255,59 +1393,69 @@ void forwardHHObjptr (GC_state s, /* RAM_NOTE: This is more nuanced with non-local collection */ if ((opDepth > args->maxDepth) || /* cannot forward any object below 'args->minDepth' */ - (opDepth < args->minDepth)) { - LOG(LM_HH_COLLECTION, LL_DEBUGMORE, - "skipping opp = "FMTPTR" op = "FMTOBJPTR" p = "FMTPTR - ": depth %d not in [minDepth %d, maxDepth %d].", - (uintptr_t)opp, - op, - (uintptr_t)p, - opDepth, - args->minDepth, - args->maxDepth); - return; + (opDepth < args->minDepth)) + { + LOG(LM_HH_COLLECTION, LL_DEBUGMORE, + "skipping opp = " FMTPTR " op = " FMTOBJPTR " p = " FMTPTR + ": depth %d not in [minDepth %d, maxDepth %d].", + (uintptr_t)opp, + op, + (uintptr_t)p, + opDepth, + args->minDepth, + args->maxDepth); + return; } assert(HM_getObjptrDepth(op) >= args->minDepth); - if (isObjptrInToSpace(op, args)) { + if (isObjptrInToSpace(op, args)) + { assert(!hasFwdPtr(objptrToPointer(op, NULL))); assert(!isPinned(op)); + assert(!CC_isPointerMarked(objptrToPointer(op, NULL))); return; } /* Assert is in from space. This holds for pinned objects, too, because * their levelHead is still set to the fromSpace HH. (Pinned objects are * stored in a different chunklist during collection through.) */ - assert( HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) - == - args->fromSpace[HM_getObjptrDepth(op)] ); - - if (hasFwdPtr(p)) { + assert(HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) == + args->fromSpace[HM_getObjptrDepth(op)]); + if (hasFwdPtr(p)) + { objptr fop = getFwdPtr(p); assert(!hasFwdPtr(objptrToPointer(fop, NULL))); assert(isObjptrInToSpace(fop, args)); assert(HM_getObjptrDepth(fop) == opDepth); assert(!isPinned(fop)); + // assert(!CC_isPointerMarked(fop)); *opp = fop; return; } assert(!hasFwdPtr(p)); + if (CC_isPointerMarked(p)) + { + // this object is collected in-place. + return; + } + /** REALLY SUBTLE. CC clears out remset entries, but can't safely perform * unpinning. So, there could be objects that (for the purposes of LC) are * semantically unpinned, but just haven't been marked as such yet. Here, * we are lazily checking to see if this object should have been unpinned. 
*/ - if (isPinned(op) && unpinDepthOf(op) < opDepth) { + if (isPinned(op) && unpinDepthOf(op) < opDepth) + { // This is a truly pinned object - assert(listContainsChunk( &(args->pinned[opDepth]), - HM_getChunkOf(objptrToPointer(op, NULL)) - )); + assert(listContainsChunk(&(args->pinned[opDepth]), + HM_getChunkOf(objptrToPointer(op, NULL)))); return; } - else { + else + { // This object should have been previously unpinned unpinObject(op); } @@ -1337,21 +1485,23 @@ void forwardHHObjptr (GC_state s, ©Bytes, &metaDataBytes); - switch (tag) { + switch (tag) + { case STACK_TAG: - args->stacksCopied++; - break; + args->stacksCopied++; + break; case WEAK_TAG: - die(__FILE__ ":%d: " - "forwardHHObjptr() does not support WEAK_TAG objects!", - __LINE__); - break; + die(__FILE__ ":%d: " + "forwardHHObjptr() does not support WEAK_TAG objects!", + __LINE__); + break; default: - break; + break; } HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; - if (tgtHeap == NULL) { + if (tgtHeap == NULL) + { /* Level does not exist, so create it */ tgtHeap = HM_HH_new(s, opDepth); args->toSpace[opDepth] = tgtHeap; @@ -1363,7 +1513,7 @@ void forwardHHObjptr (GC_state s, } LOG(LM_HH_COLLECTION, LL_DEBUGMORE, - "opp "FMTPTR" set to "FMTOBJPTR, + "opp " FMTPTR " set to " FMTOBJPTR, ((uintptr_t)(opp)), *opp); } @@ -1371,10 +1521,11 @@ void forwardHHObjptr (GC_state s, pointer copyObject(pointer p, size_t objectSize, size_t copySize, - HM_HierarchicalHeap tgtHeap) { + HM_HierarchicalHeap tgtHeap) +{ -// check if you can add to existing chunk --> mightContain + size -// If not, allocate new chunk and copy. + // check if you can add to existing chunk --> mightContain + size + // If not, allocate new chunk and copy. assert(HM_HH_isLevelHead(tgtHeap)); assert(copySize <= objectSize); @@ -1386,24 +1537,27 @@ pointer copyObject(pointer p, bool mustExtend = false; HM_chunk chunk = HM_getChunkListLastChunk(tgtChunkList); - if(chunk == NULL || !chunk->mightContainMultipleObjects){ + if (chunk == NULL || !chunk->mightContainMultipleObjects) + { mustExtend = true; } - else { + else + { pointer frontier = HM_getChunkFrontier(chunk); pointer limit = HM_getChunkLimit(chunk); assert(frontier <= limit); mustExtend = ((size_t)(limit - frontier) < objectSize) || - (frontier + GC_SEQUENCE_METADATA_SIZE - >= (pointer)chunk + HM_BLOCK_SIZE); + (frontier + GC_SEQUENCE_METADATA_SIZE >= (pointer)chunk + HM_BLOCK_SIZE); } - if (mustExtend) { + if (mustExtend) + { /* Need to allocate a new chunk. Safe to use the dechecker state of where * the object came from, as all objects in the same heap can be safely * reassigned to any dechecker state of that heap. */ chunk = HM_allocateChunk(tgtChunkList, objectSize); - if (NULL == chunk) { + if (NULL == chunk) + { DIE("Ran out of space for Hierarchical Heap!"); } chunk->decheckState = HM_getChunkOf(p)->decheckState; @@ -1431,41 +1585,48 @@ pointer copyObject(pointer p, GC_objectTypeTag computeObjectCopyParameters(GC_state s, pointer p, size_t *objectSize, size_t *copySize, - size_t *metaDataSize) { - GC_header header; - GC_objectTypeTag tag; - uint16_t bytesNonObjptrs; - uint16_t numObjptrs; - header = getHeader(p); - splitHeader(s, header, &tag, NULL, &bytesNonObjptrs, &numObjptrs); - - /* Compute the space taken by the metadata and object body. */ - if ((NORMAL_TAG == tag) or (WEAK_TAG == tag)) { /* Fixed size object. 
*/ - if (WEAK_TAG == tag) { - die(__FILE__ ":%d: " - "computeObjectSizeAndCopySize() #define does not support" - " WEAK_TAG objects!", - __LINE__); - } - *metaDataSize = GC_NORMAL_METADATA_SIZE; - *objectSize = bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE); - *copySize = *objectSize; - } else if (SEQUENCE_TAG == tag) { - *metaDataSize = GC_SEQUENCE_METADATA_SIZE; - *objectSize = sizeofSequenceNoMetaData (s, getSequenceLength (p), - bytesNonObjptrs, numObjptrs); - *copySize = *objectSize; - } else { - /* Stack. */ - // bool current; - // size_t reservedNew; - GC_stack stack; - - assert (STACK_TAG == tag); - *metaDataSize = GC_STACK_METADATA_SIZE; - stack = (GC_stack)p; - - /* SAM_NOTE: + size_t *metaDataSize) +{ + GC_header header; + GC_objectTypeTag tag; + uint16_t bytesNonObjptrs; + uint16_t numObjptrs; + header = getHeader(p); + splitHeader(s, header, &tag, NULL, &bytesNonObjptrs, &numObjptrs); + + /* Compute the space taken by the metadata and object body. */ + if ((NORMAL_TAG == tag) or (WEAK_TAG == tag)) + { /* Fixed size object. */ + if (WEAK_TAG == tag) + { + die(__FILE__ ":%d: " + "computeObjectSizeAndCopySize() #define does not support" + " WEAK_TAG objects!", + __LINE__); + } + *metaDataSize = GC_NORMAL_METADATA_SIZE; + *objectSize = bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE); + *copySize = *objectSize; + } + else if (SEQUENCE_TAG == tag) + { + *metaDataSize = GC_SEQUENCE_METADATA_SIZE; + *objectSize = sizeofSequenceNoMetaData(s, getSequenceLength(p), + bytesNonObjptrs, numObjptrs); + *copySize = *objectSize; + } + else + { + /* Stack. */ + // bool current; + // size_t reservedNew; + GC_stack stack; + + assert(STACK_TAG == tag); + *metaDataSize = GC_STACK_METADATA_SIZE; + stack = (GC_stack)p; + + /* SAM_NOTE: * I am disabling shrinking here because it assumes that * the stack is going to be copied, which doesn't work with the * "stacks-in-their-own-chunks" strategy. 
@@ -1486,34 +1647,37 @@ GC_objectTypeTag computeObjectCopyParameters(GC_state s, pointer p, stack->reserved = reservedNew; } #endif - *objectSize = sizeof (struct GC_stack) + stack->reserved; - *copySize = sizeof (struct GC_stack) + stack->used; - } + *objectSize = sizeof(struct GC_stack) + stack->reserved; + *copySize = sizeof(struct GC_stack) + stack->used; + } - *objectSize += *metaDataSize; - *copySize += *metaDataSize; + *objectSize += *metaDataSize; + *copySize += *metaDataSize; - return tag; + return tag; } - bool skipStackAndThreadObjptrPredicate(GC_state s, pointer p, - void* rawArgs) { + void *rawArgs) +{ /* silence compliler */ ((void)(s)); /* extract expected stack */ - LOCAL_USED_FOR_ASSERT const struct SSATOPredicateArgs* args = - ((struct SSATOPredicateArgs*)(rawArgs)); + LOCAL_USED_FOR_ASSERT const struct SSATOPredicateArgs *args = + ((struct SSATOPredicateArgs *)(rawArgs)); /* run through FALSE cases */ GC_header header; header = getHeader(p); - if (header == GC_STACK_HEADER) { + if (header == GC_STACK_HEADER) + { assert(args->expectedStackPointer == p); return FALSE; - } else if (header == GC_THREAD_HEADER) { + } + else if (header == GC_THREAD_HEADER) + { assert(args->expectedThreadPointer == p); return FALSE; } @@ -1524,9 +1688,9 @@ bool skipStackAndThreadObjptrPredicate(GC_state s, #if ASSERT void checkRememberedEntry( - __attribute__((unused)) GC_state s, - HM_remembered remElem, - void* args) + __attribute__((unused)) GC_state s, + HM_remembered remElem, + void *args) { objptr object = remElem->object; From 6442206faacf4372f2be52ed81ae961edc36fafd Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 17 Mar 2022 10:43:48 -0400 Subject: [PATCH 002/114] preserve hh records through lgc --- runtime/gc/hierarchical-heap-collection.c | 91 +++++++++++++---------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 0c293dffd..4a242c05d 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -550,39 +550,6 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { } } - /* after everything has been scavenged, we have to move the pinned chunks */ - depth = thread->currentDepth+1; - while (depth > forwardHHObjptrArgs.minDepth) { - depth--; - HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; - if (NULL == toSpaceLevel) { - /* check that there are also no pinned chunks at this level - * (if there was pinned chunk, then we would have also created a - * toSpace HH at this depth, because we would have scavenged the - * remembered entry) */ - assert(pinned[depth].firstChunk == NULL); - continue; - } - -#if ASSERT - // SAM_NOTE: safe to check here, because pinned chunks are separate. 
- traverseEachObjInChunkList(s, HM_HH_getChunkList(toSpaceLevel)); -#endif - - /* unset the flags on pinned chunks and update their HH pointer */ - for (HM_chunk chunkCursor = pinned[depth].firstChunk; - chunkCursor != NULL; - chunkCursor = chunkCursor->nextChunk) - { - assert(chunkCursor->pinnedDuringCollection); - chunkCursor->pinnedDuringCollection = FALSE; - chunkCursor->levelHead = HM_HH_getUFNode(toSpaceLevel); - } - - /* put the pinned chunks into the toSpace */ - HM_appendChunkList(HM_HH_getChunkList(toSpaceLevel), &(pinned[depth])); - } - LOG(LM_HH_COLLECTION, LL_DEBUG, "Copied %"PRIu64" objects in copy-collection", forwardHHObjptrArgs.objectsCopied - oldObjectCopied); @@ -642,13 +609,43 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { info.depth = HM_HH_getDepth(hhTail); info.freedType = LGC_FREED_NORMAL_CHUNK; HM_freeChunksInListWithInfo(s, level, &infoc); - HM_HH_freeAllDependants(s, hhTail, FALSE); - freeFixedSize(getUFAllocator(s), HM_HH_getUFNode(hhTail)); - freeFixedSize(getHHAllocator(s), hhTail); + HM_HH_freeAllDependants(s, hhTail, TRUE); + // freeFixedSize(getUFAllocator(s), HM_HH_getUFNode(hhTail)); + // freeFixedSize(getHHAllocator(s), hhTail); hhTail = nextAncestor; } + + /* after everything has been scavenged, we have to move the pinned chunks */ + depth = thread->currentDepth+1; + while (depth > forwardHHObjptrArgs.minDepth) { + depth--; + HM_HierarchicalHeap fromSpaceLevel = fromSpace[depth]; + if (NULL == fromSpaceLevel) { + /* check that there are also no pinned chunks at this level + * (if there was pinned chunk, then there must also have been a + * fromSpace HH at this depth which originally stored the chunk) + */ + assert(pinned[depth].firstChunk == NULL); + continue; + } + + /* unset the flags on pinned chunks and update their HH pointer */ + for (HM_chunk chunkCursor = pinned[depth].firstChunk; + chunkCursor != NULL; + chunkCursor = chunkCursor->nextChunk) + { + assert(chunkCursor->levelHead == HM_HH_getUFNode(fromSpaceLevel)); + assert(chunkCursor->pinnedDuringCollection); + chunkCursor->pinnedDuringCollection = FALSE; + } + + /* put the pinned chunks into the toSpace */ + HM_appendChunkList(HM_HH_getChunkList(fromSpaceLevel), &(pinned[depth])); + } + + /* Build the toSpace hh */ HM_HierarchicalHeap hhToSpace = NULL; for (uint32_t i = 0; i <= maxDepth; i++) @@ -661,7 +658,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { } /* merge in toSpace */ - if (NULL == hhTail && NULL == hhToSpace) { + if (NULL == hh && NULL == hhToSpace) { /** SAM_NOTE: If we collected everything, I suppose this is possible. * But shouldn't the stack and thread at least be in the root-to-leaf * path? Should look into this... 
@@ -669,7 +666,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { hh = HM_HH_new(s, thread->currentDepth); } else { - hh = HM_HH_zip(s, hhTail, hhToSpace); + hh = HM_HH_zip(s, hh, hhToSpace); } thread->hierarchicalHeap = hh; @@ -742,6 +739,16 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { {.fun = checkRememberedEntry, .env = (void*)cursor}; HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); } + + // make sure that original representatives haven't been messed up + for (HM_HierarchicalHeap cursor = hh; + NULL != cursor; + cursor = cursor->nextAncestor) + { + if (NULL != fromSpace[HM_HH_getDepth(cursor)]) { + assert(fromSpace[HM_HH_getDepth(cursor)] == cursor); + } + } #endif s->cumulativeStatistics->bytesHHLocaled += forwardHHObjptrArgs.bytesCopied; @@ -1136,12 +1143,20 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void* rawArgs) { assert(hhContainsChunk(args->fromSpace[opDepth], chunk)); assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); + if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { + chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); + } + HM_unlinkChunkPreserveLevelHead( HM_HH_getChunkList(args->fromSpace[opDepth]), chunk); HM_appendChunk(&(args->pinned[opDepth]), chunk); assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); + /** stronger version of previous assertion, needed for safe freeing of + * hh dependants after LGC completes + */ + assert(chunk->levelHead == HM_HH_getUFNode(args->fromSpace[opDepth])); } /* ========================================================================= */ From 3c5e286d60d7c3e22366edfa3f14e94bd0aa8f9c Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 12:35:54 -0400 Subject: [PATCH 003/114] expand pin types --- runtime/gc/pin.c | 123 +++++++++++++++++++++++++++++++++++++++-------- runtime/gc/pin.h | 44 ++++++++++++----- 2 files changed, 135 insertions(+), 32 deletions(-) diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index b564d5c0c..0f5b543d9 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -6,51 +6,96 @@ #if (defined (MLTON_GC_INTERNAL_FUNCS)) -bool pinObject(objptr op, uint32_t unpinDepth) +enum PinType pinType(GC_header h) { + if (0 == (h & GC_VALID_HEADER_MASK)) + return PIN_NONE; + + int t = ((h & PIN_MASK) >> PIN_SHIFT); + if (t == 0) + return PIN_NONE; + else if (t == 2) + return PIN_DOWN; + else if (t == 3) + return PIN_ANY; + else + DIE("NOT supposed to reach here!"); +} + +enum PinType maxPT(enum PinType pt1, enum PinType pt2) { + if (pt1 == PIN_NONE) + return pt2; + else if (pt1 == PIN_ANY || pt2 == PIN_ANY) + return PIN_ANY; + else + return PIN_DOWN; +} + +static inline GC_header getRep(enum PinType pt) +{ + GC_header h; + if (pt == PIN_NONE) + h = 0; + else if (pt == PIN_DOWN) + h = 0x20000000; + else + h = 0x30000000; + return h; +} + +bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt) { + bool a, b; + pinObjectInfo(op, unpinDepth, pt, &a, &b); + return a; +} + +void pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, + bool* headerChange, bool* pinChange) { pointer p = objptrToPointer(op, NULL); + assert (pt != PIN_NONE); + + /*initialize with false*/ + *headerChange = false; + *pinChange = false; uint32_t maxUnpinDepth = TWOPOWER(UNPIN_DEPTH_BITS) - 1; if (unpinDepth > maxUnpinDepth) { DIE("unpinDepth %"PRIu32" exceeds max possible value %"PRIu32, unpinDepth, maxUnpinDepth); - return FALSE; + return; } assert( ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT == (UNPIN_DEPTH_MASK & ((GC_header)unpinDepth) << 
UNPIN_DEPTH_SHIFT) ); - while (TRUE) { + while (true) { GC_header header = getHeader(p); - bool notPinned = (0 == (header & PIN_MASK) >> PIN_SHIFT); uint32_t previousUnpinDepth = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + uint32_t newUnpinDepth = min(previousUnpinDepth, unpinDepth); + enum PinType nt = maxPT(pt, pinType(header)); GC_header newHeader = - (header & (~UNPIN_DEPTH_MASK)) // clear unpin bits - | ((GC_header)unpinDepth << UNPIN_DEPTH_SHIFT) // put in new unpinDepth - | PIN_MASK; // set pin bit - - if (notPinned) { - /* first, handle case where this object was not already pinned */ - if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) - return TRUE; + (header & (~UNPIN_DEPTH_MASK)) // clear unpin bits + | ((GC_header)newUnpinDepth << UNPIN_DEPTH_SHIFT) // put in new unpinDepth + | getRep(nt); // setup the pin type + + if(newHeader == header) { + return; } else { - /* if the object was previously pinned, we still need to do a writeMin */ - if (previousUnpinDepth <= unpinDepth) - return FALSE; - - if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) - return TRUE; + if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { + *headerChange = true; + *pinChange = (nt != pinType(header)); + return; + } } } - DIE("should be impossible to reach here"); - return FALSE; + return; } void unpinObject(objptr op) { @@ -73,7 +118,7 @@ bool isPinned(objptr op) { * ...and then check the mark */ return (1 == (h & GC_VALID_HEADER_MASK)) && - (1 == ((h & PIN_MASK) >> PIN_SHIFT)); + (((h & PIN_MASK) >> PIN_SHIFT) > 0); } uint32_t unpinDepthOf(objptr op) { @@ -82,4 +127,40 @@ uint32_t unpinDepthOf(objptr op) { return d; } +bool tryUnpinWithDepth(objptr op, uint32_t opDepth) { + pointer p = objptrToPointer(op, NULL); + GC_header header = getHeader(p); + uint32_t d = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + + if (d >= opDepth) { + GC_header newHeader = + getHeader(p) + & (~UNPIN_DEPTH_MASK) // clear counter bits + & (~PIN_MASK); // clear mark bit + + return __sync_bool_compare_and_swap(getHeaderp(p), header, newHeader); + } + + return false; + +} + + +// bool tryPinDec(objptr op, uint32_t opDepth) { +// pointer p = objptrToPointer(op, NULL); +// GC_header header = getHeader(p); +// uint32_t d = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + +// if (d >= opDepth && pinType(header) == PIN_ANY) { +// GC_header newHeader = +// getHeader(p) +// & (~UNPIN_DEPTH_MASK) // clear counter bits +// & (~PIN_MASK); // clear mark bit + +// return __sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)); +// } + +// return false; +// } + #endif diff --git a/runtime/gc/pin.h b/runtime/gc/pin.h index cc69a86e2..aa2adb07f 100644 --- a/runtime/gc/pin.h +++ b/runtime/gc/pin.h @@ -11,19 +11,25 @@ * * +------+-------------------------+----------+--------------+ * header fields | mark | counter | type-tag | valid-header | - * +------+-----------+-------------+----------+--------------+ - * sub-fields | | sus | pin | unpin-depth | | | - * +------+-----+-----+-------------+----------+--------------+ - * ^ ^ ^ ^ ^ ^ ^ - * offsets 32 31 30 29 20 1 0 + * +------+------------+-------------+----------+--------------+ + * sub-fields | | sus | pin | unpin-depth | | | + * +------+-----+------+-------------+----------+--------------+ + * ^ ^ ^ ^ ^ ^ ^ + * offsets 32 31 30 28 20 1 0 * */ -#define UNPIN_DEPTH_BITS 9 -#define UNPIN_DEPTH_MASK ((GC_header)0x1FF00000) +#define UNPIN_DEPTH_BITS 8 +#define UNPIN_DEPTH_MASK ((GC_header)0xFF00000) #define 
UNPIN_DEPTH_SHIFT 20 -#define PIN_BITS 1 -#define PIN_MASK ((GC_header)0x20000000) -#define PIN_SHIFT 29 +#define PIN_MASK ((GC_header)0x30000000) +#define PIN_SHIFT 28 + +enum PinType +{ + PIN_NONE, + PIN_DOWN, + PIN_ANY +}; /* Pin this object, making it immovable (by GC) until it reaches * unpinDepth (or shallower). Returns TRUE if the object was @@ -32,11 +38,20 @@ * Note that regardless of whether or not the object was previously * pinned, this does a writeMin on the unpinDepth of the object. */ -bool pinObject(objptr op, uint32_t unpinDepth); +bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt); + +void pinObjectInfo(objptr op, + uint32_t unpinDepth, + enum PinType pt, + bool* headerChange, + bool* pinChange); /* check if an object is pinned */ bool isPinned(objptr op); +/* */ +enum PinType pinType(GC_header header); + /* Unpin an object by clearing the mark and counter bits in its header. * This is only safe if the object is not being concurrently pinned. * As long as we only call this on objects that are local, it's safe. @@ -46,4 +61,11 @@ void unpinObject(objptr op); /* read the current unpin-depth of an object */ uint32_t unpinDepthOf(objptr op); +/* unpin an object if its depth allows. Because the unpinDepth can change + * concurrently, we want to make sure we use the logic in this function. + * If unpin is successful, then it returns true. Otherwise, false. + */ +bool tryUnpinWithDepth(objptr op, uint32_t opDepth); +// bool tryPinDec(objptr op, uint32_t opDepth); + #endif From aef1e0c68824c1d784566c579e20fca7600d3920 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 12:36:49 -0400 Subject: [PATCH 004/114] remset public and private --- runtime/gc/concurrent-collection.c | 71 ++++++++------ runtime/gc/hierarchical-heap-collection.c | 110 ++++++++++------------ runtime/gc/remembered-set.c | 62 +++++++++--- runtime/gc/remembered-set.h | 29 +++++- 4 files changed, 167 insertions(+), 105 deletions(-) diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 0b3dacabf..aaddaf264 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -590,18 +590,17 @@ void CC_collectAtPublicLevel(GC_state s, GC_thread thread, uint32_t depth) { /* ========================================================================= */ struct CC_tryUnpinOrKeepPinnedArgs { - HM_chunkList newRemSet; + HM_remSet newRemSet; HM_HierarchicalHeap tgtHeap; void* fromSpaceMarker; void* toSpaceMarker; }; - void CC_tryUnpinOrKeepPinned( - __attribute__((unused)) GC_state s, - HM_remembered remElem, - void* rawArgs) + __attribute__((unused)) GC_state s, + HM_remembered remElem, + void *rawArgs) { struct CC_tryUnpinOrKeepPinnedArgs* args = (struct CC_tryUnpinOrKeepPinnedArgs *)rawArgs; @@ -628,7 +627,36 @@ void CC_tryUnpinOrKeepPinned( assert(chunk->tmpHeap == args->fromSpaceMarker); assert(HM_getLevelHead(chunk) == args->tgtHeap); - HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); + // pointer p = objptrToPointer(remElem->object, NULL); + // GC_header header = getHeader(p); + // enum PinType pt = pinType(header); + // uint32_t unpinDepth = unpinDepthOf(remElem->object); + + // assert (pt != PIN_NONE); + + if (remElem->from != BOGUS_OBJPTR) + { + + // uint32_t opDepth = HM_HH_getDepth(args->tgtHeap); + // if (unpinDepth > opDepth && pt == PIN_ANY) { + // tryPinDec(remElem->object, opDepth); + // } + + HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); + assert(fromChunk->tmpHeap != 
args->toSpaceMarker); + + if (fromChunk->tmpHeap == args->fromSpaceMarker) { + assert(isChunkInList(fromChunk, HM_HH_getChunkList(args->tgtHeap))); + return; + } + + /* otherwise, object stays pinned, and we have to keep this remembered + * entry into the toSpace. */ + } + + /* grossly overapproximate for entangled objects */ + HM_remember(args->newRemSet, remElem, false); + /** SAM_NOTE: The goal of the following was to filter remset entries * to only keep the "shallowest" entries. But this is really tricky, @@ -641,7 +669,6 @@ void CC_tryUnpinOrKeepPinned( */ #if 0 uint32_t unpinDepth = unpinDepthOf(op); - uint32_t opDepth = HM_HH_getDepth(args->tgtHeap); uint32_t fromDepth = HM_HH_getDepth(HM_getLevelHead(fromChunk)); if (fromDepth > unpinDepth) { /** Can forget any down-pointer that came from shallower than the @@ -652,20 +679,6 @@ void CC_tryUnpinOrKeepPinned( assert(opDepth < unpinDepth || fromDepth == unpinDepth); #endif - assert(fromChunk->tmpHeap != args->toSpaceMarker); - - if (fromChunk->tmpHeap == args->fromSpaceMarker) - { - // fromChunk is in-scope of CC. Don't need to keep this remembered entry. - assert(isChunkInList(fromChunk, HM_HH_getChunkList(args->tgtHeap))); - return; - } - - /* otherwise, object stays pinned, and we have to keep this remembered - * entry into the toSpace. */ - - HM_remember(args->newRemSet, remElem); - assert(isChunkInList(chunk, HM_HH_getChunkList(args->tgtHeap))); assert(HM_getLevelHead(chunk) == args->tgtHeap); } @@ -678,9 +691,9 @@ void CC_filterPinned( void* fromSpaceMarker, void* toSpaceMarker) { - HM_chunkList oldRemSet = HM_HH_getRemSet(hh); - struct HM_chunkList newRemSet; - HM_initChunkList(&newRemSet); + HM_remSet oldRemSet = HM_HH_getRemSet(hh); + struct HM_remSet newRemSet; + HM_initRemSet(&newRemSet); LOG(LM_CC_COLLECTION, LL_INFO, "num pinned initially: %zu", @@ -711,13 +724,13 @@ void CC_filterPinned( struct writeFreedBlockInfoFnClosure infoc = {.fun = CC_writeFreeChunkInfo, .env = &info}; - HM_freeChunksInListWithInfo(s, oldRemSet, &infoc); + HM_freeRemSetWithInfo(s, oldRemSet, &infoc); *oldRemSet = newRemSet; // this moves all data into remset of hh - assert(HM_HH_getRemSet(hh)->firstChunk == newRemSet.firstChunk); - assert(HM_HH_getRemSet(hh)->lastChunk == newRemSet.lastChunk); - assert(HM_HH_getRemSet(hh)->size == newRemSet.size); - assert(HM_HH_getRemSet(hh)->usedSize == newRemSet.usedSize); + // assert(HM_HH_getRemSet(hh)->firstChunk == newRemSet.firstChunk); + // assert(HM_HH_getRemSet(hh)->lastChunk == newRemSet.lastChunk); + // assert(HM_HH_getRemSet(hh)->size == newRemSet.size); + // assert(HM_HH_getRemSet(hh)->usedSize == newRemSet.usedSize); LOG(LM_CC_COLLECTION, LL_INFO, "num pinned after filter: %zu", diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 72d96322f..d254ed37f 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -662,7 +662,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) HM_HierarchicalHeap nextAncestor = hhTail->nextAncestor; HM_chunkList level = HM_HH_getChunkList(hhTail); - HM_chunkList remset = HM_HH_getRemSet(hhTail); + HM_remSet remset = HM_HH_getRemSet(hhTail); if (NULL != remset) { #if ASSERT @@ -677,7 +677,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) #endif info.depth = HM_HH_getDepth(hhTail); info.freedType = LGC_FREED_REMSET_CHUNK; - HM_freeChunksInListWithInfo(s, remset, &infoc); + HM_freeRemSetWithInfo(s, remset, &infoc); } #if ASSERT @@ -1178,16 +1178,16 @@ void 
tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; objptr op = remElem->object; -#if ASSERT - HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); - HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); - assert(HM_HH_getDepth(fromHH) <= args->toDepth); -#endif +// #if ASSERT +// HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); +// HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); +// assert(HM_HH_getDepth(fromHH) <= args->toDepth); +// #endif if (!isPinned(op)) { // If previously unpinned, then no need to remember this object. - assert(HM_getLevelHead(fromChunk) == args->fromSpace[args->toDepth]); + // assert(HM_getLevelHead(fromChunk) == args->fromSpace[args->toDepth]); LOG(LM_HH_PROMOTION, LL_INFO, "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, @@ -1238,67 +1238,58 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) assert(HM_getObjptrDepth(op) == opDepth); assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); - uint32_t unpinDepth = unpinDepthOf(op); - uint32_t fromDepth = HM_getObjptrDepth(remElem->from); - - assert(fromDepth <= opDepth); - - if (opDepth <= unpinDepth) - { - unpinObject(op); - assert(fromDepth == opDepth); - - LOG(LM_HH_PROMOTION, LL_INFO, - "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, - remElem->from, op); + bool unpin = tryUnpinWithDepth(op, opDepth); + if (unpin) { return; } - if (fromDepth > unpinDepth) - { - /** If this particular remembered entry came from deeper than some other - * down-pointer, then we don't need to keep it around. There will be some - * other remembered entry coming from the unpinDepth level. - * - * But note that it is very important that the condition is a strict - * inequality: we need to keep all remembered entries that came from the - * same shallowest level. (CC-chaining depends on this.) - */ + uint32_t unpinDepth = unpinDepthOf(op); - LOG(LM_HH_PROMOTION, LL_INFO, - "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, - remElem->from, op); + if (remElem->from != BOGUS_OBJPTR) + { + uint32_t fromDepth = HM_getObjptrDepth(remElem->from); + assert(fromDepth <= opDepth); + if (fromDepth > unpinDepth) { + /** If this particular remembered entry came from deeper than some other + * down-pointer, then we don't need to keep it around. There will be some + * other remembered entry coming from the unpinDepth level. + * + * But note that it is very important that the condition is a strict + * inequality: we need to keep all remembered entries that came from the + * same shallowest level. (CC-chaining depends on this.) + */ + + LOG(LM_HH_PROMOTION, LL_INFO, + "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, + remElem->from, op); - return; + return; + } } - assert(fromDepth == unpinDepth); - /* otherwise, object stays pinned, and we have to scavenge this remembered * entry into the toSpace. 
*/ - HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem); - if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { - HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); - uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); - /* if this is a down-ptr completely inside the scope, - * no need to in-place things reachable from it - */ - printf("this is happening\n"); - if (!chunk->pinnedDuringCollection) - { - chunk->pinnedDuringCollection = TRUE; - HM_unlinkChunkPreserveLevelHead( - HM_HH_getChunkList(args->fromSpace[opDepth]), - chunk); - HM_appendChunk(&(args->pinned[opDepth]), chunk); - } - } - else { - // printf("fromDepth =%d, max = %d, min = %d", fromDepth, args->maxDepth, args->minDepth); - LGC_markAndScan(s, &op, rawArgs); - } + HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); + // if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { + // HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); + // uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + // /* if this is a down-ptr completely inside the scope, + // * no need to in-place things reachable from it + // */ + // if (!chunk->pinnedDuringCollection) + // { + // chunk->pinnedDuringCollection = TRUE; + // HM_unlinkChunkPreserveLevelHead( + // HM_HH_getChunkList(args->fromSpace[opDepth]), + // chunk); + // HM_appendChunk(&(args->pinned[opDepth]), chunk); + // } + // } + // else { + LGC_markAndScan(s, &op, rawArgs); + // } // LGC_markAndScan(s, &(remElem->from), rawArgs); @@ -1456,8 +1447,9 @@ void forwardHHObjptr(GC_state s, } else { + disentangleObject(s, op); // This object should have been previously unpinned - unpinObject(op); + // unpinObject(op); } /* ======================================================================== diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index 3b5bb2932..d5e7486ff 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -4,26 +4,32 @@ * See the file MLton-LICENSE for details. 
*/ -void HM_remember(HM_chunkList remSet, HM_remembered remElem) { - HM_storeInchunkList(remSet, (void*)remElem, sizeof(struct HM_remembered)); +void HM_initRemSet(HM_remSet remSet) { + HM_initChunkList(&(remSet->private)); + CC_initConcList(&(remSet->public)); } -void HM_rememberAtLevel(HM_HierarchicalHeap hh, HM_remembered remElem) { - assert(hh != NULL); - HM_remember(HM_HH_getRemSet(hh), remElem); +void HM_remember(HM_remSet remSet, HM_remembered remElem, bool conc) { + if (!conc) { + HM_storeInchunkList(&(remSet->private), (void*)remElem, sizeof(struct HM_remembered)); + } + else { + CC_storeInConcList(&(remSet->public), (void *)remElem, sizeof(struct HM_remembered)); + } } -void HM_foreachRemembered( +void HM_foreachPrivate( GC_state s, - HM_chunkList remSet, + HM_chunkList chunkList, HM_foreachDownptrClosure f) { - assert(remSet != NULL); - HM_chunk chunk = HM_getChunkListFirstChunk(remSet); - while (chunk != NULL) { + HM_chunk chunk = HM_getChunkListFirstChunk(chunkList); + while (chunk != NULL) + { pointer p = HM_getChunkStart(chunk); pointer frontier = HM_getChunkFrontier(chunk); - while (p < frontier) { + while (p < frontier) + { f->fun(s, (HM_remembered)p, f->env); p += sizeof(struct HM_remembered); } @@ -31,10 +37,30 @@ void HM_foreachRemembered( } } -size_t HM_numRemembered(HM_chunkList remSet) { +void HM_foreachRemembered( + GC_state s, + HM_remSet remSet, + HM_foreachDownptrClosure f) +{ + assert(remSet != NULL); + HM_foreachPrivate(s, &(remSet->private), f); + while (TRUE) { + struct HM_chunkList _chunkList; + HM_chunkList chunkList = &(_chunkList); + CC_popAsChunkList(&(remSet->public), chunkList); + if (chunkList->firstChunk == NULL) { + break; + } + else { + HM_foreachPrivate(s, chunkList, f); + } + } +} + +size_t HM_numRemembered(HM_remSet remSet) { assert(remSet != NULL); size_t count = 0; - HM_chunk chunk = HM_getChunkListFirstChunk(remSet); + HM_chunk chunk = HM_getChunkListFirstChunk(&(remSet->private)); // ignore public for now. while (chunk != NULL) { pointer p = HM_getChunkStart(chunk); pointer frontier = HM_getChunkFrontier(chunk); @@ -44,3 +70,13 @@ size_t HM_numRemembered(HM_chunkList remSet) { return count; } + +void HM_appendRemSet(HM_remSet r1, HM_remSet r2) { + HM_appendChunkList(&(r1->private), &(r2->private)); + CC_appendConcList(&(r1->public), &(r2->public)); +} + +void HM_freeRemSetWithInfo(GC_state s, HM_remSet remSet, void* info) { + HM_freeChunksInListWithInfo(s, &(remSet->private), info); + CC_freeChunksInConcListWithInfo(s, &(remSet->public), info); +} diff --git a/runtime/gc/remembered-set.h b/runtime/gc/remembered-set.h index c1859d3f6..96df9b7f2 100644 --- a/runtime/gc/remembered-set.h +++ b/runtime/gc/remembered-set.h @@ -9,6 +9,7 @@ #if (defined (MLTON_GC_INTERNAL_TYPES)) +#include "gc/concurrent-list.h" /* Remembering that there exists a downpointer to this object. The unpin * depth of the object will be stored in the object header. */ @@ -17,6 +18,24 @@ typedef struct HM_remembered { objptr object; } * HM_remembered; + +/* +1. How do we do this public remSet in a hh changing away? +2. What's a simple ds that does the right thing? -> global lookup, which maps + a position in the heap hierarchy to a remSet. +3. Each chunk keeps track of which remSet? +4. 
+ +hh->chunkList <--> ogList +toList +============================= + +*/ +typedef struct HM_remSet { + struct HM_chunkList private; + struct CC_concList public; +} * HM_remSet; + typedef void (*HM_foreachDownptrFun)(GC_state s, HM_remembered remElem, void* args); typedef struct HM_foreachDownptrClosure { @@ -29,10 +48,12 @@ typedef struct HM_foreachDownptrClosure { #if (defined (MLTON_GC_INTERNAL_BASIS)) -void HM_remember(HM_chunkList remSet, HM_remembered remElem); -void HM_rememberAtLevel(HM_HierarchicalHeap hh, HM_remembered remElem); -void HM_foreachRemembered(GC_state s, HM_chunkList remSet, HM_foreachDownptrClosure f); -size_t HM_numRemembered(HM_chunkList remSet); +void HM_initRemSet(HM_remSet remSet); +void HM_freeRemSetWithInfo(GC_state s, HM_remSet remSet, void* info); +void HM_remember(HM_remSet remSet, HM_remembered remElem, bool conc); +void HM_appendRemSet(HM_remSet r1, HM_remSet r2); +void HM_foreachRemembered(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); +size_t HM_numRemembered(HM_remSet remSet); #endif /* defined (MLTON_GC_INTERNAL_BASIS) */ From 3f85bcaf2a163974751b33aeaba638b41de68481 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 12:37:16 -0400 Subject: [PATCH 005/114] remSet and hh conc --- runtime/gc/hierarchical-heap.c | 57 ++++++++++++++++++++++++++++------ runtime/gc/hierarchical-heap.h | 6 ++-- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 814e40bc0..726cc6adc 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -231,7 +231,7 @@ HM_HierarchicalHeap HM_HH_zip( if (depth1 == depth2) { HM_appendChunkList(HM_HH_getChunkList(hh1), HM_HH_getChunkList(hh2)); - HM_appendChunkList(HM_HH_getRemSet(hh1), HM_HH_getRemSet(hh2)); + HM_appendRemSet(HM_HH_getRemSet(hh1), HM_HH_getRemSet(hh2)); ES_move(HM_HH_getSuspects(hh1), HM_HH_getSuspects(hh2)); linkCCChains(s, hh1, hh2); @@ -349,7 +349,7 @@ void HM_HH_promoteChunks( { /* no need to do anything; this function only guarantees that the * current depth has been completely evacuated. */ - ES_clear(s, HM_HH_getSuspects(thread->hierarchicalHeap)); + ES_clear(s, thread->hierarchicalHeap); return; } @@ -379,7 +379,7 @@ void HM_HH_promoteChunks( if (NULL == hh->subHeapForCC) { assert(NULL == hh->subHeapCompletedCC); HM_appendChunkList(HM_HH_getChunkList(parent), HM_HH_getChunkList(hh)); - HM_appendChunkList(HM_HH_getRemSet(parent), HM_HH_getRemSet(hh)); + HM_appendRemSet(HM_HH_getRemSet(parent), HM_HH_getRemSet(hh)); ES_move(HM_HH_getSuspects(parent), HM_HH_getSuspects(hh)); linkCCChains(s, parent, hh); /* shortcut. */ @@ -454,7 +454,7 @@ void HM_HH_promoteChunks( assert(HM_HH_getDepth(hh) == currentDepth-1); } - ES_clear(s, HM_HH_getSuspects(thread->hierarchicalHeap)); + ES_clear(s, thread->hierarchicalHeap); #if ASSERT assert(hh == thread->hierarchicalHeap); @@ -503,7 +503,7 @@ HM_HierarchicalHeap HM_HH_new(GC_state s, uint32_t depth) hh->heightDependants = 0; HM_initChunkList(HM_HH_getChunkList(hh)); - HM_initChunkList(HM_HH_getRemSet(hh)); + HM_initRemSet(HM_HH_getRemSet(hh)); HM_initChunkList(HM_HH_getSuspects(hh)); return hh; @@ -591,6 +591,17 @@ bool HM_HH_extend(GC_state s, GC_thread thread, size_t bytesRequested) chunk->decheckState = thread->decheckState; chunk->levelHead = HM_HH_getUFNode(hh); + // hh->chunkList <--> og + // toList --> hh + // 1. in-place collection of unionFind nodes? + // 2. How do you make the hh fully concurrent? 
+ // how do you make the union-find fully concurrent and collectible? + // what is the hh?? list of heaps + // -> + // -> + // -> + // -> + // 3. thread->currentChunk = chunk; HM_HH_addRecentBytesAllocated(thread, HM_getChunkSize(chunk)); @@ -747,7 +758,7 @@ void mergeCompletedCCs(GC_state s, HM_HierarchicalHeap hh) { HM_HH_getConcurrentPack(hh)->bytesSurvivedLastCollection += HM_HH_getConcurrentPack(completed)->bytesSurvivedLastCollection; HM_appendChunkList(HM_HH_getChunkList(hh), HM_HH_getChunkList(completed)); - HM_appendChunkList(HM_HH_getRemSet(hh), HM_HH_getRemSet(completed)); + HM_appendRemSet(HM_HH_getRemSet(hh), HM_HH_getRemSet(completed)); CC_freeStack(s, HM_HH_getConcurrentPack(completed)); linkInto(s, hh, completed); completed = next; @@ -924,7 +935,7 @@ void HM_HH_cancelCC(GC_state s, pointer threadp, pointer hhp) { mainhh->subHeapForCC = heap->subHeapForCC; HM_appendChunkList(HM_HH_getChunkList(mainhh), HM_HH_getChunkList(heap)); - HM_appendChunkList(HM_HH_getRemSet(mainhh), HM_HH_getRemSet(heap)); + HM_appendRemSet(HM_HH_getRemSet(mainhh), HM_HH_getRemSet(heap)); linkInto(s, mainhh, heap); @@ -937,7 +948,7 @@ void HM_HH_cancelCC(GC_state s, pointer threadp, pointer hhp) { HM_HH_getConcurrentPack(mainhh)->bytesSurvivedLastCollection += HM_HH_getConcurrentPack(completed)->bytesSurvivedLastCollection; HM_appendChunkList(HM_HH_getChunkList(mainhh), HM_HH_getChunkList(completed)); - HM_appendChunkList(HM_HH_getRemSet(mainhh), HM_HH_getRemSet(completed)); + HM_appendRemSet(HM_HH_getRemSet(mainhh), HM_HH_getRemSet(completed)); linkInto(s, mainhh, completed); completed = next; } @@ -1152,6 +1163,32 @@ void HM_HH_addRootForCollector(GC_state s, HM_HierarchicalHeap hh, pointer p) { } } +void HM_HH_rememberAtLevel(HM_HierarchicalHeap hh, HM_remembered remElem, bool conc) { + assert(hh != NULL); + if (!conc) { + HM_remember(HM_HH_getRemSet(hh), remElem, conc); + } else { + HM_UnionFindNode cursor = HM_HH_getUFNode(hh); + while(true) { + while (NULL != cursor->representative) { + cursor = cursor->representative; + } + hh = cursor->payload; + if (hh == NULL){ + /* race with a join that changed the cursor, iterate again */ + /*should not happen if we retire hh just like ufnodes*/ + assert (false); + continue; + } + HM_remember(HM_HH_getRemSet(hh), remElem, conc); + if (NULL == cursor->representative) { + return; + } + } + } + +} + void HM_HH_freeAllDependants( GC_state s, @@ -1263,8 +1300,8 @@ static inline void linkInto( assert(NULL == HM_HH_getUFNode(left)->dependant2); - HM_HH_getUFNode(right)->payload = NULL; - freeFixedSize(getHHAllocator(s), right); + // HM_HH_getUFNode(right)->payload = NULL; + // freeFixedSize(getHHAllocator(s), right); assert(HM_HH_isLevelHead(left)); } diff --git a/runtime/gc/hierarchical-heap.h b/runtime/gc/hierarchical-heap.h index 147377811..f7a4fa343 100644 --- a/runtime/gc/hierarchical-heap.h +++ b/runtime/gc/hierarchical-heap.h @@ -10,6 +10,7 @@ #include "chunk.h" #include "concurrent-collection.h" +#include "remembered-set.h" #if (defined (MLTON_GC_INTERNAL_TYPES)) @@ -62,7 +63,7 @@ typedef struct HM_HierarchicalHeap { struct HM_HierarchicalHeap *subHeapForCC; struct HM_HierarchicalHeap *subHeapCompletedCC; - struct HM_chunkList rememberedSet; + struct HM_remSet rememberedSet; struct ConcurrentPackage concurrentPack; struct HM_chunkList entanglementSuspects; @@ -104,7 +105,7 @@ static inline HM_chunkList HM_HH_getChunkList(HM_HierarchicalHeap hh) return &(hh->chunkList); } -static inline HM_chunkList HM_HH_getRemSet(HM_HierarchicalHeap hh) +static inline 
HM_remSet HM_HH_getRemSet(HM_HierarchicalHeap hh) { return &(hh->rememberedSet); } @@ -122,6 +123,7 @@ bool HM_HH_isLevelHead(HM_HierarchicalHeap hh); bool HM_HH_isCCollecting(HM_HierarchicalHeap hh); void HM_HH_addRootForCollector(GC_state s, HM_HierarchicalHeap hh, pointer p); +void HM_HH_rememberAtLevel(HM_HierarchicalHeap hh, HM_remembered remElem, bool conc); void HM_HH_merge(GC_state s, GC_thread parent, GC_thread child); void HM_HH_promoteChunks(GC_state s, GC_thread thread); From b268f2a2c422a7a6609b53bbff5ba9bd609faa04 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 12:40:20 -0400 Subject: [PATCH 006/114] es update --- runtime/gc/entanglement-suspects.c | 109 ++++++++++++++++++----------- runtime/gc/entanglement-suspects.h | 15 +++- 2 files changed, 79 insertions(+), 45 deletions(-) diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 63c342ef2..7e696158e 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -5,32 +5,32 @@ static inline bool suspicious_header(GC_header h) { static inline bool mark_suspect(objptr op) { pointer p = objptrToPointer(op, NULL); - // GC_header header = __sync_fetch_and_or(getHeaderp(p), SUSPECT_MASK); - // assert (1 == (header & GC_VALID_HEADER_MASK)); - while (TRUE) - { - GC_header header = getHeader(p); - GC_header newHeader = header | SUSPECT_MASK; - if (header == newHeader) - { - /* - just return because the suspect bit is already set - */ - return false; - } - else - { - /* - otherwise, install the new header with the bit set. this might fail - if a concurrent thread changes the header first. - */ - if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) - return true; - } - } - DIE("should be impossible to reach here"); + GC_header header = __sync_fetch_and_or(getHeaderp(p), SUSPECT_MASK); + assert(1 == (header & GC_VALID_HEADER_MASK)); + // while (TRUE) + // { + // GC_header header = getHeader(p); + // GC_header newHeader = header | SUSPECT_MASK; + // if (header == newHeader) + // { + // /* + // just return because the suspect bit is already set + // */ + // return false; + // } + // else + // { + // /* + // otherwise, install the new header with the bit set. this might fail + // if a concurrent thread changes the header first. + // */ + // if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) + // return true; + // } + // } + // DIE("should be impossible to reach here"); /*return true if this call marked the header, false if someone else did*/ - // return !suspicious_header(header); + return !suspicious_header(header); } static inline bool is_suspect(objptr op) @@ -45,25 +45,33 @@ static inline bool is_suspect(objptr op) } void clear_suspect( - __attribute__((unused)) GC_state s, + GC_state s, objptr *opp, - __attribute__((unused)) void *rawArgs) + void *rawArgs) { - objptr op = *opp; pointer p = objptrToPointer(op, NULL); - while (TRUE) - { - assert(is_suspect(op)); // no one else should clear it! 
- GC_header header = getHeader(p); - GC_header newHeader = header & (~SUSPECT_MASK); - // the header may be updated concurrently by a CC - if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) - return; + ES_clearArgs eargs = (ES_clearArgs) rawArgs; + + GC_header header = getHeader(p); + uint32_t unpinDepth = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + + if (pinType(header) == PIN_ANY && unpinDepth < eargs->heapDepth) { + /* Not ready to be cleared */ + HM_HierarchicalHeap unpinHeap = HM_HH_getHeapAtDepth(s, eargs->thread, unpinDepth); + HM_storeInchunkList(HM_HH_getSuspects(unpinHeap), opp, sizeof(objptr)); + return; + } + + GC_header newHeader = header & ~(SUSPECT_MASK); + if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { + /* clearing successful */ + return; + } + else { + /*oops something changed in b/w, let's try at the next join*/ + HM_storeInchunkList(eargs->newList, opp, sizeof(objptr)); } - DIE("should be impossible to reach here"); - // assert(isObjptr(op) && is_suspect(p)); - // __sync_fetch_and_add(getHeaderp(p), ~(SUSPECT_MASK)); } bool ES_contains(__attribute__((unused)) HM_chunkList es, objptr op) @@ -71,6 +79,14 @@ bool ES_contains(__attribute__((unused)) HM_chunkList es, objptr op) return is_suspect(op); } +bool ES_mark(__attribute__((unused)) GC_state s, objptr op) { + return mark_suspect(op); +} + +void ES_unmark(GC_state s, objptr op) { + clear_suspect(s, &op, NULL); +} + void ES_add(__attribute__((unused)) GC_state s, HM_chunkList es, objptr op) { @@ -123,11 +139,20 @@ void ES_move(HM_chunkList list1, HM_chunkList list2) { HM_initChunkList(list2); } -void ES_clear(GC_state s, HM_chunkList es) +void ES_clear(GC_state s, HM_HierarchicalHeap hh) { + HM_chunkList es = HM_HH_getSuspects(hh); + uint32_t heapDepth = HM_HH_getDepth(hh); + struct HM_chunkList oldList = *(es); + HM_initChunkList(HM_HH_getSuspects(hh)); + struct ES_clearArgs eargs = { + .newList = HM_HH_getSuspects(hh), + .heapDepth = heapDepth, + .thread = getThreadCurrent(s) + }; struct GC_foreachObjptrClosure fObjptrClosure = - {.fun = clear_suspect, .env = NULL}; - int numSuspects = ES_foreachSuspect(s, es, &fObjptrClosure); + {.fun = clear_suspect, .env = &(eargs)}; + int numSuspects = ES_foreachSuspect(s, &oldList, &fObjptrClosure); s->cumulativeStatistics->numSuspectsCleared+=numSuspects; HM_freeChunksInList(s, es); diff --git a/runtime/gc/entanglement-suspects.h b/runtime/gc/entanglement-suspects.h index 235e987e6..fb82535da 100644 --- a/runtime/gc/entanglement-suspects.h +++ b/runtime/gc/entanglement-suspects.h @@ -6,17 +6,26 @@ #define SUSPECT_MASK ((GC_header)0x40000000) #define SUSPECT_SHIFT 30 +typedef struct ES_clearArgs { + HM_chunkList newList; + uint32_t heapDepth; + GC_thread thread; +} * ES_clearArgs; + +bool ES_mark(__attribute__((unused)) GC_state s, objptr op); +void ES_unmark(GC_state s, objptr op); + void ES_add(GC_state s, HM_chunkList es, objptr op); bool ES_contains(HM_chunkList es, objptr op); -HM_chunkList ES_append (GC_state s, HM_chunkList es1, HM_chunkList es2); +HM_chunkList ES_append(GC_state s, HM_chunkList es1, HM_chunkList es2); -void ES_clear(GC_state s, HM_chunkList es); +void ES_clear(GC_state s, HM_HierarchicalHeap hh); void ES_move(HM_chunkList list1, HM_chunkList list2); -int ES_foreachSuspect(GC_state s, HM_chunkList storage, struct GC_foreachObjptrClosure* fObjptrClosure); +int ES_foreachSuspect(GC_state s, HM_chunkList storage, struct GC_foreachObjptrClosure * fObjptrClosure); #endif #endif \ No newline at end of file From 
75f4bcfeba90c68ed2f916af175db514abad4f4b Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 12:40:31 -0400 Subject: [PATCH 007/114] runtime/gc/object.c --- runtime/gc/object.h | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/gc/object.h b/runtime/gc/object.h index 19d32ac3d..a8579832d 100644 --- a/runtime/gc/object.h +++ b/runtime/gc/object.h @@ -180,6 +180,7 @@ enum { static inline void splitHeader (GC_state s, GC_header header, GC_objectTypeTag *tagRet, bool *hasIdentityRet, uint16_t *bytesNonObjptrsRet, uint16_t *numObjptrsRet); +static inline bool isMutable(GC_state s, pointer p); static inline pointer advanceToObjectData (GC_state s, pointer p); static inline size_t objectSize(GC_state s, pointer p); From 489baf7dfe5e27e8081bcd7649eb9b411e2470b4 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 12:40:43 -0400 Subject: [PATCH 008/114] mutable data --- runtime/gc/object.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/runtime/gc/object.c b/runtime/gc/object.c index 4dc652e89..2cd646c6a 100644 --- a/runtime/gc/object.c +++ b/runtime/gc/object.c @@ -90,6 +90,16 @@ void splitHeader(GC_state s, GC_header header, *numObjptrsRet = numObjptrs; } +static inline bool isMutable(GC_state s, pointer p) { + GC_header header = getHeader(p); + GC_objectTypeTag tag; + uint16_t bytesNonObjptrs; + uint16_t numObjptrs; + bool hasIdentity; + splitHeader(s, header, &tag, &hasIdentity, &bytesNonObjptrs, &numObjptrs); + return hasIdentity; +} + /* advanceToObjectData (s, p) * * If p points at the beginning of an object, then advanceToObjectData From d6478be537b7c37569314e022dc62f207e50613c Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 13:05:57 -0400 Subject: [PATCH 009/114] concurrent list --- runtime/gc/concurrent-list.c | 169 +++++++++++++++++++++++++++++++++++ runtime/gc/concurrent-list.h | 38 ++++++++ 2 files changed, 207 insertions(+) create mode 100644 runtime/gc/concurrent-list.c create mode 100644 runtime/gc/concurrent-list.h diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c new file mode 100644 index 000000000..a8b29eb45 --- /dev/null +++ b/runtime/gc/concurrent-list.c @@ -0,0 +1,169 @@ +void CC_initConcList(CC_concList concList) { + concList->firstChunk = NULL; + concList->lastChunk = NULL; + pthread_mutex_init(&(concList->mutex), NULL); +} + +void allocateChunkInConcList( + CC_concList concList, + size_t objSize, + HM_chunk lastChunk) { + pthread_mutex_lock(&concList->mutex); + + if(concList->lastChunk != lastChunk) { + pthread_mutex_unlock(&concList->mutex); + return; + } + + GC_state s = pthread_getspecific(gcstate_key); + HM_chunk chunk = HM_getFreeChunk(s, objSize); + + if (NULL == chunk) + { + DIE("Out of memory. 
Unable to allocate chunk of size %zu.", + objSize); + } + + assert(chunk->frontier == HM_getChunkStart(chunk)); + assert(chunk->mightContainMultipleObjects); + assert((size_t)(chunk->limit - chunk->frontier) >= objSize); + assert(chunk != NULL); + + chunk->prevChunk = lastChunk; + if (lastChunk != NULL) + { + lastChunk->nextChunk = chunk; + } + if (concList->firstChunk == NULL) + { + concList->firstChunk = chunk; + } + pthread_mutex_unlock(&concList->mutex); +} + +pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ + assert(concList != NULL); + while(TRUE) { + HM_chunk chunk = concList->lastChunk; + if (NULL == chunk) { + allocateChunkInConcList(concList, objSize, chunk); + continue; + } + else { + pointer frontier = HM_getChunkFrontier(chunk); + size_t sizePast = (size_t) (chunk->limit - frontier); + if (sizePast < objSize) { + allocateChunkInConcList(concList, objSize, chunk); + continue; + } + pointer new_frontier = frontier + objSize; + bool success = __sync_bool_compare_and_swap(&(chunk->frontier), frontier, new_frontier); + if (success) { + memcpy(frontier, p, objSize); + return frontier; + } + } + } + DIE("should never come here"); + return NULL; +} + + +// void CC_foreachObjInList(CC_concList concList, size_t objSize, HM_foreachObjClosure f) { + +// struct HM_chunkList _chunkList; +// HM_chunkList chunkList = &(_chunkList); +// chunkList->firstChunk = concList->firstChunk; +// pthread_mutex_lock(&concList->mutex); +// chunkList->lastChunk = concList->lastChunk; +// concList->firstChunk = NULL; +// concList->lastChunk = NULL; +// pthread_mutex_unlock(&concList->mutex); +// HM_foreachObjInChunkList(chunkList, objSize, f); +// } + +// void CC_foreachRemInConc(GC_state s, CC_concList concList, struct HM_foreachDownptrClosure* f) { +// struct HM_chunkList _store; +// HM_chunkList store = &(_store); +// HM_initChunkList(store); + +// while (TRUE) { +// HM_chunk firstChunk = concList->firstChunk; +// if (firstChunk == NULL) { +// break; +// } + +// pthread_mutex_lock(&concList->mutex); +// HM_chunk lastChunk = concList->lastChunk; +// concList->firstChunk = NULL; +// concList->lastChunk = NULL; +// pthread_mutex_unlock(&concList->mutex); + +// assert(firstChunk != NULL); +// assert(lastChunk != NULL); +// HM_chunk chunk = firstChunk; +// while (chunk != NULL) { +// pointer p = HM_getChunkStart(chunk); +// pointer frontier = HM_getChunkFrontier(chunk); +// while (p < frontier) +// { +// f->fun(s, (HM_remembered)p, f->env); +// p += sizeof(struct HM_remembered); +// } +// chunk = chunk->nextChunk; +// } + +// if (store->firstChunk != NULL) { +// store->lastChunk->nextChunk = firstChunk; +// firstChunk->prevChunk = store->lastChunk; +// store->lastChunk = lastChunk; +// } +// else { +// store->firstChunk = firstChunk; +// store->lastChunk = lastChunk; +// } +// } + +// /*add the chunks back to the list*/ +// pthread_mutex_lock(&concList->mutex); +// if (concList->firstChunk != NULL) { +// concList->firstChunk->prevChunk = store->lastChunk; +// store->lastChunk->nextChunk = concList->firstChunk; +// concList->firstChunk = store->firstChunk; +// } +// else { +// concList->firstChunk = store->firstChunk; +// concList->lastChunk = store->lastChunk; +// } +// pthread_mutex_unlock(&concList->mutex); + +// } + +void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList) { + pthread_mutex_lock(&concList->mutex); + chunkList->firstChunk = concList->firstChunk; + concList->firstChunk = NULL; + chunkList->lastChunk = concList->lastChunk; + concList->lastChunk = NULL; + 
pthread_mutex_unlock(&concList->mutex); +} + +void CC_appendConcList(CC_concList concList1, CC_concList concList2) { + pthread_mutex_lock(&concList1->mutex); + if (concList1->lastChunk == NULL) { + concList1->firstChunk = concList2->firstChunk; + concList1->lastChunk = concList2->lastChunk; + } + else { + concList1->lastChunk->nextChunk = concList2->firstChunk; + concList2->firstChunk->prevChunk = concList1->lastChunk; + concList1->lastChunk = concList2->lastChunk; + } + pthread_mutex_unlock(&concList2->mutex); +} + +void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info) { + struct HM_chunkList _chunkList; + CC_popAsChunkList(concList, &(_chunkList)); + HM_freeChunksInListWithInfo(s, &(_chunkList), info); +} \ No newline at end of file diff --git a/runtime/gc/concurrent-list.h b/runtime/gc/concurrent-list.h new file mode 100644 index 000000000..a2d2392ff --- /dev/null +++ b/runtime/gc/concurrent-list.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2018-2019 Sam Westrick + * Copyright (C) 2015 Ram Raghunathan. + * + * MLton is released under a HPND-style license. + * See the file MLton-LICENSE for details. + */ + + +#ifndef CC_LIST_H +#define CC_LIST_H + +struct CC_concList; +typedef struct CC_concList * CC_concList; + +#if (defined (MLTON_GC_INTERNAL_TYPES)) + +struct CC_concList { + HM_chunk firstChunk; + HM_chunk lastChunk; + pthread_mutex_t mutex; +}; + +#endif /* MLTON_GC_INTERNAL_TYPES */ + +#if (defined (MLTON_GC_INTERNAL_FUNCS)) + +void CC_initConcList(CC_concList concList); +pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize); +// void CC_foreachObjInList(CC_concList concList, size_t objSize, HM_foreachObjClosure f); +// void CC_foreachRemInConc(GC_state s, CC_concList concList, struct HM_foreachDownptrClosure* f); +void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList); + +void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info); +void CC_appendConcList(CC_concList concList1, CC_concList concList2); + +#endif /* MLTON_GC_INTERNAL_FUNCS */ + +#endif /* CC_LIST_H */ From a6d6dfa01ebcf5e9f4b39ca1ed3e1337a3ec6717 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 13:06:34 -0400 Subject: [PATCH 010/114] entangled reads --- runtime/gc.c | 1 + runtime/gc.h | 1 + runtime/gc/decheck.c | 34 ++++++++++++++++++++++++++++------ runtime/gc/decheck.h | 3 ++- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/runtime/gc.c b/runtime/gc.c index c43d98513..a42cd543f 100644 --- a/runtime/gc.c +++ b/runtime/gc.c @@ -81,6 +81,7 @@ extern C_Pthread_Key_t gcstate_key; #include "gc/pin.c" #include "gc/pointer.c" #include "gc/profiling.c" +#include "gc/concurrent-list.c" #include "gc/remembered-set.c" #include "gc/rusage.c" #include "gc/sequence-allocate.c" diff --git a/runtime/gc.h b/runtime/gc.h index e7f0d0f9f..b433f63ac 100644 --- a/runtime/gc.h +++ b/runtime/gc.h @@ -89,6 +89,7 @@ typedef GC_state GCState_t; #include "gc/local-scope.h" #include "gc/local-heap.h" #include "gc/assign.h" +#include "gc/concurrent-list.h" #include "gc/remembered-set.h" #include "gc/gap.h" // #include "gc/deferred-promote.h" diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 5c8080a6a..191a88d17 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -237,16 +237,16 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) return p1 == lca_path || dag_depth(t1) <= decheckGetSyncDepth(thread, llen); } -void decheckRead(GC_state s, objptr ptr) { +void decheckRead(GC_state s, objptr dst, objptr src) 
{ GC_thread thread = getThreadCurrent(s); if (thread == NULL) return; decheck_tid_t tid = thread->decheckState; if (tid.bits == DECHECK_BOGUS_BITS) return; - if (!isObjptr(ptr)) + if (!isObjptr(src)) return; - HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); + HM_chunk chunk = HM_getChunkOf(objptrToPointer(src, NULL)); if (chunk == NULL) return; decheck_tid_t allocator = chunk->decheckState; @@ -257,9 +257,9 @@ void decheckRead(GC_state s, objptr ptr) { /** If we get here, there is entanglement. Next is how to handle it. */ - assert(!s->controls->manageEntanglement); - if (!s->controls->manageEntanglement) { - printf("Entanglement detected: object at %p\n", (void *) ptr); + assert(!s->controls->manageEntanglement || true); + if (!s->controls->manageEntanglement && false) { + printf("Entanglement detected: object at %p\n", (void *) src); printf("Allocator tree depth: %d\n", tree_depth(allocator)); printf("Allocator path: 0x%x\n", allocator.internal.path); printf("Allocator dag depth: %d\n", dag_depth(allocator)); @@ -268,6 +268,19 @@ void decheckRead(GC_state s, objptr ptr) { printf("Reader dag depth: %d\n", dag_depth(tid)); exit(-1); } + else { + uint32_t unpinDepth = lcaHeapDepth(tid, allocator); + bool headerChange, pinChange; + pinObjectInfo(src, unpinDepth, PIN_ANY, &headerChange, &pinChange); + if (pinChange) { + struct HM_remembered remElem_ = {.object = src, .from = BOGUS_OBJPTR}; + HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); + } + if (isMutable(s, objptrToPointer(src, NULL)) && !ES_contains(NULL, src)) { + HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); + ES_add(s, HM_HH_getSuspects(lcaHeap), src); + } + } #if 0 s->cumulativeStatistics->numEntanglementsDetected++; @@ -319,3 +332,12 @@ void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uint32_t st */ memcpy(to, from, DECHECK_DEPTHS_LEN * sizeof(uint32_t)); } + +void disentangleObject(GC_state s, objptr ptr) { + if (isPinned(ptr)) { + unpinObject(ptr); + if (ES_contains(NULL, ptr)) { + ES_unmark(s, ptr); + } + } +} \ No newline at end of file diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index 07a596bbf..8ce367d8b 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -38,9 +38,10 @@ PRIVATE void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uin #if (defined (MLTON_GC_INTERNAL_FUNCS)) void decheckInit(GC_state s); -void decheckRead(GC_state s, objptr ptr); +void decheckRead(GC_state s, objptr dst, objptr src); bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1); int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2); +void disentangleObject(GC_state s, objptr ptr); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ From b8d74e77886c14cfb1083595b55e0b66027e9577 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 13:06:51 -0400 Subject: [PATCH 011/114] pinnign up --- runtime/gc/assign.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 0a118ba26..77f1175b2 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -6,16 +6,16 @@ * MLton is released under a HPND-style license. * See the file MLton-LICENSE for details. 
*/ -void Assignable_decheckObjptr(objptr op) +void Assignable_decheckObjptr(objptr dst, objptr src) { GC_state s = pthread_getspecific(gcstate_key); s->cumulativeStatistics->numDisentanglementChecks++; - decheckRead(s, op); + decheckRead(s, dst, src); } objptr Assignable_readBarrier( GC_state s, - ARG_USED_FOR_ASSERT objptr obj, + objptr obj, objptr *field) { @@ -54,7 +54,7 @@ objptr Assignable_readBarrier( assert(ES_contains(NULL, obj)); s->cumulativeStatistics->numDisentanglementChecks++; objptr ptr = *field; - decheckRead(s, ptr); + decheckRead(s, ptr, obj); return ptr; } @@ -157,7 +157,7 @@ void Assignable_writeBarrier( if (pinObject(src, (uint32_t)unpinDepth)) { /** Just remember it at some arbitrary place... */ - HM_rememberAtLevel(getThreadCurrent(s)->hierarchicalHeap, remElem); + HM_HH_rememberAtLevel(getThreadCurrent(s)->hierarchicalHeap, remElem); } return; @@ -204,7 +204,7 @@ void Assignable_writeBarrier( ES_add(s, HM_HH_getSuspects(dhh), dst); } - bool success = pinObject(src, dd); + bool success = pinObject(src, dd, PIN_DOWN); // any concurrent pin can only decrease unpinDepth uint32_t unpinDepth = unpinDepthOf(src); @@ -226,7 +226,7 @@ void Assignable_writeBarrier( HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, thread, sd); assert(NULL != shh); assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); - HM_rememberAtLevel(shh, remElem); + HM_HH_rememberAtLevel(shh, remElem, false); LOG(LM_HH_PROMOTION, LL_INFO, "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, From 052f03bce65cf2a24a3428addc46435441075332 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 17 Mar 2022 13:07:11 -0400 Subject: [PATCH 012/114] forgot --- runtime/gc/assign.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/gc/assign.h b/runtime/gc/assign.h index 7807e10a3..9409fa403 100644 --- a/runtime/gc/assign.h +++ b/runtime/gc/assign.h @@ -22,7 +22,7 @@ PRIVATE objptr Assignable_readBarrier( GC_state s, objptr dst, objptr* field ); -PRIVATE void Assignable_decheckObjptr(objptr op); +PRIVATE void Assignable_decheckObjptr(objptr dst, objptr src); #endif /* MLTON_GC_INTERNAL_BASIS */ From 27e0763fb414bca9d90a68a3ed7abe4e921c8485 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 09:09:42 -0400 Subject: [PATCH 013/114] list bug --- runtime/gc/concurrent-collection.c | 1 + runtime/gc/concurrent-list.c | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index aaddaf264..661bbaa43 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -770,6 +770,7 @@ size_t CC_collectWithRoots( HM_HierarchicalHeap targetHH, __attribute__((unused)) GC_thread thread) { + return 0; getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); getThreadCurrent(s)->exnStack = s->exnStack; HM_HH_updateValues(getThreadCurrent(s), s->frontier); diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index a8b29eb45..5f2f1e121 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -149,17 +149,30 @@ void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList) { } void CC_appendConcList(CC_concList concList1, CC_concList concList2) { + + HM_chunk firstChunk, lastChunk; + pthread_mutex_lock(&concList2->mutex); + firstChunk = concList2->firstChunk; + lastChunk = concList2->lastChunk; + concList2->firstChunk = NULL; + concList2->lastChunk = NULL; + 
pthread_mutex_unlock(&concList2->mutex); + + if (firstChunk == NULL || lastChunk == NULL) { + return; + } + pthread_mutex_lock(&concList1->mutex); if (concList1->lastChunk == NULL) { - concList1->firstChunk = concList2->firstChunk; - concList1->lastChunk = concList2->lastChunk; + concList1->firstChunk = firstChunk; + concList1->lastChunk = lastChunk; } else { - concList1->lastChunk->nextChunk = concList2->firstChunk; - concList2->firstChunk->prevChunk = concList1->lastChunk; - concList1->lastChunk = concList2->lastChunk; + concList1->lastChunk->nextChunk = firstChunk; + firstChunk->prevChunk = concList1->lastChunk; + concList1->lastChunk = lastChunk; } - pthread_mutex_unlock(&concList2->mutex); + pthread_mutex_unlock(&concList1->mutex); } void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info) { From 1338d8a3a78f940412a9a7c7c362d27bc169eb25 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 09:09:58 -0400 Subject: [PATCH 014/114] unpin bug --- runtime/gc/pin.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index 0f5b543d9..8912f6123 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -73,9 +73,14 @@ void pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, while (true) { GC_header header = getHeader(p); - uint32_t previousUnpinDepth = - (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; - uint32_t newUnpinDepth = min(previousUnpinDepth, unpinDepth); + uint32_t newUnpinDepth; + if(isPinned(op)) { + uint32_t previousUnpinDepth = + (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + newUnpinDepth = min(previousUnpinDepth, unpinDepth); + } else { + newUnpinDepth = unpinDepth; + } enum PinType nt = maxPT(pt, pinType(header)); GC_header newHeader = From 719fce0f7e3c2cff6d01551c69c2960e428555fd Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 09:10:20 -0400 Subject: [PATCH 015/114] free hh alongside ufnode --- runtime/gc/hierarchical-heap-ebr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/gc/hierarchical-heap-ebr.c b/runtime/gc/hierarchical-heap-ebr.c index 8aa1bb666..e90ad99dd 100644 --- a/runtime/gc/hierarchical-heap-ebr.c +++ b/runtime/gc/hierarchical-heap-ebr.c @@ -61,7 +61,9 @@ static void rotateAndReclaim(GC_state s) { p < HM_getChunkFrontier(chunk); p += sizeof(HM_UnionFindNode *)) { - freeFixedSize(getUFAllocator(s), *(HM_UnionFindNode*)p); + HM_UnionFindNode hufp = * (HM_UnionFindNode *) p; + freeFixedSize(getHHAllocator(s), hufp->payload); + freeFixedSize(getUFAllocator(s), hufp); } } From 65e3c89e1effc9b3babdbbe8d2906e9e75d007d8 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 09:12:07 -0400 Subject: [PATCH 016/114] weird merge fixed --- runtime/gc/hierarchical-heap-collection.c | 52 +++++++++-------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 55080a54c..6f87e2f75 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -717,6 +717,10 @@ void HM_HHC_collectLocal(uint32_t desiredScope) continue; } + struct HM_foreachDownptrClosure closure = + {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; + HM_foreachRemembered(s, HM_HH_getRemSet(fromSpaceLevel), &closure); + /* unset the flags on pinned chunks and update their HH pointer */ for (HM_chunk chunkCursor = pinned[depth].firstChunk; chunkCursor != NULL; @@ -744,26 +748,15 @@ void 
HM_HHC_collectLocal(uint32_t desiredScope) } /* merge in toSpace */ -<<<<<<< HEAD - if (NULL == hhTail && NULL == hhToSpace) - { -======= if (NULL == hh && NULL == hhToSpace) { ->>>>>>> 6442206faacf4372f2be52ed81ae961edc36fafd /** SAM_NOTE: If we collected everything, I suppose this is possible. * But shouldn't the stack and thread at least be in the root-to-leaf * path? Should look into this... */ hh = HM_HH_new(s, thread->currentDepth); } -<<<<<<< HEAD - else - { - hh = HM_HH_zip(s, hhTail, hhToSpace); -======= else { hh = HM_HH_zip(s, hh, hhToSpace); ->>>>>>> 6442206faacf4372f2be52ed81ae961edc36fafd } thread->hierarchicalHeap = hh; @@ -1183,6 +1176,16 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) if (!chunk->pinnedDuringCollection) { chunk->pinnedDuringCollection = TRUE; + + if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { + chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); + } + + // HM_unlinkChunkPreserveLevelHead( + // HM_HH_getChunkList(args->fromSpace[opDepth]), + // chunk); + // HM_appendChunk(&(args->pinned[opDepth]), chunk); + HM_unlinkChunkPreserveLevelHead( HM_HH_getChunkList(args->fromSpace[opDepth]), chunk); @@ -1349,10 +1352,9 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) // return; // } -<<<<<<< HEAD - // chunk->pinnedDuringCollection = TRUE; - // assert(hhContainsChunk(args->fromSpace[opDepth], chunk)); - // assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); + // if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { + // chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); + // } // HM_unlinkChunkPreserveLevelHead( // HM_HH_getChunkList(args->fromSpace[opDepth]), @@ -1360,22 +1362,10 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) // HM_appendChunk(&(args->pinned[opDepth]), chunk); // assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); -======= - if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { - chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); - } - - HM_unlinkChunkPreserveLevelHead( - HM_HH_getChunkList(args->fromSpace[opDepth]), - chunk); - HM_appendChunk(&(args->pinned[opDepth]), chunk); - - assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); - /** stronger version of previous assertion, needed for safe freeing of - * hh dependants after LGC completes - */ - assert(chunk->levelHead == HM_HH_getUFNode(args->fromSpace[opDepth])); ->>>>>>> 6442206faacf4372f2be52ed81ae961edc36fafd + // /** stronger version of previous assertion, needed for safe freeing of + // * hh dependants after LGC completes + // */ + // assert(chunk->levelHead == HM_HH_getUFNode(args->fromSpace[opDepth])); } /* ========================================================================= */ From b5faea3f1300573824c5a6acb1058e017942dec4 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 09:12:24 -0400 Subject: [PATCH 017/114] forgot --- runtime/gc/hierarchical-heap-collection.c | 45 ----------------------- 1 file changed, 45 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 6f87e2f75..6a1ad8e24 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -571,51 +571,6 @@ void HM_HHC_collectLocal(uint32_t desiredScope) } } -<<<<<<< HEAD - /* after everything has been scavenged, we have to move the pinned chunks and unmark - * objects moved by mark and scan */ - depth = 
thread->currentDepth + 1; - while (depth > forwardHHObjptrArgs.minDepth) - { - depth--; - HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; - if (NULL == toSpaceLevel) - { - /* check that there are also no pinned chunks at this level - * (if there was pinned chunk, then we would have also created a - * toSpace HH at this depth, because we would have scavenged the - * remembered entry) */ - assert(pinned[depth].firstChunk == NULL); - continue; - } - -#if ASSERT - // SAM_NOTE: safe to check here, because pinned chunks are separate. - traverseEachObjInChunkList(s, HM_HH_getChunkList(toSpaceLevel)); -#endif - // struct HM_foreachObjClosure unmarkClosure = - struct HM_foreachDownptrClosure closure = - {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; - HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); - // {.fun = unmark, .env = (void *)&forwardHHObjptrArgs}; - - // HM_foreachObjInChunkList(s, &(pinned[depth]), &unmarkClosure); - /* unset the flags on pinned chunks and update their HH pointer */ - for (HM_chunk chunkCursor = pinned[depth].firstChunk; - chunkCursor != NULL; - chunkCursor = chunkCursor->nextChunk) - { - assert(chunkCursor->pinnedDuringCollection); - chunkCursor->pinnedDuringCollection = FALSE; - chunkCursor->levelHead = HM_HH_getUFNode(toSpaceLevel); - } - - /* put the pinned chunks into the toSpace */ - HM_appendChunkList(HM_HH_getChunkList(toSpaceLevel), &(pinned[depth])); - } - -======= ->>>>>>> 6442206faacf4372f2be52ed81ae961edc36fafd LOG(LM_HH_COLLECTION, LL_DEBUG, "Copied %" PRIu64 " objects in copy-collection", forwardHHObjptrArgs.objectsCopied - oldObjectCopied); From 1c252b6a82b94731bfca0cff65d40484931e141c Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 13:31:53 -0400 Subject: [PATCH 018/114] forgot to unmark: --- runtime/gc/hierarchical-heap-collection.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 6a1ad8e24..c7f435c71 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -206,7 +206,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) uint32_t potentialLocalScope = UNPACK_IDX(topval); uint32_t originalLocalScope = pollCurrentLocalScope(s); - assert(thread->currentDepth == originalLocalScope); + // assert(thread->currentDepth == originalLocalScope); /** Compute the min depth for local collection. We claim as many levels * as we can without interfering with CC, but only so far as desired. 
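The next hunk makes local collection free only the private half of the remembered set (remset->private). In this series an HM_remSet pairs a thread-private chunk list with a public CC_concList that other processors may append to concurrently, and HM_foreachRemembered drains the public side in batches via CC_popAsChunkList. A rough usage sketch of that split, assuming only the signatures from remembered-set.{c,h} and concurrent-list.c introduced earlier in the series (srcHeap, opHeap, src, dst, op, and closure are illustrative placeholders, not code from the patch):

    /* sketch only: owner-side vs. remote-side remembering with the split remset */

    /* owner thread, recording an ordinary down-pointer (goes to the private
     * chunk list; no synchronization needed): */
    struct HM_remembered down = {.object = src, .from = dst};
    HM_remember(HM_HH_getRemSet(srcHeap), &down, false);

    /* another processor, recording an object it pinned after an entangled
     * access (goes to the public side: CAS on the chunk frontier, mutex held
     * only while a fresh chunk is appended, as in CC_storeInConcList): */
    struct HM_remembered ent = {.object = op, .from = BOGUS_OBJPTR};
    HM_remember(HM_HH_getRemSet(opHeap), &ent, true);

    /* a collector pass walks the private entries first, then repeatedly pops
     * the public list until it observes it empty (closure is, e.g., the
     * unmarkWrapper closure used in the hunks below): */
    HM_foreachRemembered(s, HM_HH_getRemSet(opHeap), &closure);
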
@@ -635,7 +635,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) #endif info.depth = HM_HH_getDepth(hhTail); info.freedType = LGC_FREED_REMSET_CHUNK; - HM_freeRemSetWithInfo(s, remset, &infoc); + HM_freeChunksInListWithInfo(s, &(remset->private), &infoc); } #if ASSERT @@ -672,9 +672,12 @@ void HM_HHC_collectLocal(uint32_t desiredScope) continue; } - struct HM_foreachDownptrClosure closure = - {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; - HM_foreachRemembered(s, HM_HH_getRemSet(fromSpaceLevel), &closure); + HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; + if (toSpaceLevel != NULL) { + struct HM_foreachDownptrClosure closure = + {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; + HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + } /* unset the flags on pinned chunks and update their HH pointer */ for (HM_chunk chunkCursor = pinned[depth].firstChunk; From 30190939e8aaa4ac020126085b50e78d186ccb3b Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 13:32:23 -0400 Subject: [PATCH 019/114] minor --- runtime/gc/hierarchical-heap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 726cc6adc..920d1a1cd 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -231,8 +231,8 @@ HM_HierarchicalHeap HM_HH_zip( if (depth1 == depth2) { HM_appendChunkList(HM_HH_getChunkList(hh1), HM_HH_getChunkList(hh2)); - HM_appendRemSet(HM_HH_getRemSet(hh1), HM_HH_getRemSet(hh2)); ES_move(HM_HH_getSuspects(hh1), HM_HH_getSuspects(hh2)); + HM_appendRemSet(HM_HH_getRemSet(hh1), HM_HH_getRemSet(hh2)); linkCCChains(s, hh1, hh2); // This has to happen before linkInto (which frees hh2) @@ -379,8 +379,8 @@ void HM_HH_promoteChunks( if (NULL == hh->subHeapForCC) { assert(NULL == hh->subHeapCompletedCC); HM_appendChunkList(HM_HH_getChunkList(parent), HM_HH_getChunkList(hh)); - HM_appendRemSet(HM_HH_getRemSet(parent), HM_HH_getRemSet(hh)); ES_move(HM_HH_getSuspects(parent), HM_HH_getSuspects(hh)); + HM_appendRemSet(HM_HH_getRemSet(parent), HM_HH_getRemSet(hh)); linkCCChains(s, parent, hh); /* shortcut. 
*/ thread->hierarchicalHeap = parent; @@ -1280,7 +1280,7 @@ void HM_HH_freeAllDependants( /*******************************/ static inline void linkInto( - GC_state s, + __attribute__((unused)) GC_state s, HM_HierarchicalHeap left, HM_HierarchicalHeap right) { @@ -1302,6 +1302,7 @@ static inline void linkInto( // HM_HH_getUFNode(right)->payload = NULL; // freeFixedSize(getHHAllocator(s), right); + // HH_EBR_retire(s, HM_HH_getUFNode(right)); assert(HM_HH_isLevelHead(left)); } From a1b78cd55e1a502f024a5641a2a37377844a9366 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 21 Mar 2022 15:20:41 -0400 Subject: [PATCH 020/114] assert --- runtime/gc/hierarchical-heap-ebr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/gc/hierarchical-heap-ebr.c b/runtime/gc/hierarchical-heap-ebr.c index e90ad99dd..68a914863 100644 --- a/runtime/gc/hierarchical-heap-ebr.c +++ b/runtime/gc/hierarchical-heap-ebr.c @@ -62,6 +62,7 @@ static void rotateAndReclaim(GC_state s) { p += sizeof(HM_UnionFindNode *)) { HM_UnionFindNode hufp = * (HM_UnionFindNode *) p; + assert (hufp->payload != NULL); freeFixedSize(getHHAllocator(s), hufp->payload); freeFixedSize(getUFAllocator(s), hufp); } From 0f59d75e56287669ee6802d2a9208338da80a863 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 23 Mar 2022 22:34:22 -0400 Subject: [PATCH 021/114] fixed order --- runtime/gc/hierarchical-heap.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 920d1a1cd..9782b4243 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -230,16 +230,17 @@ HM_HierarchicalHeap HM_HH_zip( if (depth1 == depth2) { - HM_appendChunkList(HM_HH_getChunkList(hh1), HM_HH_getChunkList(hh2)); - ES_move(HM_HH_getSuspects(hh1), HM_HH_getSuspects(hh2)); - HM_appendRemSet(HM_HH_getRemSet(hh1), HM_HH_getRemSet(hh2)); - linkCCChains(s, hh1, hh2); - // This has to happen before linkInto (which frees hh2) HM_HierarchicalHeap hh2anc = hh2->nextAncestor; CC_freeStack(s, HM_HH_getConcurrentPack(hh2)); + linkCCChains(s, hh1, hh2); linkInto(s, hh1, hh2); + HM_appendChunkList(HM_HH_getChunkList(hh1), HM_HH_getChunkList(hh2)); + ES_move(HM_HH_getSuspects(hh1), HM_HH_getSuspects(hh2)); + HM_appendRemSet(HM_HH_getRemSet(hh1), HM_HH_getRemSet(hh2)); + + *cursor = hh1; cursor = &(hh1->nextAncestor); @@ -378,15 +379,16 @@ void HM_HH_promoteChunks( if (NULL == hh->subHeapForCC) { assert(NULL == hh->subHeapCompletedCC); + /* don't need the snapshot for this heap now. */ + CC_freeStack(s, HM_HH_getConcurrentPack(hh)); + linkCCChains(s, parent, hh); + linkInto(s, parent, hh); + HM_appendChunkList(HM_HH_getChunkList(parent), HM_HH_getChunkList(hh)); ES_move(HM_HH_getSuspects(parent), HM_HH_getSuspects(hh)); HM_appendRemSet(HM_HH_getRemSet(parent), HM_HH_getRemSet(hh)); - linkCCChains(s, parent, hh); /* shortcut. */ thread->hierarchicalHeap = parent; - /* don't need the snapshot for this heap now. 
*/ - CC_freeStack(s, HM_HH_getConcurrentPack(hh)); - linkInto(s, parent, hh); hh = parent; } else { @@ -757,10 +759,10 @@ void mergeCompletedCCs(GC_state s, HM_HierarchicalHeap hh) { HM_HierarchicalHeap next = completed->subHeapCompletedCC; HM_HH_getConcurrentPack(hh)->bytesSurvivedLastCollection += HM_HH_getConcurrentPack(completed)->bytesSurvivedLastCollection; - HM_appendChunkList(HM_HH_getChunkList(hh), HM_HH_getChunkList(completed)); - HM_appendRemSet(HM_HH_getRemSet(hh), HM_HH_getRemSet(completed)); CC_freeStack(s, HM_HH_getConcurrentPack(completed)); linkInto(s, hh, completed); + HM_appendChunkList(HM_HH_getChunkList(hh), HM_HH_getChunkList(completed)); + HM_appendRemSet(HM_HH_getRemSet(hh), HM_HH_getRemSet(completed)); completed = next; } From 80e60548e57320557bc10c442282e17ff00531c6 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 23 Mar 2022 22:34:52 -0400 Subject: [PATCH 022/114] new rememebring policy for entangled writes --- runtime/gc/assign.c | 165 +++++++++++++++++++++++++++++--------------- 1 file changed, 111 insertions(+), 54 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 77f1175b2..c91f9094b 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -10,7 +10,10 @@ void Assignable_decheckObjptr(objptr dst, objptr src) { GC_state s = pthread_getspecific(gcstate_key); s->cumulativeStatistics->numDisentanglementChecks++; - decheckRead(s, dst, src); + if (!decheck(s, src)) + { + manage_entangled(s, src); + } } objptr Assignable_readBarrier( @@ -54,7 +57,10 @@ objptr Assignable_readBarrier( assert(ES_contains(NULL, obj)); s->cumulativeStatistics->numDisentanglementChecks++; objptr ptr = *field; - decheckRead(s, ptr, obj); + if (!decheck(s, obj)) + { + manage_entangled(s, obj); + } return ptr; } @@ -99,7 +105,7 @@ void Assignable_writeBarrier( (size_t)(objend - dstp)); #endif - HM_HierarchicalHeap dstHH = HM_getLevelHeadPathCompress(HM_getChunkOf(dstp)); + HM_HierarchicalHeap dstHH = HM_getLevelHead(HM_getChunkOf(dstp)); objptr readVal = *field; if (dstHH->depth >= 1 && isObjptr(readVal) && s->wsQueueTop!=BOGUS_OBJPTR) { @@ -127,6 +133,54 @@ void Assignable_writeBarrier( HM_remembered remElem = &remElem_; pointer srcp = objptrToPointer(src, NULL); + bool src_de = decheck(s, src); + + if (src_de) { + HM_HierarchicalHeap srcHH = HM_getLevelHeadPathCompress(HM_getChunkOf(srcp)); + if (srcHH == dstHH) { + /* internal pointers are always traced */ + return; + } + + bool dst_de = decheck(s, dst); + uint32_t dd = dstHH->depth, sd = srcHH->depth; + /* depth comparisons make sense only when src && dst are on the same root-to-leaf path, + i.e., when dst_de && src_de */ + bool snapshotted = dst_de && + ((dd > sd) || /* up pointer (snapshotted by the closure) */ + ((HM_HH_getConcurrentPack(srcHH)->ccstate != CC_UNREG) + && dd == sd) /* internal (within a chain) pointer to a snapshotted heap */ + ); + + if (snapshotted) { + return; + } + + /* otherwise pin*/ + bool primary_down_ptr = dst_de && dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG); + enum PinType pt = primary_down_ptr ? PIN_DOWN : PIN_ANY; + uint32_t unpinDepth = dst_de ? 
dd + : (uint32_t) lcaHeapDepth(HM_getChunkOf(srcp)->decheckState, HM_getChunkOf(dstp)->decheckState); + bool success = pinObject(src, unpinDepth, pt); + if (success) { + HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), sd); + assert(NULL != shh); + assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); + HM_HH_rememberAtLevel(shh, remElem, false); + + LOG(LM_HH_PROMOTION, LL_INFO, + "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, + dstHH->depth, srcHH->depth, + dst, src); + } + + /*add dst to the suspect set*/ + if (dd > 0 && !ES_contains(NULL, dst)) { + HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), dd); + ES_add(s, HM_HH_getSuspects(dhh), dst); + } + } + #if 0 /** This is disabled for now. In the future we will come back to @@ -164,29 +218,28 @@ void Assignable_writeBarrier( } #endif - HM_HierarchicalHeap srcHH = HM_getLevelHeadPathCompress(HM_getChunkOf(srcp)); /* Up-pointer. */ - if (dstHH->depth > srcHH->depth) - return; + // if (dstHH->depth > srcHH->depth) + // return; /* Internal pointer. It's safe to ignore an internal pointer if: * 1. it's contained entirely within one subheap, or * 2. the pointed-to object (src) lives in an already snapshotted subregion */ - if ( (dstHH == srcHH) || - (dstHH->depth == srcHH->depth && - HM_HH_getConcurrentPack(srcHH)->ccstate != CC_UNREG) ) { - // assert(...); - // if (dstHH != srcHH) { - // printf( - // "ignore internal pointer "FMTPTR" --> "FMTPTR". dstHH == srcHH? %d\n", - // (uintptr_t)dstp, - // (uintptr_t)srcp, - // srcHH == dstHH); - // } - return; - } + // if ( (dstHH == srcHH) || + // (dstHH->depth == srcHH->depth && + // HM_HH_getConcurrentPack(srcHH)->ccstate != CC_UNREG) ) { + // // assert(...); + // // if (dstHH != srcHH) { + // // printf( + // // "ignore internal pointer "FMTPTR" --> "FMTPTR". dstHH == srcHH? %d\n", + // // (uintptr_t)dstp, + // // (uintptr_t)srcp, + // // srcHH == dstHH); + // // } + // return; + // } /** Otherwise, its a down-pointer, so * (i) make dst a suspect for entanglement, i.e., mark the suspect bit of dst's header * (see pin.h for header-layout). @@ -197,42 +250,46 @@ void Assignable_writeBarrier( */ /* make dst a suspect for entanglement */ - uint32_t dd = dstHH->depth; - GC_thread thread = getThreadCurrent(s); - if (dd > 0 && !ES_contains(NULL, dst)) { - HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, thread, dd); - ES_add(s, HM_HH_getSuspects(dhh), dst); - } + // uint32_t dd = dstHH->depth; + // if (dd > 0 && !ES_contains(NULL, dst)) { + // HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, thread, dd); + // ES_add(s, HM_HH_getSuspects(dhh), dst); + // } + + // if (decheck(s, src)) { + // uint32_t sd = srcHH->depth; + // bool dst_de = decheck(s, dst); + // assert (dd <= sd); + // bool true_down_ptr = dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG) && dst_de; + // // bool unpinDepth = dst_de ? dd : lcaDepth(srcHH->tid, dstHH->tid); + // /* treat a pointer from a chained heap as a cross pointer */ + // bool success = pinObject(src, dd, true_down_ptr ? 
PIN_DOWN : PIN_ANY); + // if (success) + // { + // HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, thread, sd); + // assert(NULL != shh); + // assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); + // HM_HH_rememberAtLevel(shh, remElem, false); + + // LOG(LM_HH_PROMOTION, LL_INFO, + // "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, + // dstHH->depth, srcHH->depth, + // dst, src); + // } + // if (!dst_de) + // { + // DIE("HAVE TO HANDLE ENTANGLED WRITES SEPARATELY"); + // // HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); + // // ES_add(s, HM_HH_getSuspects(lcaHeap), ptr); + // } + // } + + + // // any concurrent pin can only decrease unpinDepth + // assert(unpinDepth <= dd); + + // bool maybe_across_chain = write_de && (dd == unpinDepth) && (dd == sd); - bool success = pinObject(src, dd, PIN_DOWN); - - // any concurrent pin can only decrease unpinDepth - uint32_t unpinDepth = unpinDepthOf(src); - assert(unpinDepth <= dd); - - if (success || dd == unpinDepth) - { - uint32_t sd = srcHH->depth; -#if 0 - /** Fix a silly issue where, when we are dealing with entanglement, the - * lower object is actually deeper than the current thread (which is - * possible because of entanglement! the thread is peeking inside of - * some other thread's heaps, and the other thread might be deeper). - */ - if (d > thread->currentDepth && s->controls->manageEntanglement) - d = thread->currentDepth; -#endif - - HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, thread, sd); - assert(NULL != shh); - assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); - HM_HH_rememberAtLevel(shh, remElem, false); - - LOG(LM_HH_PROMOTION, LL_INFO, - "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, - dstHH->depth, srcHH->depth, - dst, src); - } /* SAM_NOTE: TODO: track bytes allocated here in * thread->bytesAllocatedSinceLast...? 
*/ From 8fd97bee92046397a0ed799ca5fa3d4b9c0f7e5f Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 23 Mar 2022 22:35:17 -0400 Subject: [PATCH 023/114] separate mgmt from detection --- runtime/gc/decheck.c | 83 +++++++++++++++++++++++++------------------- runtime/gc/decheck.h | 4 +-- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 191a88d17..d9228d291 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -237,51 +237,62 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) return p1 == lca_path || dag_depth(t1) <= decheckGetSyncDepth(thread, llen); } -void decheckRead(GC_state s, objptr dst, objptr src) { +void manage_entangled(GC_state s, objptr ptr) { + + GC_thread thread = getThreadCurrent(s); + decheck_tid_t tid = thread->decheckState; + HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); + decheck_tid_t allocator = chunk->decheckState; + + if (!s->controls->manageEntanglement && false) + { + printf("Entanglement detected: object at %p\n", (void *)ptr); + printf("Allocator tree depth: %d\n", tree_depth(allocator)); + printf("Allocator path: 0x%x\n", allocator.internal.path); + printf("Allocator dag depth: %d\n", dag_depth(allocator)); + printf("Reader tree depth: %d\n", tree_depth(tid)); + printf("Reader path: 0x%x\n", tid.internal.path); + printf("Reader dag depth: %d\n", dag_depth(tid)); + exit(-1); + } + + + uint32_t unpinDepth = lcaHeapDepth(tid, allocator); + bool headerChange, pinChange; + pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + if (pinChange) + { + struct HM_remembered remElem_ = {.object = ptr, .from = BOGUS_OBJPTR}; + HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); + } + if (isMutable(s, objptrToPointer(ptr, NULL)) && !ES_contains(NULL, ptr)) + { + HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); + ES_add(s, HM_HH_getSuspects(lcaHeap), ptr); + } +} + +bool decheck(GC_state s, objptr ptr) { + if (!s->controls->manageEntanglement) + return true; GC_thread thread = getThreadCurrent(s); if (thread == NULL) - return; + return true; decheck_tid_t tid = thread->decheckState; if (tid.bits == DECHECK_BOGUS_BITS) - return; - if (!isObjptr(src)) - return; - HM_chunk chunk = HM_getChunkOf(objptrToPointer(src, NULL)); + return true; + if (!isObjptr(ptr)) + return true; + HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); if (chunk == NULL) - return; + return true; decheck_tid_t allocator = chunk->decheckState; if (allocator.bits == DECHECK_BOGUS_BITS) - return; + return true; if (decheckIsOrdered(thread, allocator)) - return; - - /** If we get here, there is entanglement. Next is how to handle it. 
*/ - - assert(!s->controls->manageEntanglement || true); - if (!s->controls->manageEntanglement && false) { - printf("Entanglement detected: object at %p\n", (void *) src); - printf("Allocator tree depth: %d\n", tree_depth(allocator)); - printf("Allocator path: 0x%x\n", allocator.internal.path); - printf("Allocator dag depth: %d\n", dag_depth(allocator)); - printf("Reader tree depth: %d\n", tree_depth(tid)); - printf("Reader path: 0x%x\n", tid.internal.path); - printf("Reader dag depth: %d\n", dag_depth(tid)); - exit(-1); - } - else { - uint32_t unpinDepth = lcaHeapDepth(tid, allocator); - bool headerChange, pinChange; - pinObjectInfo(src, unpinDepth, PIN_ANY, &headerChange, &pinChange); - if (pinChange) { - struct HM_remembered remElem_ = {.object = src, .from = BOGUS_OBJPTR}; - HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); - } - if (isMutable(s, objptrToPointer(src, NULL)) && !ES_contains(NULL, src)) { - HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); - ES_add(s, HM_HH_getSuspects(lcaHeap), src); - } - } + return true; + return false; #if 0 s->cumulativeStatistics->numEntanglementsDetected++; diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index 8ce367d8b..3c2191fd5 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -38,11 +38,11 @@ PRIVATE void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uin #if (defined (MLTON_GC_INTERNAL_FUNCS)) void decheckInit(GC_state s); -void decheckRead(GC_state s, objptr dst, objptr src); +bool decheck(GC_state s, objptr ptr); bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1); int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2); void disentangleObject(GC_state s, objptr ptr); - +void manage_entangled(GC_state s, objptr ptr); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ #endif /* _DECHECK_H_ */ From 738816a8abc2dabefead56b9e3728c29ad4f99c4 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 23 Mar 2022 22:35:44 -0400 Subject: [PATCH 024/114] from Object handling --- runtime/gc/concurrent-collection.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 661bbaa43..83954e3e3 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -325,7 +325,9 @@ void forwardPtrChunk (GC_state s, objptr *opp, void* rawArgs) { void forwardPinned(GC_state s, HM_remembered remElem, void* rawArgs) { objptr src = remElem->object; forwardPtrChunk(s, &src, rawArgs); - forwardPtrChunk(s, &(remElem->from), rawArgs); + if (remElem->from != BOGUS_OBJPTR) { + forwardPtrChunk(s, &(remElem->from), rawArgs); + } #if 0 #if ASSERT @@ -389,7 +391,9 @@ void unmarkPinned( objptr src = remElem->object; assert(!(HM_getChunkOf(objptrToPointer(src, NULL))->pinnedDuringCollection)); unmarkPtrChunk(s, &src, rawArgs); - unmarkPtrChunk(s, &(remElem->from), rawArgs); + if (remElem->from != BOGUS_OBJPTR) { + unmarkPtrChunk(s, &(remElem->from), rawArgs); + } #if 0 #if ASSERT @@ -770,7 +774,6 @@ size_t CC_collectWithRoots( HM_HierarchicalHeap targetHH, __attribute__((unused)) GC_thread thread) { - return 0; getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); getThreadCurrent(s)->exnStack = s->exnStack; HM_HH_updateValues(getThreadCurrent(s), s->frontier); From 296303a860cf2543bb4d2641378d66eda1acb835 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 23 Mar 2022 22:36:07 -0400 Subject: [PATCH 025/114] fixes --- runtime/gc/hierarchical-heap-collection.c 
| 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index c7f435c71..c681cc564 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -1130,6 +1130,7 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) } else if (!CC_isPointerMarked(p)) { + assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); markObj(p); if (!chunk->pinnedDuringCollection) { @@ -1155,6 +1156,7 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) } else { + assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); assert(chunk->pinnedDuringCollection); } } @@ -1345,8 +1347,9 @@ void forwardObjptrsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs &trueObjptrPredicateClosure, &closure, FALSE); - - forwardHHObjptr(s, &(remElem->from), rawArgs); + if (remElem->from != BOGUS_OBJPTR) { + forwardHHObjptr(s, &(remElem->from), rawArgs); + } } /* ========================================================================= */ @@ -1714,11 +1717,13 @@ void checkRememberedEntry( assert(HM_getLevelHead(theChunk) == hh); assert(!hasFwdPtr(objptrToPointer(object, NULL))); - assert(!hasFwdPtr(objptrToPointer(remElem->from, NULL))); + if (remElem->from != BOGUS_OBJPTR) { + assert(!hasFwdPtr(objptrToPointer(remElem->from, NULL))); - HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); - HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); - assert(HM_HH_getDepth(fromHH) <= HM_HH_getDepth(hh)); + HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); + HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); + assert(HM_HH_getDepth(fromHH) <= HM_HH_getDepth(hh)); + } } bool hhContainsChunk(HM_HierarchicalHeap hh, HM_chunk theChunk) From a8703e11faecf3eaa43a59c5de609d9ef180a47f Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 23 Mar 2022 22:36:30 -0400 Subject: [PATCH 026/114] silly bug --- runtime/gc/concurrent-list.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index 5f2f1e121..1553a423b 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -8,14 +8,14 @@ void allocateChunkInConcList( CC_concList concList, size_t objSize, HM_chunk lastChunk) { - pthread_mutex_lock(&concList->mutex); + GC_state s = pthread_getspecific(gcstate_key); + pthread_mutex_lock(&concList->mutex); if(concList->lastChunk != lastChunk) { pthread_mutex_unlock(&concList->mutex); return; } - GC_state s = pthread_getspecific(gcstate_key); HM_chunk chunk = HM_getFreeChunk(s, objSize); if (NULL == chunk) @@ -34,6 +34,7 @@ void allocateChunkInConcList( { lastChunk->nextChunk = chunk; } + concList->lastChunk = chunk; if (concList->firstChunk == NULL) { concList->firstChunk = chunk; From 6bb43a8459a4661adecffcfb5c112b8ec2afbc24 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 24 Mar 2022 09:43:53 -0400 Subject: [PATCH 027/114] temp --- include/c-chunk.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/c-chunk.h b/include/c-chunk.h index 9f7df8cec..23a283529 100644 --- a/include/c-chunk.h +++ b/include/c-chunk.h @@ -56,7 +56,7 @@ extern void Assignable_writeBarrier(CPointer, Objptr, Objptr*, Objptr); extern Objptr Assignable_readBarrier(CPointer, Objptr, Objptr*); -extern void Assignable_decheckObjptr(Objptr); +extern void Assignable_decheckObjptr(Objptr, Objptr); static inline 
Real64 ArrayR64_cas(Real64* a, Word64 i, Real64 x, Real64 y) { @@ -79,7 +79,7 @@ Real64 ArrayR64_cas(Real64* a, Word64 i, Real64 x, Real64 y) { static inline Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { Objptr result = __sync_val_compare_and_swap(r, x, y); - Assignable_decheckObjptr(result); + Assignable_decheckObjptr(*r, result); return result; } @@ -97,7 +97,7 @@ Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { static inline Objptr ArrayP_cas(Objptr* a, Word64 i, Objptr x, Objptr y) { Objptr result = __sync_val_compare_and_swap(a + i, x, y); - Assignable_decheckObjptr(result); + Assignable_decheckObjptr(*a, result); return result; } From 9461cda859aa177aa8b027c58e5fb55edd7c68ec Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 24 Mar 2022 19:47:56 -0400 Subject: [PATCH 028/114] how stupid are you --- runtime/gc/entanglement-suspects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 7e696158e..59f785718 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -155,5 +155,5 @@ void ES_clear(GC_state s, HM_HierarchicalHeap hh) int numSuspects = ES_foreachSuspect(s, &oldList, &fObjptrClosure); s->cumulativeStatistics->numSuspectsCleared+=numSuspects; - HM_freeChunksInList(s, es); + HM_freeChunksInList(s, &(oldList)); } From b48a8b932e282ccac2d826eb5138687c86ca981a Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 11 Apr 2022 15:32:12 -0400 Subject: [PATCH 029/114] comment --- runtime/gc/assign.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index c91f9094b..9f2e69e1a 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -144,8 +144,11 @@ void Assignable_writeBarrier( bool dst_de = decheck(s, dst); uint32_t dd = dstHH->depth, sd = srcHH->depth; - /* depth comparisons make sense only when src && dst are on the same root-to-leaf path, - i.e., when dst_de && src_de */ + /* Depth comparisons make sense only when src && dst are on the same root-to-leaf path, + * checking this maybe expensive, so we approximate here. + * If both dst_de && src_de hold, they are on the same path + * Otherwise, we assume they are on different paths. 
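+   * (Illustrative example: on a single path, dd = 1 and sd = 3 is a down
+   * pointer, so src gets pinned and remembered by the code below; dd = 3 and
+   * sd = 1 is an up pointer already captured by the closure's snapshot, so
+   * nothing needs to be recorded.)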
+ */ bool snapshotted = dst_de && ((dd > sd) || /* up pointer (snapshotted by the closure) */ ((HM_HH_getConcurrentPack(srcHH)->ccstate != CC_UNREG) From d7e5b73c215861a282f682a790f0b715c8c4c40c Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 11 Apr 2022 15:33:00 -0400 Subject: [PATCH 030/114] fix cross poitner thigns --- runtime/gc/decheck.c | 3 ++- runtime/gc/hierarchical-heap-collection.c | 28 +++++++++++++---------- runtime/gc/remembered-set.c | 1 + 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index d9228d291..1535ee417 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -273,8 +273,9 @@ void manage_entangled(GC_state s, objptr ptr) { } bool decheck(GC_state s, objptr ptr) { - if (!s->controls->manageEntanglement) + if (!s->controls->manageEntanglement) { return true; + } GC_thread thread = getThreadCurrent(s); if (thread == NULL) return true; diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index c681cc564..5529e9e8b 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -1128,6 +1128,9 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) // DOUBLE CHECK return; } + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { + return; + } else if (!CC_isPointerMarked(p)) { assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); @@ -1393,13 +1396,13 @@ void forwardHHObjptr(GC_state s, uint32_t opDepth = HM_getObjptrDepthPathCompress(op); - if (opDepth > args->maxDepth) - { - DIE("entanglement detected during collection: %p is at depth %u, below %u", - (void *)p, - opDepth, - args->maxDepth); - } + // if (opDepth > args->maxDepth) + // { + // DIE("entanglement detected during collection: %p is at depth %u, below %u", + // (void *)p, + // opDepth, + // args->maxDepth); + // } /* RAM_NOTE: This is more nuanced with non-local collection */ if ((opDepth > args->maxDepth) || @@ -1427,12 +1430,13 @@ void forwardHHObjptr(GC_state s, assert(!CC_isPointerMarked(objptrToPointer(op, NULL))); return; } + else if (HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) != + args->fromSpace[HM_getObjptrDepth(op)]) + { + assert (!decheck(s, op)); + return; + } - /* Assert is in from space. This holds for pinned objects, too, because - * their levelHead is still set to the fromSpace HH. (Pinned objects are - * stored in a different chunklist during collection through.) 
*/ - assert(HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) == - args->fromSpace[HM_getObjptrDepth(op)]); if (hasFwdPtr(p)) { objptr fop = getFwdPtr(p); diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index d5e7486ff..edc52035a 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -53,6 +53,7 @@ void HM_foreachRemembered( } else { HM_foreachPrivate(s, chunkList, f); + HM_appendChunkList(&(remSet->private), chunkList); } } } From a6444120264f29c5b3d1772ef4462ac0bd3e164d Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 11 Apr 2022 15:33:42 -0400 Subject: [PATCH 031/114] temporary disable option --- runtime/gc/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/gc/init.c b/runtime/gc/init.c index 76517f76e..5f0953dbe 100644 --- a/runtime/gc/init.c +++ b/runtime/gc/init.c @@ -444,7 +444,7 @@ int GC_init (GC_state s, int argc, char **argv) { s->controls->emptinessFraction = 0.25; s->controls->superblockThreshold = 7; // superblocks of 128 blocks s->controls->megablockThreshold = 18; - s->controls->manageEntanglement = FALSE; + s->controls->manageEntanglement = TRUE; /* Not arbitrary; should be at least the page size and must also respect the * limit check coalescing amount in the compiler. */ From 4fc7b563d73904184a90d1d3ed37a88730b78d34 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Tue, 12 Apr 2022 18:14:37 -0400 Subject: [PATCH 032/114] technically integrated but some bugs --- runtime/gc/concurrent-collection.c | 2 +- runtime/gc/decheck.c | 230 ++++++++++++---------- runtime/gc/entanglement-suspects.c | 4 +- runtime/gc/hierarchical-heap-collection.c | 174 +++++++++++++--- runtime/gc/hierarchical-heap-collection.h | 4 + 5 files changed, 280 insertions(+), 134 deletions(-) diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 6f695e83d..037784e7c 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -754,7 +754,7 @@ void CC_tryUnpinOrKeepPinned( * entry. It will be merged and handled properly later. 
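   * (The boolean argument added below presumably flags whether the entry is
   * inserted concurrently; see the remembered set's private list.)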
*/ - HM_remember(args->newRemSet, remElem); + HM_remember(args->newRemSet, remElem, false); return; } diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 99ed1f624..8e4113d32 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -34,18 +34,18 @@ bool GC_HH_decheckMaxDepth(ARG_USED_FOR_DETECT_ENTANGLEMENT objptr resultRef) { #ifdef DETECT_ENTANGLEMENT void decheckInit(GC_state s) { #if ASSERT - if (mmap(SYNCH_DEPTHS_BASE, SYNCH_DEPTHS_LEN, PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0) == MAP_FAILED) { - perror("mmap error"); - exit(-1); - } - memset(synch_depths, 0, MAX_PATHS * sizeof(uint32_t)); - synch_depths[1] = 0; + if (mmap(SYNCH_DEPTHS_BASE, SYNCH_DEPTHS_LEN, PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0) == MAP_FAILED) { + perror("mmap error"); + exit(-1); + } + memset(synch_depths, 0, MAX_PATHS * sizeof(uint32_t)); + synch_depths[1] = 0; #endif - GC_thread thread = getThreadCurrent(s); - thread->decheckState.internal.path = 1; - thread->decheckState.internal.depth = 0; + GC_thread thread = getThreadCurrent(s); + thread->decheckState.internal.path = 1; + thread->decheckState.internal.depth = 0; } #else inline void decheckInit(GC_state s) { @@ -55,16 +55,16 @@ inline void decheckInit(GC_state s) { #endif static inline unsigned int tree_depth(decheck_tid_t tid) { - return tid.internal.depth & 0x1f; + return tid.internal.depth & 0x1f; } static inline unsigned int dag_depth(decheck_tid_t tid) { - return tid.internal.depth >> 5; + return tid.internal.depth >> 5; } static inline uint32_t norm_path(decheck_tid_t tid) { - unsigned int td = tree_depth(tid); - return tid.internal.path & ((1 << (td+1)) - 1); + unsigned int td = tree_depth(tid); + return tid.internal.path & ((1 << (td+1)) - 1); } @@ -89,31 +89,31 @@ static inline uint32_t decheckGetSyncDepth(GC_thread thread, uint32_t pathLen) { * refs just to pass values by destination-passing through the FFI. 
*/ void GC_HH_decheckFork(GC_state s, uint64_t *left, uint64_t *right) { - GC_thread thread = getThreadCurrent(s); - decheck_tid_t tid = thread->decheckState; - assert(tid.bits != DECHECK_BOGUS_BITS); - unsigned int h = tree_depth(tid); - assert(h < MAX_FORK_DEPTH); - - decheck_tid_t t1; - t1.internal.path = (tid.internal.path & ~(1 << h)) | (1 << (h+1)); - t1.internal.depth = tid.internal.depth + (1 << 5) + 1; - *left = t1.bits; - - decheck_tid_t t2; - t2.internal.path = (tid.internal.path | (1 << h)) | (1 << (h+1)); - t2.internal.depth = tid.internal.depth + (1 << 5) + 1; - *right = t2.bits; - - assert(tree_depth(t1) == tree_depth(tid)+1); - assert(tree_depth(t2) == tree_depth(tid)+1); - assert(dag_depth(t1) == dag_depth(tid)+1); - assert(dag_depth(t2) == dag_depth(tid)+1); - assert((norm_path(t1) ^ norm_path(t2)) == (uint32_t)(1 << h)); + GC_thread thread = getThreadCurrent(s); + decheck_tid_t tid = thread->decheckState; + assert(tid.bits != DECHECK_BOGUS_BITS); + unsigned int h = tree_depth(tid); + assert(h < MAX_FORK_DEPTH); + + decheck_tid_t t1; + t1.internal.path = (tid.internal.path & ~(1 << h)) | (1 << (h+1)); + t1.internal.depth = tid.internal.depth + (1 << 5) + 1; + *left = t1.bits; + + decheck_tid_t t2; + t2.internal.path = (tid.internal.path | (1 << h)) | (1 << (h+1)); + t2.internal.depth = tid.internal.depth + (1 << 5) + 1; + *right = t2.bits; + + assert(tree_depth(t1) == tree_depth(tid)+1); + assert(tree_depth(t2) == tree_depth(tid)+1); + assert(dag_depth(t1) == dag_depth(tid)+1); + assert(dag_depth(t2) == dag_depth(tid)+1); + assert((norm_path(t1) ^ norm_path(t2)) == (uint32_t)(1 << h)); #if ASSERT - synch_depths[norm_path(t1)] = dag_depth(t1); - synch_depths[norm_path(t2)] = dag_depth(t2); + synch_depths[norm_path(t1)] = dag_depth(t1); + synch_depths[norm_path(t2)] = dag_depth(t2); #endif } #else @@ -128,18 +128,18 @@ void GC_HH_decheckFork(GC_state s, uint64_t *left, uint64_t *right) { #ifdef DETECT_ENTANGLEMENT void GC_HH_decheckSetTid(GC_state s, uint64_t bits) { - decheck_tid_t tid; - tid.bits = bits; + decheck_tid_t tid; + tid.bits = bits; - GC_thread thread = getThreadCurrent(s); - thread->decheckState = tid; + GC_thread thread = getThreadCurrent(s); + thread->decheckState = tid; // #if ASSERT // synch_depths[norm_path(tid)] = dag_depth(tid); // #endif - decheckSetSyncDepth(thread, tree_depth(tid), dag_depth(tid)); + decheckSetSyncDepth(thread, tree_depth(tid), dag_depth(tid)); - assert(decheckGetSyncDepth(thread, tree_depth(tid)) == synch_depths[norm_path(tid)]); + assert(decheckGetSyncDepth(thread, tree_depth(tid)) == synch_depths[norm_path(tid)]); } #else void GC_HH_decheckSetTid(GC_state s, uint64_t bits) { @@ -165,32 +165,32 @@ uint64_t GC_HH_decheckGetTid(GC_state s, objptr threadp) { #ifdef DETECT_ENTANGLEMENT void GC_HH_decheckJoin(GC_state s, uint64_t left, uint64_t right) { - decheck_tid_t t1; - t1.bits = left; - decheck_tid_t t2; - t2.bits = right; - - assert(tree_depth(t1) == tree_depth(t2)); - assert(tree_depth(t1) >= 1); - - GC_thread thread = getThreadCurrent(s); - unsigned int td = tree_depth(t1) - 1; - unsigned int dd = MAX(dag_depth(t1), dag_depth(t2)) + 1; - assert(dag_depth(t1) == synch_depths[norm_path(t1)]); - assert(dag_depth(t2) == synch_depths[norm_path(t2)]); - decheck_tid_t tid; - tid.internal.path = t1.internal.path | (1 << td); - tid.internal.depth = (dd << 5) + td; - thread->decheckState = tid; - - assert(tree_depth(tid) == tree_depth(t1)-1); + decheck_tid_t t1; + t1.bits = left; + decheck_tid_t t2; + t2.bits = right; + + 
assert(tree_depth(t1) == tree_depth(t2)); + assert(tree_depth(t1) >= 1); + + GC_thread thread = getThreadCurrent(s); + unsigned int td = tree_depth(t1) - 1; + unsigned int dd = MAX(dag_depth(t1), dag_depth(t2)) + 1; + assert(dag_depth(t1) == synch_depths[norm_path(t1)]); + assert(dag_depth(t2) == synch_depths[norm_path(t2)]); + decheck_tid_t tid; + tid.internal.path = t1.internal.path | (1 << td); + tid.internal.depth = (dd << 5) + td; + thread->decheckState = tid; + + assert(tree_depth(tid) == tree_depth(t1)-1); #if ASSERT - synch_depths[norm_path(tid)] = dd; + synch_depths[norm_path(tid)] = dd; #endif - decheckSetSyncDepth(thread, tree_depth(tid), dd); + decheckSetSyncDepth(thread, tree_depth(tid), dd); - assert(decheckGetSyncDepth(thread, tree_depth(tid)) == synch_depths[norm_path(tid)]); + assert(decheckGetSyncDepth(thread, tree_depth(tid)) == synch_depths[norm_path(tid)]); } #else void GC_HH_decheckJoin(GC_state s, uint64_t left, uint64_t right) { @@ -307,6 +307,7 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) { } #endif +#ifdef DETECT_ENTANGLEMENT void manage_entangled(GC_state s, objptr ptr) { @@ -343,57 +344,68 @@ void manage_entangled(GC_state s, objptr ptr) { } } +#else +void manage_entangled(GC_state s, objptr ptr) { + (void)s; + (void)ptr; + return; +} +#endif + +#ifdef DETECT_ENTANGLEMENT + bool decheck(GC_state s, objptr ptr) { - if (!s->controls->manageEntanglement) { + if (!s->controls->manageEntanglement) { + return true; + } + GC_thread thread = getThreadCurrent(s); + if (thread == NULL) return true; - } - GC_thread thread = getThreadCurrent(s); - if (thread == NULL) - return true; - decheck_tid_t tid = thread->decheckState; - if (tid.bits == DECHECK_BOGUS_BITS) - return true; - if (!isObjptr(ptr)) - return true; - HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); - if (chunk == NULL) - return true; - decheck_tid_t allocator = chunk->decheckState; - if (allocator.bits == DECHECK_BOGUS_BITS) - return true; - if (decheckIsOrdered(thread, allocator)) - return true; - - return false; + decheck_tid_t tid = thread->decheckState; + if (tid.bits == DECHECK_BOGUS_BITS) + return true; + if (!isObjptr(ptr)) + return true; + HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); + if (chunk == NULL) + return true; + decheck_tid_t allocator = chunk->decheckState; + if (allocator.bits == DECHECK_BOGUS_BITS) + return true; + if (decheckIsOrdered(thread, allocator)) + return true; + + return false; #if 0 - s->cumulativeStatistics->numEntanglementsDetected++; - - /** set the chunk's disentangled depth. This synchronizes with GC, if there - * is GC happening by the owner of this chunk. - */ - int32_t newDD = lcaHeapDepth(thread->decheckState, allocator); - assert(newDD >= 1); - while (TRUE) { - int32_t oldDD = atomicLoadS32(&(chunk->disentangledDepth)); - - /** Negative means it's frozen for GC. Wait until it's unfrozen... */ - while (oldDD < 0) { - pthread_yield(); - oldDD = atomicLoadS32(&(chunk->disentangledDepth)); - } - - /** And then attempt to update. */ - if (newDD >= oldDD || - __sync_bool_compare_and_swap(&(chunk->disentangledDepth), oldDD, newDD)) - break; + s->cumulativeStatistics->numEntanglementsDetected++; + + /** set the chunk's disentangled depth. This synchronizes with GC, if there + * is GC happening by the owner of this chunk. + */ + int32_t newDD = lcaHeapDepth(thread->decheckState, allocator); + assert(newDD >= 1); + while (TRUE) { + int32_t oldDD = atomicLoadS32(&(chunk->disentangledDepth)); + + /** Negative means it's frozen for GC. 
Wait until it's unfrozen... */ + while (oldDD < 0) { + pthread_yield(); + oldDD = atomicLoadS32(&(chunk->disentangledDepth)); } + + /** And then attempt to update. */ + if (newDD >= oldDD || + __sync_bool_compare_and_swap(&(chunk->disentangledDepth), oldDD, newDD)) + break; + } #endif } #else -void decheckRead(GC_state s, objptr ptr) { +bool decheck(GC_state s, objptr ptr) +{ (void)s; (void)ptr; - return; + return true; } #endif @@ -426,7 +438,8 @@ void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uint32_t st } #else -void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uint32_t stealDepth) { +void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uint32_t stealDepth) +{ (void)s; (void)victimThread; (void)stealDepth; @@ -442,3 +455,4 @@ void disentangleObject(GC_state s, objptr ptr) { } } } + diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 59f785718..321f6c488 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -47,9 +47,9 @@ static inline bool is_suspect(objptr op) void clear_suspect( GC_state s, objptr *opp, + objptr op, void *rawArgs) { - objptr op = *opp; pointer p = objptrToPointer(op, NULL); ES_clearArgs eargs = (ES_clearArgs) rawArgs; @@ -84,7 +84,7 @@ bool ES_mark(__attribute__((unused)) GC_state s, objptr op) { } void ES_unmark(GC_state s, objptr op) { - clear_suspect(s, &op, NULL); + clear_suspect(s, &op, op, NULL); } void ES_add(__attribute__((unused)) GC_state s, HM_chunkList es, objptr op) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index bfd2599e9..9dcdc7c65 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -44,17 +44,17 @@ void tryUnpinOrKeepPinned( HM_remembered remElem, void *rawArgs); -void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs); -void unmark(GC_state s, objptr *opp, void *rawArgs); +void LGC_markAndScan(GC_state s, objptr *opp, objptr op, void *rawArgs); +void unmark(GC_state s, objptr *opp, objptr op, void *rawArgs); -void copySuspect(GC_state s, objptr *opp, void *rawArghh); +void copySuspect(GC_state s, objptr *opp, objptr op, void *rawArghh); void forwardObjptrsOfRemembered( GC_state s, HM_remembered remElem, void *rawArgs); -void unmarkWrapper(GC_state s, HM_remembered remElem, void *args); +void unmarkWrapper(GC_state s, HM_remembered remElem, void *rawArgs); // void scavengeChunkOfPinnedObject(GC_state s, objptr op, void* rawArgs); @@ -295,6 +295,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) .stacksCopied = 0, .bytesMoved = 0, .objectsMoved = 0}; + CC_workList_init(s, &(forwardHHObjptrArgs.worklist)); struct GC_foreachObjptrClosure forwardHHObjptrClosure = {.fun = forwardHHObjptr, .env = &forwardHHObjptrArgs}; @@ -1101,10 +1102,13 @@ void unfreezeDisentangledDepthAfter( #endif /* ========================================================================= */ -void copySuspect(GC_state s, objptr *opp, void *rawArghh) +void copySuspect( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void *rawArghh) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArghh; - objptr op = *opp; assert(isObjptr(op)); pointer p = objptrToPointer(op, NULL); objptr new_ptr = op; @@ -1127,10 +1131,14 @@ void copySuspect(GC_state s, objptr *opp, void *rawArghh) HM_storeInchunkList(HM_HH_getSuspects(args->toSpace[opDepth]), &new_ptr, sizeof(objptr)); } -void LGC_markAndScan(GC_state s, objptr 
*opp, void *rawArgs) +void markAndAdd ( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void* rawArgs) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; - pointer p = objptrToPointer(*opp, NULL); + pointer p = objptrToPointer(op, NULL); HM_chunk chunk = HM_getChunkOf(p); uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) @@ -1145,6 +1153,7 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) { assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); markObj(p); + CC_workList_push(s, &(args->worklist), op); if (!chunk->pinnedDuringCollection) { chunk->pinnedDuringCollection = TRUE; @@ -1152,20 +1161,11 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); } - - // HM_unlinkChunkPreserveLevelHead( - // HM_HH_getChunkList(args->fromSpace[opDepth]), - // chunk); - // HM_appendChunk(&(args->pinned[opDepth]), chunk); - HM_unlinkChunkPreserveLevelHead( HM_HH_getChunkList(args->fromSpace[opDepth]), chunk); HM_appendChunk(&(args->pinned[opDepth]), chunk); } - struct GC_foreachObjptrClosure msClosure = - {.fun = LGC_markAndScan, .env = rawArgs}; - foreachObjptrInObject(s, p, &trueObjptrPredicateClosure, &msClosure, FALSE); } else { @@ -1174,9 +1174,33 @@ void LGC_markAndScan(GC_state s, objptr *opp, void *rawArgs) } } +void unmarkAndAdd ( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void* rawArgs) +{ + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + pointer p = objptrToPointer(op, NULL); + HM_chunk chunk = HM_getChunkOf(p); + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + assert(!hasFwdPtr(p)); + if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) { + return; + } + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { + return; + } + else if (CC_isPointerMarked(p)) { + markObj(p); + CC_workList_push(s, &(args->worklist), op); + } +} + void unmark( - __attribute__((unused)) GC_state s, - objptr *opp, + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, void *rawArgs) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; @@ -1196,9 +1220,113 @@ void unmark( } } -void unmarkWrapper(GC_state s, HM_remembered remElem, void *args) +void phaseLoop (GC_state s, void* rawArgs, GC_foreachObjptrClosure fClosure) { + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + + CC_workList worklist = &(args->worklist); + objptr *current = CC_workList_pop(s, worklist); + while (NULL != current) + { + callIfIsObjptr(s, fClosure, current); + current = CC_workList_pop(s, worklist); + } + assert(CC_workList_isEmpty(s, worklist)); +} + +void LGC_markAndScan( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void *rawArgs) { - unmark(s, &(remElem->object), args); + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + struct GC_foreachObjptrClosure markClosure = + {.fun = markAndAdd, .env = (void*) args}; + CC_workList_push(s, &(args->worklist), op); + phaseLoop(s, rawArgs, &markClosure); +} +// void LGC_markAndScan( +// GC_state s, +// __attribute__((unused)) objptr *opp, +// objptr op, +// void *rawArgs) +// { +// struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; +// pointer p = objptrToPointer(op, NULL); +// HM_chunk chunk = HM_getChunkOf(p); +// 
uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); +// if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) +// { +// // DOUBLE CHECK +// return; +// } +// else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { +// return; +// } +// else if (!CC_isPointerMarked(p)) +// { +// assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); +// markObj(p); +// if (!chunk->pinnedDuringCollection) +// { +// chunk->pinnedDuringCollection = TRUE; + +// if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { +// chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); +// } + +// // HM_unlinkChunkPreserveLevelHead( +// // HM_HH_getChunkList(args->fromSpace[opDepth]), +// // chunk); +// // HM_appendChunk(&(args->pinned[opDepth]), chunk); + +// HM_unlinkChunkPreserveLevelHead( +// HM_HH_getChunkList(args->fromSpace[opDepth]), +// chunk); +// HM_appendChunk(&(args->pinned[opDepth]), chunk); +// } +// struct GC_foreachObjptrClosure msClosure = +// {.fun = LGC_markAndScan, .env = rawArgs}; +// foreachObjptrInObject(s, p, &trueObjptrPredicateClosure, &msClosure, FALSE); +// } +// else +// { +// assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); +// assert(chunk->pinnedDuringCollection); +// } +// } + +// void unmarkLoop( +// __attribute__((unused)) GC_state s, +// __attribute__((unused)) objptr *opp, +// objptr op, +// void *rawArgs) +// { +// struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; +// pointer p = objptrToPointer(op, NULL); +// HM_chunk chunk = HM_getChunkOf(p); +// uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); +// assert(!hasFwdPtr(p)); +// if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) +// { +// return; +// } +// if (CC_isPointerMarked(p)) +// { +// markObj(p); +// struct GC_foreachObjptrClosure unmarkClosure = {.fun = unmark, .env = rawArgs}; +// foreachObjptrInObject(s, p, &trueObjptrPredicateClosure, &unmarkClosure, FALSE); +// } +// } + +void unmarkWrapper(GC_state s, HM_remembered remElem, void *rawArgs) +{ + struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + struct GC_foreachObjptrClosure unmarkClosure = + {.fun = unmarkAndAdd, .env = args}; + CC_workList_push(s, &(args->worklist), remElem->object); + phaseLoop(s, rawArgs, &unmarkClosure); + // unmark(s, &(remElem->object), remElem->object, rawArgs); } void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) @@ -1316,7 +1444,7 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) // } // } // else { - LGC_markAndScan(s, &op, rawArgs); + LGC_markAndScan(s, &op, op, rawArgs); // } // LGC_markAndScan(s, &(remElem->from), rawArgs); @@ -1444,7 +1572,7 @@ void forwardHHObjptr( else if (HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) != args->fromSpace[HM_getObjptrDepth(op)]) { - assert (!decheck(s, op)); + // assert (!decheck(s, op)); return; } diff --git a/runtime/gc/hierarchical-heap-collection.h b/runtime/gc/hierarchical-heap-collection.h index 8999c5031..02aacf002 100644 --- a/runtime/gc/hierarchical-heap-collection.h +++ b/runtime/gc/hierarchical-heap-collection.h @@ -19,6 +19,7 @@ #define HIERARCHICAL_HEAP_COLLECTION_H_ #include "chunk.h" +#include "cc-work-list.h" #if (defined (MLTON_GC_INTERNAL_TYPES)) struct ForwardHHObjptrArgs { @@ -43,6 +44,9 @@ struct ForwardHHObjptrArgs { /* large objects are "moved" (rather than copied). 
*/ size_t bytesMoved; uint64_t objectsMoved; + + /*worklist for mark and scan*/ + struct CC_workList worklist; }; struct checkDEDepthsArgs { From 555c4e68966f72f4f9b7b3a72c5356151b5999ae Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 25 Apr 2022 11:15:47 -0400 Subject: [PATCH 033/114] hence overcome the stupidity that is you --- runtime/gc/assign.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 9f2e69e1a..26d024ba6 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -57,9 +57,9 @@ objptr Assignable_readBarrier( assert(ES_contains(NULL, obj)); s->cumulativeStatistics->numDisentanglementChecks++; objptr ptr = *field; - if (!decheck(s, obj)) + if (!decheck(s, ptr)) { - manage_entangled(s, obj); + manage_entangled(s, ptr); } return ptr; @@ -129,9 +129,6 @@ void Assignable_writeBarrier( if (dst == s->wsQueue) return; - struct HM_remembered remElem_ = {.object = src, .from = dst}; - HM_remembered remElem = &remElem_; - pointer srcp = objptrToPointer(src, NULL); bool src_de = decheck(s, src); @@ -162,15 +159,21 @@ void Assignable_writeBarrier( /* otherwise pin*/ bool primary_down_ptr = dst_de && dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG); enum PinType pt = primary_down_ptr ? PIN_DOWN : PIN_ANY; - uint32_t unpinDepth = dst_de ? dd - : (uint32_t) lcaHeapDepth(HM_getChunkOf(srcp)->decheckState, HM_getChunkOf(dstp)->decheckState); + uint32_t unpinDepth = dst_de ? dd : + (uint32_t)lcaHeapDepth(HM_getChunkOf(srcp)->decheckState, + HM_getChunkOf(dstp)->decheckState); + bool success = pinObject(src, unpinDepth, pt); if (success) { + objptr fromObj = pt == PIN_ANY ? BOGUS_OBJPTR : dst; + struct HM_remembered remElem_ = {.object = src, .from = fromObj}; + HM_remembered remElem = &remElem_; + HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), sd); assert(NULL != shh); assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); - HM_HH_rememberAtLevel(shh, remElem, false); + HM_HH_rememberAtLevel(shh, remElem, false); LOG(LM_HH_PROMOTION, LL_INFO, "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, dstHH->depth, srcHH->depth, @@ -179,6 +182,8 @@ void Assignable_writeBarrier( /*add dst to the suspect set*/ if (dd > 0 && !ES_contains(NULL, dst)) { + /*if dst is not a suspect, it must be disentangled*/ + assert (dst_de); HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), dd); ES_add(s, HM_HH_getSuspects(dhh), dst); } From 50cd7dd55ad35b1f3696b8ad0615a64a4bbbd95c Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 25 Apr 2022 11:16:17 -0400 Subject: [PATCH 034/114] remembered sets do not track size because they are concurrent --- runtime/gc/chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/gc/chunk.c b/runtime/gc/chunk.c index 1d0589831..71d7912a0 100644 --- a/runtime/gc/chunk.c +++ b/runtime/gc/chunk.c @@ -624,8 +624,8 @@ void HM_assertChunkListInvariants(HM_chunkList chunkList) { chunk = chunk->nextChunk; } assert(chunkList->lastChunk == chunk); - assert(chunkList->size == size); - assert(chunkList->usedSize == usedSize); + // assert(chunkList->size == size); + // assert(chunkList->usedSize == usedSize); } #else void HM_assertChunkListInvariants(HM_chunkList chunkList) { From 0c4d708a6b7eef5b9ac5536f2e53396c3ec9760f Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 25 Apr 2022 11:16:50 -0400 Subject: [PATCH 035/114] fix bogus dechcek state and traverse immutable objectsd --- 
runtime/gc/decheck.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 8e4113d32..205cfe539 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -127,6 +127,14 @@ void GC_HH_decheckFork(GC_state s, uint64_t *left, uint64_t *right) { #ifdef DETECT_ENTANGLEMENT + +void setStateIfBogus(HM_chunk chunk, decheck_tid_t tid) { + if ((chunk->decheckState).bits == DECHECK_BOGUS_BITS) + { + chunk->decheckState = tid; + } +} + void GC_HH_decheckSetTid(GC_state s, uint64_t bits) { decheck_tid_t tid; tid.bits = bits; @@ -134,9 +142,9 @@ void GC_HH_decheckSetTid(GC_state s, uint64_t bits) { GC_thread thread = getThreadCurrent(s); thread->decheckState = tid; -// #if ASSERT -// synch_depths[norm_path(tid)] = dag_depth(tid); -// #endif + setStateIfBogus(HM_getChunkOf((pointer)thread), tid); + setStateIfBogus(HM_getChunkOf((pointer)thread->stack), tid); + decheckSetSyncDepth(thread, tree_depth(tid), dag_depth(tid)); assert(decheckGetSyncDepth(thread, tree_depth(tid)) == synch_depths[norm_path(tid)]); @@ -308,6 +316,16 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) { #endif #ifdef DETECT_ENTANGLEMENT +void manage_entangled(GC_state s, objptr ptr) ; + +void manage_immutable( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void *rawArgs) +{ + manage_entangled (s, op); +} void manage_entangled(GC_state s, objptr ptr) { @@ -342,6 +360,11 @@ void manage_entangled(GC_state s, objptr ptr) { HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); ES_add(s, HM_HH_getSuspects(lcaHeap), ptr); } + else if (!isMutable(s, objptrToPointer(ptr, NULL))) { + struct GC_foreachObjptrClosure emanageClosure = + {.fun = manage_immutable, .env = NULL}; + foreachObjptrInObject(s, ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); + } } #else @@ -370,8 +393,10 @@ bool decheck(GC_state s, objptr ptr) { if (chunk == NULL) return true; decheck_tid_t allocator = chunk->decheckState; - if (allocator.bits == DECHECK_BOGUS_BITS) - return true; + if (allocator.bits == DECHECK_BOGUS_BITS) { + // assert (false); + return true; + } if (decheckIsOrdered(thread, allocator)) return true; From 7b9a7747ce32a5d67e3cbbc7850e0c42cd183518 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 25 Apr 2022 11:19:04 -0400 Subject: [PATCH 036/114] remSet unmark bug; add to worklist bug; some assertions --- runtime/gc/hierarchical-heap-collection.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 9dcdc7c65..f65996457 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -680,6 +680,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) * fromSpace HH at this depth which originally stored the chunk) */ assert(pinned[depth].firstChunk == NULL); + assert(NULL == toSpace[depth] || (HM_HH_getRemSet(toSpace[depth])->private).firstChunk == NULL); continue; } @@ -688,6 +689,9 @@ void HM_HHC_collectLocal(uint32_t desiredScope) struct HM_foreachDownptrClosure closure = {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + /*go through the public of fromSpaceLevel, they will be joined later anyway*/ + assert((HM_HH_getRemSet(fromSpaceLevel)->private).firstChunk == NULL); + HM_foreachRemembered(s, HM_HH_getRemSet(fromSpaceLevel), &closure); } /* 
unset the flags on pinned chunks and update their HH pointer */ @@ -1143,7 +1147,6 @@ void markAndAdd ( uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) { - // DOUBLE CHECK return; } else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { @@ -1193,6 +1196,7 @@ void unmarkAndAdd ( } else if (CC_isPointerMarked(p)) { markObj(p); + assert (chunk->pinnedDuringCollection); CC_workList_push(s, &(args->worklist), op); } } @@ -1204,7 +1208,7 @@ void unmark( void *rawArgs) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; - pointer p = objptrToPointer(*opp, NULL); + pointer p = objptrToPointer(op, NULL); HM_chunk chunk = HM_getChunkOf(p); uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); assert(!hasFwdPtr(p)); @@ -1212,7 +1216,11 @@ void unmark( { return; } - if (CC_isPointerMarked(p)) + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) + { + return; + } + else if (CC_isPointerMarked(p)) { markObj(p); struct GC_foreachObjptrClosure unmarkClosure = {.fun = unmark, .env = rawArgs}; @@ -1242,7 +1250,8 @@ void LGC_markAndScan( struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; struct GC_foreachObjptrClosure markClosure = {.fun = markAndAdd, .env = (void*) args}; - CC_workList_push(s, &(args->worklist), op); + markAndAdd (s, opp, op, rawArgs); + // CC_workList_push(s, &(args->worklist), op); phaseLoop(s, rawArgs, &markClosure); } // void LGC_markAndScan( @@ -1324,6 +1333,8 @@ void unmarkWrapper(GC_state s, HM_remembered remElem, void *rawArgs) struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; struct GC_foreachObjptrClosure unmarkClosure = {.fun = unmarkAndAdd, .env = args}; + + unmarkAndAdd(s, &(remElem->object), remElem->object, rawArgs); CC_workList_push(s, &(args->worklist), remElem->object); phaseLoop(s, rawArgs, &unmarkClosure); // unmark(s, &(remElem->object), remElem->object, rawArgs); From c2f7728f94e6987f5d2e4d7c6f57d4ce39c8d225 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 9 May 2022 15:28:02 -0400 Subject: [PATCH 037/114] fix reads of mutable through immutable --- runtime/gc/assign.c | 8 ++++++-- runtime/gc/decheck.c | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 26d024ba6..3b130dd8c 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -181,9 +181,13 @@ void Assignable_writeBarrier( } /*add dst to the suspect set*/ - if (dd > 0 && !ES_contains(NULL, dst)) { + if (dd > 0 && dst_de && !ES_contains(NULL, dst)) { /*if dst is not a suspect, it must be disentangled*/ - assert (dst_de); + // if (!dst_de) { + // printf("problematix: %p \n", dst); + // DIE("done"); + // } + // assert (dst_de); HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), dd); ES_add(s, HM_HH_getSuspects(dhh), dst); } diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 205cfe539..4dad0ec9e 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -318,13 +318,24 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) { #ifdef DETECT_ENTANGLEMENT void manage_entangled(GC_state s, objptr ptr) ; -void manage_immutable( +void mark_mutable_frontier( GC_state s, __attribute__((unused)) objptr *opp, objptr op, - void *rawArgs) + __attribute__((unused)) void *rawArgs) { - manage_entangled (s, op); + if (isMutable(s, op)) { + manage_entangled (s, op); + } + // else if (ES_contains(NULL, op)) { + // return; + // } + else 
{ + struct GC_foreachObjptrClosure emanageClosure = + {.fun = mark_mutable_frontier, .env = NULL}; + foreachObjptrInObject(s, op, &trueObjptrPredicateClosure, &emanageClosure, FALSE); + // ES_mark (NULL, op); + } } void manage_entangled(GC_state s, objptr ptr) { @@ -360,10 +371,8 @@ void manage_entangled(GC_state s, objptr ptr) { HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); ES_add(s, HM_HH_getSuspects(lcaHeap), ptr); } - else if (!isMutable(s, objptrToPointer(ptr, NULL))) { - struct GC_foreachObjptrClosure emanageClosure = - {.fun = manage_immutable, .env = NULL}; - foreachObjptrInObject(s, ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); + else if (!isMutable(s, objptrToPointer(ptr, NULL)) && headerChange) { + mark_mutable_frontier (s, &ptr, ptr, NULL); } } From f4109867aa2f3840460c7c5d95585cfc38a28ea3 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Tue, 24 May 2022 14:28:57 -0400 Subject: [PATCH 038/114] optimize down pointers --- runtime/gc/assign.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 3b130dd8c..aee87c557 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -157,15 +157,15 @@ void Assignable_writeBarrier( } /* otherwise pin*/ - bool primary_down_ptr = dst_de && dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG); - enum PinType pt = primary_down_ptr ? PIN_DOWN : PIN_ANY; + // bool primary_down_ptr = dst_de && dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG); uint32_t unpinDepth = dst_de ? dd : (uint32_t)lcaHeapDepth(HM_getChunkOf(srcp)->decheckState, HM_getChunkOf(dstp)->decheckState); + enum PinType pt = dst_de ? PIN_DOWN : PIN_ANY; bool success = pinObject(src, unpinDepth, pt); - if (success) { - objptr fromObj = pt == PIN_ANY ? BOGUS_OBJPTR : dst; + if (success || (dst_de && dd == unpinDepthOf (src))) { + objptr fromObj = pt == PIN_DOWN ? dst : BOGUS_OBJPTR; struct HM_remembered remElem_ = {.object = src, .from = fromObj}; HM_remembered remElem = &remElem_; From 18aa4cced2220116016e7923afff4c7787f6b6d3 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 25 May 2022 17:33:33 -0400 Subject: [PATCH 039/114] hacky opts may be wrong, remember commit --- runtime/gc/assign.c | 10 +++--- runtime/gc/hierarchical-heap-collection.c | 40 +++++++++++------------ 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index aee87c557..a261c3f48 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -128,19 +128,17 @@ void Assignable_writeBarrier( /* deque down-pointers are handled separately during collection. */ if (dst == s->wsQueue) return; - + uint32_t dd = dstHH->depth; pointer srcp = objptrToPointer(src, NULL); - bool src_de = decheck(s, src); - + bool src_de = (HM_getLevelHead(HM_getChunkOf(srcp)) == getThreadCurrent(s)->hierarchicalHeap) || decheck(s, src); if (src_de) { + bool dst_de = (dd == 1) || decheck(s, dst); HM_HierarchicalHeap srcHH = HM_getLevelHeadPathCompress(HM_getChunkOf(srcp)); if (srcHH == dstHH) { /* internal pointers are always traced */ return; } - - bool dst_de = decheck(s, dst); - uint32_t dd = dstHH->depth, sd = srcHH->depth; + uint32_t sd = srcHH->depth; /* Depth comparisons make sense only when src && dst are on the same root-to-leaf path, * checking this maybe expensive, so we approximate here. 
* If both dst_de && src_de hold, they are on the same path diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index f65996457..fe01c5653 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -1375,8 +1375,8 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) args->toSpace[opDepth] = HM_HH_new(s, opDepth); } -#if ASSERT HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); +#if ASSERT assert(opDepth <= args->maxDepth); HM_HierarchicalHeap hh = HM_getLevelHead(chunk); assert(args->fromSpace[opDepth] == hh); @@ -1439,31 +1439,31 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) * entry into the toSpace. */ HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); - // if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { - // HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); - // uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); - // /* if this is a down-ptr completely inside the scope, - // * no need to in-place things reachable from it - // */ - // if (!chunk->pinnedDuringCollection) - // { - // chunk->pinnedDuringCollection = TRUE; - // HM_unlinkChunkPreserveLevelHead( - // HM_HH_getChunkList(args->fromSpace[opDepth]), - // chunk); - // HM_appendChunk(&(args->pinned[opDepth]), chunk); - // } - // } - // else { + uint32_t fromDepth = HM_getObjptrDepth(remElem->from); + if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { + HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + /* if this is a down-ptr completely inside the scope, + * no need to in-place things reachable from it + */ + if (!chunk->pinnedDuringCollection) + { + chunk->pinnedDuringCollection = TRUE; + HM_unlinkChunkPreserveLevelHead( + HM_HH_getChunkList(args->fromSpace[opDepth]), + chunk); + HM_appendChunk(&(args->pinned[opDepth]), chunk); + } + } + else { LGC_markAndScan(s, &op, op, rawArgs); - // } + } // LGC_markAndScan(s, &(remElem->from), rawArgs); // if (chunk->pinnedDuringCollection) { // return; // } - // if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { // chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); // } @@ -1472,7 +1472,7 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) // HM_HH_getChunkList(args->fromSpace[opDepth]), // chunk); // HM_appendChunk(&(args->pinned[opDepth]), chunk); - + // chunk->pinnedDuringCollection = TRUE; // assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); // /** stronger version of previous assertion, needed for safe freeing of // * hh dependants after LGC completes From 1b039c5b6bbf8d1453d073f08baddc7300edbee2 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Thu, 26 May 2022 10:57:23 -0400 Subject: [PATCH 040/114] exact lgc for de --- runtime/gc/hierarchical-heap-collection.c | 401 +++++++++++++++------- runtime/gc/hierarchical-heap-collection.h | 30 +- 2 files changed, 292 insertions(+), 139 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index fe01c5653..38a0601a1 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -206,26 +206,27 @@ void HM_HHC_collectLocal(uint32_t desiredScope) uint32_t potentialLocalScope = UNPACK_IDX(topval); uint32_t originalLocalScope = pollCurrentLocalScope(s); - if (thread->currentDepth != 
originalLocalScope) { + if (thread->currentDepth != originalLocalScope) + { LOG(LM_HH_COLLECTION, LL_DEBUG, - "Skipping collection:\n" - " currentDepth %u\n" - " originalLocalScope %u\n" - " potentialLocalScope %u\n", - thread->currentDepth, - originalLocalScope, - potentialLocalScope); + "Skipping collection:\n" + " currentDepth %u\n" + " originalLocalScope %u\n" + " potentialLocalScope %u\n", + thread->currentDepth, + originalLocalScope, + potentialLocalScope); return; } /** Compute the min depth for local collection. We claim as many levels - * as we can without interfering with CC, but only so far as desired. - * - * Note that we could permit local collection at the same level as a - * registered (but not yet stolen) CC, as long as we update the rootsets - * stored for the CC. But this is tricky. Much simpler to just avoid CC'ed - * levels entirely. - */ + * as we can without interfering with CC, but only so far as desired. + * + * Note that we could permit local collection at the same level as a + * registered (but not yet stolen) CC, as long as we update the rootsets + * stored for the CC. But this is tricky. Much simpler to just avoid CC'ed + * levels entirely. + */ uint32_t minNoCC = minDepthWithoutCC(thread); uint32_t minOkay = desiredScope; minOkay = max(minOkay, thread->minLocalCollectionDepth); @@ -288,13 +289,16 @@ void HM_HHC_collectLocal(uint32_t desiredScope) .toDepth = HM_HH_INVALID_DEPTH, .fromSpace = NULL, .toSpace = NULL, + .toSpaceStart = NULL, + .toSpaceStartChunk = NULL, .pinned = NULL, .containingObject = BOGUS_OBJPTR, .bytesCopied = 0, .objectsCopied = 0, .stacksCopied = 0, .bytesMoved = 0, - .objectsMoved = 0}; + .objectsMoved = 0, + .concurrent = false}; CC_workList_init(s, &(forwardHHObjptrArgs.worklist)); struct GC_foreachObjptrClosure forwardHHObjptrClosure = {.fun = forwardHHObjptr, .env = &forwardHHObjptrArgs}; @@ -329,8 +333,16 @@ void HM_HHC_collectLocal(uint32_t desiredScope) HM_HierarchicalHeap toSpace[maxDepth + 1]; forwardHHObjptrArgs.toSpace = &(toSpace[0]); + pointer toSpaceStart[maxDepth + 1]; + forwardHHObjptrArgs.toSpaceStart = &(toSpaceStart[0]); + HM_chunk toSpaceStartChunk[maxDepth + 1]; + forwardHHObjptrArgs.toSpaceStartChunk = &(toSpaceStartChunk[0]); for (uint32_t i = 0; i <= maxDepth; i++) + { toSpace[i] = NULL; + toSpaceStart[i] = NULL; + toSpaceStartChunk[i] = NULL; + } HM_HierarchicalHeap fromSpace[maxDepth + 1]; forwardHHObjptrArgs.fromSpace = &(fromSpace[0]); @@ -421,6 +433,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) Trace0(EVENT_PROMOTION_ENTER); timespec_now(&startTime); + forwardHHObjptrArgs.concurrent = true; /* For each remembered entry, if possible, unpin and discard the entry. * otherwise, copy the remembered entry to the toSpace remembered set. 
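   * Entries that stay pinned also have their chunks flagged as
   * pinnedDuringCollection so that they survive this collection in place
   * (see tryUnpinOrKeepPinned).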
*/ for (HM_HierarchicalHeap cursor = hh; @@ -433,8 +446,23 @@ void HM_HHC_collectLocal(uint32_t desiredScope) {.fun = tryUnpinOrKeepPinned, .env = (void *)&forwardHHObjptrArgs}; HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); } + forwardHHObjptrArgs.concurrent = false; forwardHHObjptrArgs.toDepth = HM_HH_INVALID_DEPTH; + for (uint32_t i = 0; i <= maxDepth; i++) + { + if (toSpace[i] != NULL) + { + HM_chunkList toSpaceList = HM_HH_getChunkList(toSpace[i]); + if (toSpaceList->firstChunk != NULL) + { + toSpaceStart[i] = HM_getChunkFrontier(toSpaceList->lastChunk); + toSpaceStartChunk[i] = toSpaceList->lastChunk; + assert(HM_getChunkOf(toSpaceStart[i]) == toSpaceList->lastChunk); + } + } + } + // assertInvariants(thread); #if ASSERT @@ -569,10 +597,12 @@ void HM_HHC_collectLocal(uint32_t desiredScope) if (NULL != HM_HH_getChunkList(toSpaceLevel)->firstChunk) { HM_chunkList toSpaceList = HM_HH_getChunkList(toSpaceLevel); + pointer start = toSpaceStart[depth] != NULL ? toSpaceStart[depth] : HM_getChunkStart(toSpaceList->firstChunk); + HM_chunk startChunk = toSpaceStartChunk[depth] != NULL ? toSpaceStartChunk[depth] : toSpaceList->firstChunk; HM_forwardHHObjptrsInChunkList( s, - toSpaceList->firstChunk, - HM_getChunkStart(toSpaceList->firstChunk), + startChunk, + start, // &skipStackAndThreadObjptrPredicate, // &ssatoPredicateArgs, &trueObjptrPredicate, @@ -668,13 +698,14 @@ void HM_HHC_collectLocal(uint32_t desiredScope) hhTail = nextAncestor; } - /* after everything has been scavenged, we have to move the pinned chunks */ - depth = thread->currentDepth+1; - while (depth > forwardHHObjptrArgs.minDepth) { + depth = thread->currentDepth + 1; + while (depth > forwardHHObjptrArgs.minDepth) + { depth--; HM_HierarchicalHeap fromSpaceLevel = fromSpace[depth]; - if (NULL == fromSpaceLevel) { + if (NULL == fromSpaceLevel) + { /* check that there are also no pinned chunks at this level * (if there was pinned chunk, then there must also have been a * fromSpace HH at this depth which originally stored the chunk) @@ -684,15 +715,15 @@ void HM_HHC_collectLocal(uint32_t desiredScope) continue; } - HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; - if (toSpaceLevel != NULL) { - struct HM_foreachDownptrClosure closure = - {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; - HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); - /*go through the public of fromSpaceLevel, they will be joined later anyway*/ - assert((HM_HH_getRemSet(fromSpaceLevel)->private).firstChunk == NULL); - HM_foreachRemembered(s, HM_HH_getRemSet(fromSpaceLevel), &closure); - } + // HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; + // if (toSpaceLevel != NULL) { + // struct HM_foreachDownptrClosure closure = + // {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; + // HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + // /*go through the public of fromSpaceLevel, they will be joined later anyway*/ + // assert((HM_HH_getRemSet(fromSpaceLevel)->private).firstChunk == NULL); + // HM_foreachRemembered(s, HM_HH_getRemSet(fromSpaceLevel), &closure); + // } /* unset the flags on pinned chunks and update their HH pointer */ for (HM_chunk chunkCursor = pinned[depth].firstChunk; @@ -708,7 +739,6 @@ void HM_HHC_collectLocal(uint32_t desiredScope) HM_appendChunkList(HM_HH_getChunkList(fromSpaceLevel), &(pinned[depth])); } - /* Build the toSpace hh */ HM_HierarchicalHeap hhToSpace = NULL; for (uint32_t i = 0; i <= maxDepth; i++) @@ -721,14 +751,16 @@ void HM_HHC_collectLocal(uint32_t 
desiredScope) } /* merge in toSpace */ - if (NULL == hh && NULL == hhToSpace) { + if (NULL == hh && NULL == hhToSpace) + { /** SAM_NOTE: If we collected everything, I suppose this is possible. - * But shouldn't the stack and thread at least be in the root-to-leaf - * path? Should look into this... - */ + * But shouldn't the stack and thread at least be in the root-to-leaf + * path? Should look into this... + */ hh = HM_HH_new(s, thread->currentDepth); } - else { + else + { hh = HM_HH_zip(s, hh, hhToSpace); } @@ -810,7 +842,8 @@ void HM_HHC_collectLocal(uint32_t desiredScope) NULL != cursor; cursor = cursor->nextAncestor) { - if (NULL != fromSpace[HM_HH_getDepth(cursor)]) { + if (NULL != fromSpace[HM_HH_getDepth(cursor)]) + { assert(fromSpace[HM_HH_getDepth(cursor)] == cursor); } } @@ -943,15 +976,27 @@ objptr relocateObject( GC_state s, objptr op, HM_HierarchicalHeap tgtHeap, - struct ForwardHHObjptrArgs *args) + struct ForwardHHObjptrArgs *args, + bool *relocSuccess) { + *relocSuccess = true; pointer p = objptrToPointer(op, NULL); - assert(!hasFwdPtr(p)); assert(HM_HH_isLevelHead(tgtHeap)); HM_chunkList tgtChunkList = HM_HH_getChunkList(tgtHeap); + GC_header header = getHeader(p); + + if (pinType(header) != PIN_NONE) + { + // object is pinned, so can't relocate + // this case must happen from a down pointer or as a down pointer. + *relocSuccess = false; + assert(args->concurrent); + return op; + } + size_t metaDataBytes; size_t objectBytes; size_t copyBytes; @@ -987,8 +1032,20 @@ objptr relocateObject( tgtHeap); /* Store the forwarding pointer in the old object metadata. */ - *(getFwdPtrp(p)) = pointerToObjptr(copyPointer + metaDataBytes, - NULL); + objptr newPointer = pointerToObjptr(copyPointer + metaDataBytes, NULL); + if (!args->concurrent) + { + *(getFwdPtrp(p)) = newPointer; + } + else + { + if (!__sync_bool_compare_and_swap(getFwdPtrp(p), header, newPointer)) + { + assert(isPinned(op)); + *relocSuccess = false; + return op; + } + } assert(hasFwdPtr(p)); args->bytesCopied += copyBytes; @@ -1107,10 +1164,10 @@ void unfreezeDisentangledDepthAfter( /* ========================================================================= */ void copySuspect( - GC_state s, - __attribute__((unused)) objptr *opp, - objptr op, - void *rawArghh) + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void *rawArghh) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArghh; assert(isObjptr(op)); @@ -1135,11 +1192,16 @@ void copySuspect( HM_storeInchunkList(HM_HH_getSuspects(args->toSpace[opDepth]), &new_ptr, sizeof(objptr)); } -void markAndAdd ( - GC_state s, - __attribute__((unused)) objptr *opp, - objptr op, - void* rawArgs) +bool headerForwarded(GC_header h) +{ + return (!(GC_VALID_HEADER_MASK & h)); +} + +void markAndAdd( + GC_state s, + objptr *opp, + objptr op, + void *rawArgs) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; pointer p = objptrToPointer(op, NULL); @@ -1149,54 +1211,111 @@ void markAndAdd ( { return; } - else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) + { return; } - else if (!CC_isPointerMarked(p)) - { - assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); - markObj(p); - CC_workList_push(s, &(args->worklist), op); - if (!chunk->pinnedDuringCollection) - { - chunk->pinnedDuringCollection = TRUE; - if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { - chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); - } - 
HM_unlinkChunkPreserveLevelHead( - HM_HH_getChunkList(args->fromSpace[opDepth]), - chunk); - HM_appendChunk(&(args->pinned[opDepth]), chunk); - } + if (hasFwdPtr(p)) + { + *opp = getFwdPtr(p); + assert(CC_isPointerMarked(objptrToPointer(getFwdPtr(p), NULL))); + } + else if (isPinned(op)) + { + return; } else { - assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); - assert(chunk->pinnedDuringCollection); + assert(!hasFwdPtr(p)); + assert(args->concurrent); + HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; + if (tgtHeap == NULL) + { + /* Level does not exist, so create it */ + tgtHeap = HM_HH_new(s, opDepth); + args->toSpace[opDepth] = tgtHeap; + } + assert(p == objptrToPointer(op, NULL)); + bool relocateSuccess; + objptr op_new = relocateObject(s, op, tgtHeap, args, &relocateSuccess); + if (relocateSuccess) + { + *opp = op_new; + // markObj(objptrToPointer(op_new, NULL)); + CC_workList_push(s, &(args->worklist), op_new); + } + else + { + // markObj(p); + CC_workList_push(s, &(args->worklist), op); + if (!chunk->pinnedDuringCollection) + { + chunk->pinnedDuringCollection = TRUE; + + if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) + { + chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); + } + HM_unlinkChunkPreserveLevelHead( + HM_HH_getChunkList(args->fromSpace[opDepth]), + chunk); + HM_appendChunk(&(args->pinned[opDepth]), chunk); + } + } } + // else if () + // { + // if (!CC_isPointerMarked(p)) + // { + // assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); + // markObj(p); + + // } else { + // assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); + // assert(chunk->pinnedDuringCollection); + // } + // } + // else if (!hasFwdPtr(p)) { + // assert(args->concurrent); + // HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; + // if (tgtHeap == NULL) + // { + // /* Level does not exist, so create it */ + // tgtHeap = HM_HH_new(s, opDepth); + // args->toSpace[opDepth] = tgtHeap; + // } + // assert(p == objptrToPointer(op, NULL)); + + // /* use the forwarding pointer */ + // *opp = relocateObject(s, op, tgtHeap, args); + + // } + return; } -void unmarkAndAdd ( - GC_state s, - __attribute__((unused)) objptr *opp, - objptr op, - void* rawArgs) +void unmarkAndAdd( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + void *rawArgs) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; pointer p = objptrToPointer(op, NULL); HM_chunk chunk = HM_getChunkOf(p); uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); assert(!hasFwdPtr(p)); - if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) { + if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) + { return; } - else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk) && !isObjptrInToSpace(op, args)) + { return; } - else if (CC_isPointerMarked(p)) { + else if (CC_isPointerMarked(p)) + { markObj(p); - assert (chunk->pinnedDuringCollection); CC_workList_push(s, &(args->worklist), op); } } @@ -1228,7 +1347,8 @@ void unmark( } } -void phaseLoop (GC_state s, void* rawArgs, GC_foreachObjptrClosure fClosure) { +void phaseLoop(GC_state s, void *rawArgs, GC_foreachObjptrClosure fClosure) +{ struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; CC_workList worklist = &(args->worklist); @@ -1242,17 +1362,36 @@ void phaseLoop (GC_state s, void* rawArgs, GC_foreachObjptrClosure fClosure) { } void LGC_markAndScan( - GC_state s, - __attribute__((unused)) objptr 
*opp, - objptr op, - void *rawArgs) + GC_state s, + objptr *opp, + objptr op, + void *rawArgs) { + pointer p = objptrToPointer(op, NULL); + HM_chunk chunk = HM_getChunkOf(p); struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); + CC_workList_push(s, &(args->worklist), op); + if (!chunk->pinnedDuringCollection) + { + chunk->pinnedDuringCollection = TRUE; + + if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) + { + chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); + } + HM_unlinkChunkPreserveLevelHead( + HM_HH_getChunkList(args->fromSpace[opDepth]), + chunk); + HM_appendChunk(&(args->pinned[opDepth]), chunk); + } struct GC_foreachObjptrClosure markClosure = - {.fun = markAndAdd, .env = (void*) args}; - markAndAdd (s, opp, op, rawArgs); + {.fun = markAndAdd, .env = (void *)args}; + // markAndAdd (s, opp, op, rawArgs); // CC_workList_push(s, &(args->worklist), op); phaseLoop(s, rawArgs, &markClosure); + // } } // void LGC_markAndScan( // GC_state s, @@ -1332,7 +1471,7 @@ void unmarkWrapper(GC_state s, HM_remembered remElem, void *rawArgs) { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; struct GC_foreachObjptrClosure unmarkClosure = - {.fun = unmarkAndAdd, .env = args}; + {.fun = unmarkAndAdd, .env = args}; unmarkAndAdd(s, &(remElem->object), remElem->object, rawArgs); CC_workList_push(s, &(args->worklist), remElem->object); @@ -1345,11 +1484,11 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; objptr op = remElem->object; -// #if ASSERT -// HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); -// HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); -// assert(HM_HH_getDepth(fromHH) <= args->toDepth); -// #endif + // #if ASSERT + // HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); + // HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); + // assert(HM_HH_getDepth(fromHH) <= args->toDepth); + // #endif if (!isPinned(op)) { @@ -1360,6 +1499,8 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, remElem->from, op); + return; + } else if ((isObjptrInToSpace(op, args))) { return; } @@ -1407,7 +1548,8 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) bool unpin = tryUnpinWithDepth(op, opDepth); - if (unpin) { + if (unpin) + { return; } @@ -1417,15 +1559,16 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) { uint32_t fromDepth = HM_getObjptrDepth(remElem->from); assert(fromDepth <= opDepth); - if (fromDepth > unpinDepth) { + if (fromDepth > unpinDepth) + { /** If this particular remembered entry came from deeper than some other - * down-pointer, then we don't need to keep it around. There will be some - * other remembered entry coming from the unpinDepth level. - * - * But note that it is very important that the condition is a strict - * inequality: we need to keep all remembered entries that came from the - * same shallowest level. (CC-chaining depends on this.) - */ + * down-pointer, then we don't need to keep it around. There will be some + * other remembered entry coming from the unpinDepth level. 
+ * + * But note that it is very important that the condition is a strict + * inequality: we need to keep all remembered entries that came from the + * same shallowest level. (CC-chaining depends on this.) + */ LOG(LM_HH_PROMOTION, LL_INFO, "forgetting remset entry from " FMTOBJPTR " to " FMTOBJPTR, @@ -1456,21 +1599,23 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) } } else { - LGC_markAndScan(s, &op, op, rawArgs); + LGC_markAndScan(s, &op, op, rawArgs); } // LGC_markAndScan(s, &(remElem->from), rawArgs); - // if (chunk->pinnedDuringCollection) { + // if (chunk->pinnedDuringCollection) + // { // return; // } - // if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) { + // if (chunk->levelHead != HM_HH_getUFNode(args->fromSpace[opDepth])) + // { // chunk->levelHead = HM_HH_getUFNode(args->fromSpace[opDepth]); // } // HM_unlinkChunkPreserveLevelHead( - // HM_HH_getChunkList(args->fromSpace[opDepth]), - // chunk); + // HM_HH_getChunkList(args->fromSpace[opDepth]), + // chunk); // HM_appendChunk(&(args->pinned[opDepth]), chunk); // chunk->pinnedDuringCollection = TRUE; // assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); @@ -1499,7 +1644,8 @@ void forwardObjptrsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs &trueObjptrPredicateClosure, &closure, FALSE); - if (remElem->from != BOGUS_OBJPTR) { + if (remElem->from != BOGUS_OBJPTR) + { forwardHHObjptr(s, &(remElem->from), remElem->from, rawArgs); } } @@ -1507,13 +1653,13 @@ void forwardObjptrsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs /* ========================================================================= */ void forwardHHObjptr( - GC_state s, - objptr* opp, - objptr op, - void* rawArgs) + GC_state s, + objptr *opp, + objptr op, + void *rawArgs) { - struct ForwardHHObjptrArgs* args = ((struct ForwardHHObjptrArgs*)(rawArgs)); - pointer p = objptrToPointer (op, NULL); + struct ForwardHHObjptrArgs *args = ((struct ForwardHHObjptrArgs *)(rawArgs)); + pointer p = objptrToPointer(op, NULL); assert(args->toDepth == HM_HH_INVALID_DEPTH); @@ -1577,11 +1723,10 @@ void forwardHHObjptr( { assert(!hasFwdPtr(objptrToPointer(op, NULL))); assert(!isPinned(op)); - assert(!CC_isPointerMarked(objptrToPointer(op, NULL))); return; } else if (HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) != - args->fromSpace[HM_getObjptrDepth(op)]) + args->fromSpace[HM_getObjptrDepth(op)]) { // assert (!decheck(s, op)); return; @@ -1608,10 +1753,10 @@ void forwardHHObjptr( } /** REALLY SUBTLE. CC clears out remset entries, but can't safely perform - * unpinning. So, there could be objects that (for the purposes of LC) are - * semantically unpinned, but just haven't been marked as such yet. Here, - * we are lazily checking to see if this object should have been unpinned. - */ + * unpinning. So, there could be objects that (for the purposes of LC) are + * semantically unpinned, but just haven't been marked as such yet. Here, + * we are lazily checking to see if this object should have been unpinned. 
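+   * Concretely, "truly pinned" below means isPinned(op) still holds and
+   * unpinDepthOf(op) is strictly shallower than opDepth; anything else is
+   * treated as already unpinned and falls through to the usual forwarding
+   * path.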
+ */ if (isPinned(op) && unpinDepthOf(op) < opDepth) { // This is a truly pinned object @@ -1675,7 +1820,10 @@ void forwardHHObjptr( assert(p == objptrToPointer(op, NULL)); /* use the forwarding pointer */ - *opp = relocateObject(s, op, tgtHeap, args); + bool relocateSuccess; + assert(!args->concurrent); + *opp = relocateObject(s, op, tgtHeap, args, &relocateSuccess); + assert(relocateSuccess); } LOG(LM_HH_COLLECTION, LL_DEBUGMORE, @@ -1793,10 +1941,10 @@ GC_objectTypeTag computeObjectCopyParameters(GC_state s, pointer p, stack = (GC_stack)p; /* SAM_NOTE: - * I am disabling shrinking here because it assumes that - * the stack is going to be copied, which doesn't work with the - * "stacks-in-their-own-chunks" strategy. - */ + * I am disabling shrinking here because it assumes that + * the stack is going to be copied, which doesn't work with the + * "stacks-in-their-own-chunks" strategy. + */ #if 0 /* RAM_NOTE: This changes with non-local collection */ /* Check if the pointer is the current stack of my processor. */ @@ -1871,8 +2019,9 @@ void checkRememberedEntry( assert(HM_getLevelHead(theChunk) == hh); assert(!hasFwdPtr(objptrToPointer(object, NULL))); - if (remElem->from != BOGUS_OBJPTR) { - assert(!hasFwdPtr(objptrToPointer(remElem->from, NULL))); + if (remElem->from != BOGUS_OBJPTR) + { + assert(!hasFwdPtr(objptrToPointer(remElem->from, NULL))); HM_chunk fromChunk = HM_getChunkOf(objptrToPointer(remElem->from, NULL)); HM_HierarchicalHeap fromHH = HM_getLevelHead(fromChunk); diff --git a/runtime/gc/hierarchical-heap-collection.h b/runtime/gc/hierarchical-heap-collection.h index 02aacf002..9377789b4 100644 --- a/runtime/gc/hierarchical-heap-collection.h +++ b/runtime/gc/hierarchical-heap-collection.h @@ -14,23 +14,25 @@ * Definition of the HierarchicalHeap collection interface */ - #ifndef HIERARCHICAL_HEAP_COLLECTION_H_ #define HIERARCHICAL_HEAP_COLLECTION_H_ #include "chunk.h" #include "cc-work-list.h" -#if (defined (MLTON_GC_INTERNAL_TYPES)) -struct ForwardHHObjptrArgs { - struct HM_HierarchicalHeap* hh; +#if (defined(MLTON_GC_INTERNAL_TYPES)) +struct ForwardHHObjptrArgs +{ + struct HM_HierarchicalHeap *hh; uint32_t minDepth; uint32_t maxDepth; uint32_t toDepth; /* if == HM_HH_INVALID_DEPTH, preserve level of the forwarded object */ /* arrays of HH objects, e.g. 
HM_HH_getDepth(toSpace[i]) == i */ - HM_HierarchicalHeap* fromSpace; - HM_HierarchicalHeap* toSpace; + HM_HierarchicalHeap *fromSpace; + HM_HierarchicalHeap *toSpace; + pointer *toSpaceStart; + HM_chunk *toSpaceStartChunk; /* an array of pinned chunklists */ struct HM_chunkList *pinned; @@ -47,12 +49,14 @@ struct ForwardHHObjptrArgs { /*worklist for mark and scan*/ struct CC_workList worklist; + bool concurrent; }; -struct checkDEDepthsArgs { +struct checkDEDepthsArgs +{ int32_t minDisentangledDepth; - HM_HierarchicalHeap* fromSpace; - HM_HierarchicalHeap* toSpace; + HM_HierarchicalHeap *fromSpace; + HM_HierarchicalHeap *toSpace; uint32_t maxDepth; }; @@ -60,10 +64,10 @@ struct checkDEDepthsArgs { #endif /* MLTON_GC_INTERNAL_TYPES */ -#if (defined (MLTON_GC_INTERNAL_BASIS)) +#if (defined(MLTON_GC_INTERNAL_BASIS)) #endif /* MLTON_GC_INTERNAL_BASIS */ -#if (defined (MLTON_GC_INTERNAL_FUNCS)) +#if (defined(MLTON_GC_INTERNAL_FUNCS)) /** * This function performs a local collection on the current hierarchical heap */ @@ -77,12 +81,12 @@ void HM_HHC_collectLocal(uint32_t desiredScope); * @param opp The objptr to forward * @param args The struct ForwardHHObjptrArgs* for this call, cast as a void* */ -void forwardHHObjptr(GC_state s, objptr* opp, objptr op, void* rawArgs); +void forwardHHObjptr(GC_state s, objptr *opp, objptr op, void *rawArgs); /* check if `op` is in args->toSpace[depth(op)] */ bool isObjptrInToSpace(objptr op, struct ForwardHHObjptrArgs *args); -objptr relocateObject(GC_state s, objptr obj, HM_HierarchicalHeap tgtHeap, struct ForwardHHObjptrArgs *args); +objptr relocateObject(GC_state s, objptr obj, HM_HierarchicalHeap tgtHeap, struct ForwardHHObjptrArgs *args, bool *relocSuccess); pointer copyObject(pointer p, size_t objectSize, size_t copySize, HM_HierarchicalHeap tgtHeap); #endif /* MLTON_GC_INTERNAL_FUNCS */ From 395c41ddc945504e4d0c1a2c7811c2e831ba2954 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 27 May 2022 22:07:38 -0400 Subject: [PATCH 041/114] retire for down pointer reachable chunks --- runtime/gc.c | 2 + runtime/gc.h | 2 + runtime/gc/chunk.c | 2 +- runtime/gc/chunk.h | 2 +- runtime/gc/ebr.c | 167 ++++++++++++++++++++++ runtime/gc/ebr.h | 57 ++++++++ runtime/gc/entangled-ebr.c | 29 ++++ runtime/gc/entangled-ebr.h | 17 +++ runtime/gc/garbage-collection.c | 5 + runtime/gc/gc_state.h | 3 +- runtime/gc/hierarchical-heap-collection.c | 21 ++- runtime/gc/hierarchical-heap-ebr.c | 147 ++----------------- runtime/gc/hierarchical-heap-ebr.h | 28 ---- runtime/gc/hierarchical-heap.c | 1 - runtime/gc/init.c | 2 + 15 files changed, 315 insertions(+), 170 deletions(-) create mode 100644 runtime/gc/ebr.c create mode 100644 runtime/gc/ebr.h create mode 100644 runtime/gc/entangled-ebr.c create mode 100644 runtime/gc/entangled-ebr.h diff --git a/runtime/gc.c b/runtime/gc.c index 0bb9d44ba..df53c2eba 100644 --- a/runtime/gc.c +++ b/runtime/gc.c @@ -73,6 +73,8 @@ extern C_Pthread_Key_t gcstate_key; #include "gc/heap.c" #include "gc/hierarchical-heap.c" #include "gc/hierarchical-heap-collection.c" +#include "gc/ebr.c" +#include "gc/entangled-ebr.c" #include "gc/hierarchical-heap-ebr.c" #include "gc/init-world.c" #include "gc/init.c" diff --git a/runtime/gc.h b/runtime/gc.h index b433f63ac..1660440c5 100644 --- a/runtime/gc.h +++ b/runtime/gc.h @@ -83,7 +83,9 @@ typedef GC_state GCState_t; #include "gc/processor.h" #include "gc/pin.h" #include "gc/hierarchical-heap.h" +#include "gc/ebr.h" #include "gc/hierarchical-heap-ebr.h" +#include "gc/entangled-ebr.h" #include 
"gc/hierarchical-heap-collection.h" #include "gc/entanglement-suspects.h" #include "gc/local-scope.h" diff --git a/runtime/gc/chunk.c b/runtime/gc/chunk.c index 71d7912a0..3cf61731b 100644 --- a/runtime/gc/chunk.c +++ b/runtime/gc/chunk.c @@ -107,7 +107,7 @@ HM_chunk HM_initializeChunk(pointer start, pointer end) { chunk->mightContainMultipleObjects = TRUE; chunk->tmpHeap = NULL; chunk->decheckState = DECHECK_BOGUS_TID; - chunk->disentangledDepth = INT32_MAX; + chunk->retireChunk = FALSE; chunk->magic = CHUNK_MAGIC; #if ASSERT diff --git a/runtime/gc/chunk.h b/runtime/gc/chunk.h index ee3cfbf62..478041e16 100644 --- a/runtime/gc/chunk.h +++ b/runtime/gc/chunk.h @@ -69,7 +69,7 @@ struct HM_chunk { /** set during entanglement when in "safe" mode, to help temporarily disable * local GCs while the entanglement persists. */ - int32_t disentangledDepth; + bool retireChunk; bool mightContainMultipleObjects; void* tmpHeap; diff --git a/runtime/gc/ebr.c b/runtime/gc/ebr.c new file mode 100644 index 000000000..dfa9072fd --- /dev/null +++ b/runtime/gc/ebr.c @@ -0,0 +1,167 @@ +/* Copyright (C) 2021 Sam Westrick + * Copyright (C) 2022 Jatin Arora + * + * MLton is released under a HPND-style license. + * See the file MLton-LICENSE for details. + */ + +#if (defined(MLTON_GC_INTERNAL_FUNCS)) + +/** Helpers for packing/unpacking announcements. DEBRA packs epochs with a + * "quiescent" bit, the idea being that processors should set the bit during + * quiescent periods (between operations) and have it unset otherwise (i.e. + * during an operation). Being precise about quiescent periods in this way + * is helpful for reclamation, because in order to advance the epoch, all we + * need to know is that every processor has been in a quiescent period since + * the beginning of the last epoch. + * + * But note that updating the quiescent bits is only efficient if we can + * amortize the cost of the setting/unsetting the bit with other nearby + * operations. If we assumed that the typical state for each processor + * is quiescent and then paid for non-quiescent periods, this would + * be WAY too expensive. In our case, processors are USUALLY NON-QUIESCENT, + * due to depth queries at the write-barrier. + * + * So + */ +#define PACK(epoch, qbit) ((((size_t)(epoch)) << 1) | ((qbit)&1)) +#define UNPACK_EPOCH(announcement) ((announcement) >> 1) +#define UNPACK_QBIT(announcement) ((announcement)&1) +#define SET_Q_TRUE(announcement) ((announcement) | (size_t)1) +#define SET_Q_FALSE(announcement) ((announcement) & (~(size_t)1)) + +#define ANNOUNCEMENT_PADDING 16 + +static inline size_t getAnnouncement(EBR_shared ebr, uint32_t pid) +{ + return ebr->announce[ANNOUNCEMENT_PADDING * pid]; +} + +static inline void setAnnouncement(EBR_shared ebr, uint32_t pid, size_t ann) +{ + ebr->announce[ANNOUNCEMENT_PADDING * pid] = ann; +} + +void EBR_enterQuiescentState(GC_state s, EBR_shared ebr) +{ + uint32_t mypid = s->procNumber; + setAnnouncement(ebr, mypid, SET_Q_TRUE(getAnnouncement(ebr, mypid))); +} + +static void rotateAndReclaim(GC_state s, EBR_shared ebr) +{ + uint32_t mypid = s->procNumber; + + int limboIdx = (ebr->local[mypid].limboIdx + 1) % 3; + ebr->local[mypid].limboIdx = limboIdx; + HM_chunkList limboBag = &(ebr->local[mypid].limboBags[limboIdx]); + + // Free all HH records in the limbo bag. 
+ for (HM_chunk chunk = HM_getChunkListFirstChunk(limboBag); + NULL != chunk; + chunk = chunk->nextChunk) + { + for (pointer p = HM_getChunkStart(chunk); + p < HM_getChunkFrontier(chunk); + p += sizeof(void *)) + { + ebr->freeFun(s, *(void **)p); + // HM_UnionFindNode hufp = *(HM_UnionFindNode *)p; + // assert(hufp->payload != NULL); + // freeFixedSize(getHHAllocator(s), hufp->payload); + // freeFixedSize(getUFAllocator(s), hufp); + } + } + + HM_freeChunksInList(s, limboBag); + HM_initChunkList(limboBag); // clear it out +} + +EBR_shared EBR_new(GC_state s, EBR_freeRetiredObj freeFun) +{ + EBR_shared ebr = malloc(sizeof(struct EBR_shared)); + + ebr->epoch = 0; + ebr->announce = + malloc(s->numberOfProcs * ANNOUNCEMENT_PADDING * sizeof(size_t)); + ebr->local = + malloc(s->numberOfProcs * sizeof(struct EBR_local)); + ebr->freeFun = freeFun; + + for (uint32_t i = 0; i < s->numberOfProcs; i++) + { + // Everyone starts by announcing epoch = 0 and is non-quiescent + setAnnouncement(ebr, i, PACK(0, 0)); + ebr->local[i].limboIdx = 0; + ebr->local[i].checkNext = 0; + for (int j = 0; j < 3; j++) + HM_initChunkList(&(ebr->local[i].limboBags[j])); + } + return ebr; +} + +void EBR_leaveQuiescentState(GC_state s, EBR_shared ebr) +{ + uint32_t mypid = s->procNumber; + uint32_t numProcs = s->numberOfProcs; + + size_t globalEpoch = ebr->epoch; + size_t myann = getAnnouncement(ebr, mypid); + size_t myEpoch = UNPACK_EPOCH(myann); + assert(globalEpoch >= myEpoch); + + if (myEpoch != globalEpoch) + { + ebr->local[mypid].checkNext = 0; + /** Advance into the current epoch. To do so, we need to clear the limbo + * bag of the epoch we're moving into. + */ + rotateAndReclaim(s, ebr); + } + // write a function which takes a number of reads of otherann as an argument + uint32_t otherpid = (ebr->local[mypid].checkNext) % numProcs; + size_t otherann = getAnnouncement(ebr, otherpid); + if (UNPACK_EPOCH(otherann) == globalEpoch || UNPACK_QBIT(otherann)) + { + uint32_t c = ++ebr->local[mypid].checkNext; + if (c >= numProcs) + { + __sync_val_compare_and_swap(&(ebr->epoch), globalEpoch, globalEpoch + 1); + } + } + + setAnnouncement(ebr, mypid, PACK(globalEpoch, 0)); +} + +void EBR_retire(GC_state s, EBR_shared ebr, void *ptr) +{ + uint32_t mypid = s->procNumber; + int limboIdx = ebr->local[mypid].limboIdx; + HM_chunkList limboBag = &(ebr->local[mypid].limboBags[limboIdx]); + HM_chunk chunk = HM_getChunkListLastChunk(limboBag); + + // fast path: bump frontier in chunk + + if (NULL != chunk && + HM_getChunkSizePastFrontier(chunk) >= sizeof(void *)) + { + pointer p = HM_getChunkFrontier(chunk); + *(void **) p = ptr; + HM_updateChunkFrontierInList(limboBag, chunk, p + sizeof(void *)); + return; + } + + // slow path: allocate new chunk + + chunk = HM_allocateChunk(limboBag, sizeof(void *)); + + assert(NULL != chunk && + HM_getChunkSizePastFrontier(chunk) >= sizeof(void *)); + + pointer p = HM_getChunkFrontier(chunk); + *(void **) p = ptr; + HM_updateChunkFrontierInList(limboBag, chunk, p + sizeof(void *)); + return; +} + +#endif // MLTON_GC_INTERNAL_FUNCS diff --git a/runtime/gc/ebr.h b/runtime/gc/ebr.h new file mode 100644 index 000000000..dbf2be156 --- /dev/null +++ b/runtime/gc/ebr.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2021 Sam Westrick + * Copyright (C) 2022 Jatin Arora + * + * MLton is released under a HPND-style license. + * See the file MLton-LICENSE for details. + */ + +/** Epoch-based reclamation (EBR) of hierarchical heap records. 
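+ *
+ * This header is the generic core: EBR_new takes an EBR_freeRetiredObj
+ * callback, so the same machinery reclaims both hierarchical-heap
+ * union-find records (hierarchical-heap-ebr.c) and chunks retired during
+ * entanglement (entangled-ebr.c).
+ *
+ * Rough usage: bracket accesses that may race with reclamation between
+ * EBR_leaveQuiescentState and EBR_enterQuiescentState, and hand retired
+ * objects to EBR_retire; the registered callback runs only after every
+ * processor has since passed through a quiescent period.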
+ */ + +#ifndef EBR_H_ +#define EBR_H_ + +#if (defined(MLTON_GC_INTERNAL_TYPES)) + +struct EBR_local +{ + struct HM_chunkList limboBags[3]; + int limboIdx; + uint32_t checkNext; +} __attribute__((aligned(128))); + +typedef void (*EBR_freeRetiredObj) (GC_state s, void *ptr); + +// There is exactly one of these! Everyone shares a reference to it. +typedef struct EBR_shared +{ + size_t epoch; + + // announcement array, length = num procs + // each announcement is packed: 63 bits for epoch, 1 bit for quiescent bit + size_t *announce; + + // processor-local data, length = num procs + struct EBR_local *local; + + EBR_freeRetiredObj freeFun; +} * EBR_shared; + +#else + +struct EBR_local; +struct EBR_shared; +typedef struct EBR_shared *EBR_shared; + +#endif // MLTON_GC_INTERNAL_TYPES + +#if (defined(MLTON_GC_INTERNAL_FUNCS)) + +EBR_shared EBR_new(GC_state s, EBR_freeRetiredObj freeFun); +void EBR_enterQuiescentState(GC_state s, EBR_shared ebr); +void EBR_leaveQuiescentState(GC_state s, EBR_shared ebr); +void EBR_retire(GC_state s, EBR_shared ebr, void *ptr); + +#endif // MLTON_GC_INTERNAL_FUNCS + +#endif // EBR_H_ diff --git a/runtime/gc/entangled-ebr.c b/runtime/gc/entangled-ebr.c new file mode 100644 index 000000000..d4c539617 --- /dev/null +++ b/runtime/gc/entangled-ebr.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2022 Jatin Arora + * + * MLton is released under a HPND-style license. + * See the file MLton-LICENSE for details. + */ + +#if (defined (MLTON_GC_INTERNAL_FUNCS)) + +void freeChunk (GC_state s, void *ptr) { + HM_freeChunk(s, (HM_chunk)ptr); +} + +void HM_EBR_init(GC_state s) { + s->hmEBR = EBR_new(s, &freeChunk); +} + +void HM_EBR_enterQuiescentState (GC_state s) { + EBR_enterQuiescentState(s, s->hmEBR); +} + +void HM_EBR_leaveQuiescentState(GC_state s) { + EBR_leaveQuiescentState(s, s->hmEBR); +} + +void HM_EBR_retire(GC_state s, HM_chunk chunk) { + EBR_retire(s, s->hmEBR, (void *)chunk); +} + +#endif // MLTON_GC_INTERNAL_FUNCS diff --git a/runtime/gc/entangled-ebr.h b/runtime/gc/entangled-ebr.h new file mode 100644 index 000000000..85cdf59fe --- /dev/null +++ b/runtime/gc/entangled-ebr.h @@ -0,0 +1,17 @@ +/** Epoch-based reclamation (EBR) of hierarchical heap records. 
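+ *
+ * This is the instance for HM_chunk: during a local collection, chunks whose
+ * objects may still be read concurrently (they were reachable from
+ * down-pointers when the collection ran) are flagged retireChunk and handed
+ * to HM_EBR_retire rather than freed immediately; freeChunk then runs once
+ * no other processor can still be reading them.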
+ */ + +#ifndef ENTANGLED_EBR_H_ +#define ENTANGLED_EBR_H_ + +#if (defined(MLTON_GC_INTERNAL_FUNCS)) + + +void HM_EBR_init(GC_state s); +void HM_EBR_enterQuiescentState(GC_state s); +void HM_EBR_leaveQuiescentState(GC_state s); +void HM_EBR_retire(GC_state s, HM_chunk chunk); + +#endif // MLTON_GC_INTERNAL_FUNCS + +#endif //CHUNK_EBR_H_ diff --git a/runtime/gc/garbage-collection.c b/runtime/gc/garbage-collection.c index 5168cb3e7..e4b4b1b4d 100644 --- a/runtime/gc/garbage-collection.c +++ b/runtime/gc/garbage-collection.c @@ -95,8 +95,13 @@ void GC_collect (GC_state s, size_t bytesRequested, bool force) { getThreadCurrent(s)->exnStack = s->exnStack; HM_HH_updateValues(getThreadCurrent(s), s->frontier); beginAtomic(s); + // ebr for hh nodes HH_EBR_leaveQuiescentState(s); + // ebr for chunks + HM_EBR_leaveQuiescentState(s); + HM_EBR_enterQuiescentState(s); + // HM_HierarchicalHeap h = getThreadCurrent(s)->hierarchicalHeap; // while (h->nextAncestor != NULL) h = h->nextAncestor; // if (HM_HH_getDepth(h) == 0 && HM_getChunkListSize(HM_HH_getChunkList(h)) > 8192) { diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 3dd95d2be..455a40099 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -51,7 +51,8 @@ struct GC_state { uint32_t globalsLength; struct FixedSizeAllocator hhAllocator; struct FixedSizeAllocator hhUnionFindAllocator; - struct HH_EBR_shared * hhEBR; + struct EBR_shared * hhEBR; + struct EBR_shared * hmEBR; struct GC_lastMajorStatistics *lastMajorStatistics; pointer limitPlusSlop; /* limit + GC_HEAP_LIMIT_SLOP */ int (*loadGlobals)(FILE *f); /* loads the globals from the file. */ diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 38a0601a1..7fb82c651 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -447,6 +447,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); } forwardHHObjptrArgs.concurrent = false; + CC_workList_free(s, &(forwardHHObjptrArgs.worklist)); forwardHHObjptrArgs.toDepth = HM_HH_INVALID_DEPTH; for (uint32_t i = 0; i <= maxDepth; i++) @@ -690,7 +691,21 @@ void HM_HHC_collectLocal(uint32_t desiredScope) info.depth = HM_HH_getDepth(hhTail); info.freedType = LGC_FREED_NORMAL_CHUNK; - HM_freeChunksInListWithInfo(s, level, &infoc); + // HM_freeChunksInListWithInfo(s, level, &infoc); + HM_chunk chunk = level->firstChunk; + while (chunk != NULL) { + HM_chunk next = chunk->nextChunk; + if (chunk->retireChunk) { + HM_EBR_retire(s, chunk); + chunk->retireChunk = false; + } + else + { + HM_freeChunk(s, chunk); + } + chunk = next; + } + HM_initChunkList(level); HM_HH_freeAllDependants(s, hhTail, TRUE); // freeFixedSize(getUFAllocator(s), HM_HH_getUFNode(hhTail)); // freeFixedSize(getHHAllocator(s), hhTail); @@ -698,6 +713,9 @@ void HM_HHC_collectLocal(uint32_t desiredScope) hhTail = nextAncestor; } + HM_EBR_leaveQuiescentState(s); + HM_EBR_enterQuiescentState(s); + /* after everything has been scavenged, we have to move the pinned chunks */ depth = thread->currentDepth + 1; while (depth > forwardHHObjptrArgs.minDepth) @@ -1241,6 +1259,7 @@ void markAndAdd( objptr op_new = relocateObject(s, op, tgtHeap, args, &relocateSuccess); if (relocateSuccess) { + chunk->retireChunk = true; *opp = op_new; // markObj(objptrToPointer(op_new, NULL)); CC_workList_push(s, &(args->worklist), op_new); diff --git a/runtime/gc/hierarchical-heap-ebr.c b/runtime/gc/hierarchical-heap-ebr.c index 
68a914863..ff96120ba 100644 --- a/runtime/gc/hierarchical-heap-ebr.c +++ b/runtime/gc/hierarchical-heap-ebr.c @@ -1,4 +1,5 @@ /* Copyright (C) 2021 Sam Westrick + * Copyright (C) 2022 Jatin Arora * * MLton is released under a HPND-style license. * See the file MLton-LICENSE for details. @@ -6,156 +7,28 @@ #if (defined (MLTON_GC_INTERNAL_FUNCS)) -/** Helpers for packing/unpacking announcements. DEBRA packs epochs with a - * "quiescent" bit, the idea being that processors should set the bit during - * quiescent periods (between operations) and have it unset otherwise (i.e. - * during an operation). Being precise about quiescent periods in this way - * is helpful for reclamation, because in order to advance the epoch, all we - * need to know is that every processor has been in a quiescent period since - * the beginning of the last epoch. - * - * But note that updating the quiescent bits is only efficient if we can - * amortize the cost of the setting/unsetting the bit with other nearby - * operations. If we assumed that the typical state for each processor - * is quiescent and then paid for non-quiescent periods, this would - * be WAY too expensive. In our case, processors are USUALLY NON-QUIESCENT, - * due to depth queries at the write-barrier. - * - * So - */ -#define PACK(epoch, qbit) ((((size_t)(epoch)) << 1) | ((qbit) & 1)) -#define UNPACK_EPOCH(announcement) ((announcement) >> 1) -#define UNPACK_QBIT(announcement) ((announcement) & 1) -#define SET_Q_TRUE(announcement) ((announcement) | (size_t)1) -#define SET_Q_FALSE(announcement) ((announcement) & (~(size_t)1)) - -#define ANNOUNCEMENT_PADDING 16 - -static inline size_t getAnnouncement(GC_state s, uint32_t pid) { - return s->hhEBR->announce[ANNOUNCEMENT_PADDING*pid]; -} - -static inline void setAnnouncement(GC_state s, uint32_t pid, size_t ann) { - s->hhEBR->announce[ANNOUNCEMENT_PADDING*pid] = ann; -} - void HH_EBR_enterQuiescentState(GC_state s) { - uint32_t mypid = s->procNumber; - setAnnouncement(s, mypid, SET_Q_TRUE(getAnnouncement(s, mypid))); + EBR_enterQuiescentState(s, s->hhEBR); } -static void rotateAndReclaim(GC_state s) { - HH_EBR_shared ebr = s->hhEBR; - uint32_t mypid = s->procNumber; - - int limboIdx = (ebr->local[mypid].limboIdx + 1) % 3; - ebr->local[mypid].limboIdx = limboIdx; - HM_chunkList limboBag = &(ebr->local[mypid].limboBags[limboIdx]); - - // Free all HH records in the limbo bag. 
- for (HM_chunk chunk = HM_getChunkListFirstChunk(limboBag); - NULL != chunk; - chunk = chunk->nextChunk) - { - for (pointer p = HM_getChunkStart(chunk); - p < HM_getChunkFrontier(chunk); - p += sizeof(HM_UnionFindNode *)) - { - HM_UnionFindNode hufp = * (HM_UnionFindNode *) p; - assert (hufp->payload != NULL); - freeFixedSize(getHHAllocator(s), hufp->payload); - freeFixedSize(getUFAllocator(s), hufp); - } - } - - HM_freeChunksInList(s, limboBag); - HM_initChunkList(limboBag); // clear it out +void freeUnionFind (GC_state s, void *ptr) { + HM_UnionFindNode hufp = (HM_UnionFindNode)ptr; + assert(hufp->payload != NULL); + freeFixedSize(getHHAllocator(s), hufp->payload); + freeFixedSize(getUFAllocator(s), hufp); } - void HH_EBR_init(GC_state s) { - HH_EBR_shared ebr = malloc(sizeof(struct HH_EBR_shared)); - s->hhEBR = ebr; - - ebr->epoch = 0; - ebr->announce = - malloc(s->numberOfProcs * ANNOUNCEMENT_PADDING * sizeof(size_t)); - ebr->local = - malloc(s->numberOfProcs * sizeof(struct HH_EBR_local)); - - for (uint32_t i = 0; i < s->numberOfProcs; i++) { - // Everyone starts by announcing epoch = 0 and is non-quiescent - setAnnouncement(s, i, PACK(0,0)); - ebr->local[i].limboIdx = 0; - ebr->local[i].checkNext = 0; - for (int j = 0; j < 3; j++) - HM_initChunkList(&(ebr->local[i].limboBags[j])); - } + s->hhEBR = EBR_new(s, &freeUnionFind); } void HH_EBR_leaveQuiescentState(GC_state s) { - HH_EBR_shared ebr = s->hhEBR; - uint32_t mypid = s->procNumber; - uint32_t numProcs = s->numberOfProcs; - - size_t globalEpoch = ebr->epoch; - size_t myann = getAnnouncement(s, mypid); - size_t myEpoch = UNPACK_EPOCH(myann); - assert(globalEpoch >= myEpoch); - - if (myEpoch != globalEpoch) { - ebr->local[mypid].checkNext = 0; - /** Advance into the current epoch. To do so, we need to clear the limbo - * bag of the epoch we're moving into. 
- */ - rotateAndReclaim(s); - } - - uint32_t otherpid = (ebr->local[mypid].checkNext) % numProcs; - size_t otherann = getAnnouncement(s, otherpid); - if ( UNPACK_EPOCH(otherann) == globalEpoch || UNPACK_QBIT(otherann) ) { - uint32_t c = ++ebr->local[mypid].checkNext; - if (c >= numProcs) { - __sync_val_compare_and_swap(&(ebr->epoch), globalEpoch, globalEpoch+1); - } - } - - setAnnouncement(s, mypid, PACK(globalEpoch, 0)); + EBR_leaveQuiescentState(s, s->hhEBR); } - void HH_EBR_retire(GC_state s, HM_UnionFindNode hhuf) { - HH_EBR_shared ebr = s->hhEBR; - uint32_t mypid = s->procNumber; - int limboIdx = ebr->local[mypid].limboIdx; - HM_chunkList limboBag = &(ebr->local[mypid].limboBags[limboIdx]); - HM_chunk chunk = HM_getChunkListLastChunk(limboBag); - - // fast path: bump frontier in chunk - - if (NULL != chunk && - HM_getChunkSizePastFrontier(chunk) >= sizeof(HM_UnionFindNode *)) - { - pointer p = HM_getChunkFrontier(chunk); - *(HM_UnionFindNode *)p = hhuf; - HM_updateChunkFrontierInList(limboBag, chunk, p + sizeof(HM_UnionFindNode *)); - return; - } - - // slow path: allocate new chunk - - chunk = HM_allocateChunk(limboBag, sizeof(HM_UnionFindNode *)); - - assert(NULL != chunk && - HM_getChunkSizePastFrontier(chunk) >= sizeof(HM_UnionFindNode *)); - - pointer p = HM_getChunkFrontier(chunk); - *(HM_UnionFindNode *)p = hhuf; - HM_updateChunkFrontierInList(limboBag, chunk, p + sizeof(HM_UnionFindNode *)); - return; + EBR_retire(s, s->hhEBR, (void *)hhuf); } - - #endif // MLTON_GC_INTERNAL_FUNCS diff --git a/runtime/gc/hierarchical-heap-ebr.h b/runtime/gc/hierarchical-heap-ebr.h index 541528684..9e8792d45 100644 --- a/runtime/gc/hierarchical-heap-ebr.h +++ b/runtime/gc/hierarchical-heap-ebr.h @@ -10,34 +10,6 @@ #ifndef HIERARCHICAL_HEAP_EBR_H_ #define HIERARCHICAL_HEAP_EBR_H_ -#if (defined (MLTON_GC_INTERNAL_TYPES)) - -struct HH_EBR_local { - struct HM_chunkList limboBags[3]; - int limboIdx; - uint32_t checkNext; -} __attribute__((aligned(128))); - -// There is exactly one of these! Everyone shares a reference to it. 
-typedef struct HH_EBR_shared { - size_t epoch; - - // announcement array, length = num procs - // each announcement is packed: 63 bits for epoch, 1 bit for quiescent bit - size_t *announce; - - // processor-local data, length = num procs - struct HH_EBR_local *local; -} * HH_EBR_shared; - -#else - -struct HH_EBR_local; -struct HH_EBR_shared; -typedef struct HH_EBR_shared * HH_EBR_shared; - -#endif // MLTON_GC_INTERNAL_TYPES - #if (defined (MLTON_GC_INTERNAL_FUNCS)) void HH_EBR_init(GC_state s); diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 0bb6d4b72..b1d5e8553 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -1348,7 +1348,6 @@ void assertInvariants(GC_thread thread) chunk = chunk->nextChunk) { assert(HM_getLevelHead(chunk) == cursor); - assert(chunk->disentangledDepth >= 1); } } diff --git a/runtime/gc/init.c b/runtime/gc/init.c index 5f0953dbe..470e99559 100644 --- a/runtime/gc/init.c +++ b/runtime/gc/init.c @@ -570,6 +570,7 @@ void GC_lateInit(GC_state s) { HM_configChunks(s); HH_EBR_init(s); + HM_EBR_init(s); initLocalBlockAllocator(s, initGlobalBlockAllocator(s)); @@ -608,6 +609,7 @@ void GC_duplicate (GC_state d, GC_state s) { initFixedSizeAllocator(getHHAllocator(d), sizeof(struct HM_HierarchicalHeap)); initFixedSizeAllocator(getUFAllocator(d), sizeof(struct HM_UnionFindNode)); d->hhEBR = s->hhEBR; + d->hmEBR = s->hmEBR; d->nextChunkAllocSize = s->nextChunkAllocSize; d->lastMajorStatistics = newLastMajorStatistics(); d->numberOfProcs = s->numberOfProcs; From b5a657a1feaea6f35e09e1ecd609523b78edd50e Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 27 May 2022 22:09:35 -0400 Subject: [PATCH 042/114] worklist free --- runtime/gc/cc-work-list.c | 8 ++++++++ runtime/gc/cc-work-list.h | 2 ++ runtime/gc/concurrent-collection.c | 1 + 3 files changed, 11 insertions(+) diff --git a/runtime/gc/cc-work-list.c b/runtime/gc/cc-work-list.c index 40eb40937..cec2d271b 100644 --- a/runtime/gc/cc-work-list.c +++ b/runtime/gc/cc-work-list.c @@ -13,7 +13,15 @@ void CC_workList_init( w->currentChunk = HM_allocateChunk(c, sizeof(struct CC_workList_elem)); } +void CC_workList_free( + __attribute__((unused)) GC_state s, + CC_workList w) +{ + HM_chunkList c = &(w->storage); + HM_freeChunksInListWithInfo(s, c, NULL); + w->currentChunk = NULL; +} bool CC_workList_isEmpty( __attribute__((unused)) GC_state s, CC_workList w) diff --git a/runtime/gc/cc-work-list.h b/runtime/gc/cc-work-list.h index d66b23c68..cbdae6590 100644 --- a/runtime/gc/cc-work-list.h +++ b/runtime/gc/cc-work-list.h @@ -50,6 +50,8 @@ void CC_workList_push(GC_state s, CC_workList w, objptr op); * Returns NULL if work list is empty */ objptr* CC_workList_pop(GC_state s, CC_workList w); +void CC_workList_free(GC_state s, CC_workList w); + #endif /* MLTON_GC_INTERNAL_FUNCS */ #endif /* CC_WORK_LIST_H */ diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 037784e7c..c6f1f5921 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -1059,6 +1059,7 @@ size_t CC_collectWithRoots( HM_freeChunksInList(s, removedFromCCBag); assert(CC_workList_isEmpty(s, &(lists.worklist))); + CC_workList_free(s, &(lists.worklist)); #if ASSERT2 // just contains code that is sometimes useful for debugging. 
HM_assertChunkListInvariants(origList); From aed6577896b8b233839bbb5b353e3032c72f873d Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 1 Jun 2022 13:31:49 -0400 Subject: [PATCH 043/114] update rb wb sig --- include/c-chunk.h | 8 +- runtime/gc/assign.c | 311 ++++++++++++++++++++++++++++---------------- runtime/gc/assign.h | 2 +- 3 files changed, 200 insertions(+), 121 deletions(-) diff --git a/include/c-chunk.h b/include/c-chunk.h index 23a283529..aa92743a8 100644 --- a/include/c-chunk.h +++ b/include/c-chunk.h @@ -56,7 +56,7 @@ extern void Assignable_writeBarrier(CPointer, Objptr, Objptr*, Objptr); extern Objptr Assignable_readBarrier(CPointer, Objptr, Objptr*); -extern void Assignable_decheckObjptr(Objptr, Objptr); +extern Objptr Assignable_decheckObjptr(Objptr, Objptr); static inline Real64 ArrayR64_cas(Real64* a, Word64 i, Real64 x, Real64 y) { @@ -79,8 +79,7 @@ Real64 ArrayR64_cas(Real64* a, Word64 i, Real64 x, Real64 y) { static inline Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { Objptr result = __sync_val_compare_and_swap(r, x, y); - Assignable_decheckObjptr(*r, result); - return result; + return Assignable_decheckObjptr(r, result); } #define ArrayW8_cas(a, i, x, y) __sync_val_compare_and_swap(((Word8*)(a)) + (i), (x), (y)) @@ -97,8 +96,7 @@ Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { static inline Objptr ArrayP_cas(Objptr* a, Word64 i, Objptr x, Objptr y) { Objptr result = __sync_val_compare_and_swap(a + i, x, y); - Assignable_decheckObjptr(*a, result); - return result; + return Assignable_decheckObjptr(a, result); } static inline void GC_writeBarrier(CPointer s, Objptr obj, CPointer dst, Objptr src) { diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index a261c3f48..815512471 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -6,14 +6,38 @@ * MLton is released under a HPND-style license. * See the file MLton-LICENSE for details. */ -void Assignable_decheckObjptr(objptr dst, objptr src) +#ifdef DETECT_ENTANGLEMENT + +objptr Assignable_decheckObjptr(objptr dst, objptr src) { GC_state s = pthread_getspecific(gcstate_key); s->cumulativeStatistics->numDisentanglementChecks++; + objptr new_src = src; + HM_HierarchicalHeap dstHH = HM_getLevelHead(HM_getChunkOf(dst)); + if (!isObjptr(src) || HM_HH_getDepth(dstHH) == 0 || !ES_contains(NULL, dst)) + { + return src; + } + + HM_EBR_leaveQuiescentState(s); if (!decheck(s, src)) { - manage_entangled(s, src); + assert (isMutable(s, dst)); + // if (!ES_contains(NULL, dst)) { + // if (!decheck(s, dst)) + // { + // assert(false); + // } + // assert (isPinned(src)); + // assert (!hasFwdPtr(src)); + // assert (pinType(getHeader(src)) == PIN_ANY); + // } + objptr new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); + assert (isPinned(new_src)); } + HM_EBR_enterQuiescentState(s); + assert (!hasFwdPtr(new_src)); + return new_src; } objptr Assignable_readBarrier( @@ -21,50 +45,46 @@ objptr Assignable_readBarrier( objptr obj, objptr *field) { +// can't rely on obj header becaues it may be forwarded. 
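+// The header-based layout checks that used to live below are dropped for the
+// same reason.  The barrier now fast-paths when the loaded value is not an
+// objptr, when the object's heap is at depth 0, or when the object is not a
+// recorded suspect; otherwise it dechecks inside an EBR window
+// (HM_EBR_leaveQuiescentState / HM_EBR_enterQuiescentState) so that nothing
+// it inspects can be reclaimed underneath it, and routes entangled reads
+// through manage_entangled, which may return a pinned replacement pointer.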
-#if ASSERT - assert(isObjptr(obj)); - // check that field is actually inside this object - pointer objp = objptrToPointer(obj, NULL); - GC_header header = getHeader(objp); - GC_objectTypeTag tag; - uint16_t bytesNonObjptrs; - uint16_t numObjptrs; - bool hasIdentity; - splitHeader(s, header, &tag, &hasIdentity, &bytesNonObjptrs, &numObjptrs); - pointer objend = objp; - if (!hasIdentity) { - DIE("read barrier: attempting to read immutable object "FMTOBJPTR, obj); - } - if (NORMAL_TAG == tag) { - objend += bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE); - } - else if (SEQUENCE_TAG == tag) { - size_t dataBytes = getSequenceLength(objp) * (bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE)); - objend += alignWithExtra (s, dataBytes, GC_SEQUENCE_METADATA_SIZE); - } - else { - DIE("read barrier: cannot handle tag %u", tag); - } - pointer fieldp = (pointer)field; - ASSERTPRINT( - objp <= fieldp && fieldp + OBJPTR_SIZE <= objend, - "read barrier: objptr field %p outside object "FMTOBJPTR" of size %zu", - (void*)field, - obj, - (size_t)(objend - objp)); -#endif - assert(ES_contains(NULL, obj)); s->cumulativeStatistics->numDisentanglementChecks++; objptr ptr = *field; + HM_HierarchicalHeap objHH = HM_getLevelHead(HM_getChunkOf(obj)); + if (!isObjptr(ptr) || HM_HH_getDepth(objHH) == 0 || !ES_contains(NULL, obj)) + { + return ptr; + } + HM_EBR_leaveQuiescentState(s); if (!decheck(s, ptr)) { - manage_entangled(s, ptr); + // assert (isMutable(s, obj)); + // if (!ES_contains(NULL, obj)) + // { + // if (!decheck(s, obj)) { + // assert (false); + // } + // assert(isPinned(ptr)); + // assert(!hasFwdPtr(ptr)); + // assert(pinType(getHeader(ptr)) == PIN_ANY); + // } + ptr = manage_entangled(s, ptr, getThreadCurrent(s)->decheckState); + // assert (isPinned(ptr)); } + HM_EBR_enterQuiescentState(s); + assert (!hasFwdPtr(ptr)); return ptr; } +#else +objptr Assignable_decheckObjptr(objptr dst, objptr src) {return src;} +objptr Assignable_readBarrier( + GC_state s, + objptr obj, + objptr *field) + {return *field;} +#endif + void Assignable_writeBarrier( GC_state s, objptr dst, @@ -74,36 +94,39 @@ void Assignable_writeBarrier( assert(isObjptr(dst)); pointer dstp = objptrToPointer(dst, NULL); -#if ASSERT - // check that field is actually inside this object - GC_header header = getHeader(dstp); - GC_objectTypeTag tag; - uint16_t bytesNonObjptrs; - uint16_t numObjptrs; - bool hasIdentity; - splitHeader(s, header, &tag, &hasIdentity, &bytesNonObjptrs, &numObjptrs); - pointer objend = dstp; - if (!hasIdentity) { - DIE("write barrier: attempting to modify immutable object "FMTOBJPTR, dst); - } - if (NORMAL_TAG == tag) { - objend += bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE); - } - else if (SEQUENCE_TAG == tag) { - size_t dataBytes = getSequenceLength(dstp) * (bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE)); - objend += alignWithExtra (s, dataBytes, GC_SEQUENCE_METADATA_SIZE); - } - else { - DIE("write barrier: cannot handle tag %u", tag); - } - pointer fieldp = (pointer)field; - ASSERTPRINT( - dstp <= fieldp && fieldp + OBJPTR_SIZE <= objend, - "write barrier: objptr field %p outside object "FMTOBJPTR" of size %zu", - (void*)field, - dst, - (size_t)(objend - dstp)); -#endif + assert (!hasFwdPtr(dst)); + assert (!isObjptr(src) || !hasFwdPtr(src)); + +// #if ASSERT +// // check that field is actually inside this object +// GC_header header = getHeader(dstp); +// GC_objectTypeTag tag; +// uint16_t bytesNonObjptrs; +// uint16_t numObjptrs; +// bool hasIdentity; +// splitHeader(s, header, &tag, &hasIdentity, &bytesNonObjptrs, 
&numObjptrs); +// pointer objend = dstp; +// if (!hasIdentity) { +// DIE("write barrier: attempting to modify immutable object "FMTOBJPTR, dst); +// } +// if (NORMAL_TAG == tag) { +// objend += bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE); +// } +// else if (SEQUENCE_TAG == tag) { +// size_t dataBytes = getSequenceLength(dstp) * (bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE)); +// objend += alignWithExtra (s, dataBytes, GC_SEQUENCE_METADATA_SIZE); +// } +// else { +// DIE("write barrier: cannot handle tag %u", tag); +// } +// pointer fieldp = (pointer)field; +// ASSERTPRINT( +// dstp <= fieldp && fieldp + OBJPTR_SIZE <= objend, +// "write barrier: objptr field %p outside object "FMTOBJPTR" of size %zu", +// (void*)field, +// dst, +// (size_t)(objend - dstp)); +// #endif HM_HierarchicalHeap dstHH = HM_getLevelHead(HM_getChunkOf(dstp)); @@ -126,69 +149,127 @@ void Assignable_writeBarrier( } /* deque down-pointers are handled separately during collection. */ - if (dst == s->wsQueue) + if (dst == s->wsQueue) { return; + } + uint32_t dd = dstHH->depth; pointer srcp = objptrToPointer(src, NULL); bool src_de = (HM_getLevelHead(HM_getChunkOf(srcp)) == getThreadCurrent(s)->hierarchicalHeap) || decheck(s, src); if (src_de) { bool dst_de = (dd == 1) || decheck(s, dst); - HM_HierarchicalHeap srcHH = HM_getLevelHeadPathCompress(HM_getChunkOf(srcp)); - if (srcHH == dstHH) { - /* internal pointers are always traced */ - return; + if (dst_de) { + HM_HierarchicalHeap srcHH = HM_getLevelHeadPathCompress(HM_getChunkOf(srcp)); + if (srcHH == dstHH) { + /* internal pointers are always traced */ + return; + } + uint32_t sd = srcHH->depth; + + /* up pointer (snapshotted by the closure) + * or internal (within a chain) pointer to a snapshotted heap + */ + if(dd > sd || + ((HM_HH_getConcurrentPack(srcHH)->ccstate != CC_UNREG) && + dd == sd)) + { + return; + } + + uint32_t unpinDepth = dd; + bool success = pinObject(src, unpinDepth, PIN_DOWN); + + if (success || dd == unpinDepthOf(src)) + { + struct HM_remembered remElem_ = {.object = src, .from = dst}; + HM_remembered remElem = &remElem_; + + HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), sd); + assert(NULL != shh); + assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); + + HM_HH_rememberAtLevel(shh, remElem, false); + LOG(LM_HH_PROMOTION, LL_INFO, + "remembered downptr %" PRIu32 "->%" PRIu32 " from " FMTOBJPTR " to " FMTOBJPTR, + dstHH->depth, srcHH->depth, + dst, src); + } + + if (dd > 0 && !ES_contains(NULL, dst)) { + /*if dst is not a suspect, it must be disentangled*/ + // if (!dst_de) { + // printf("problematix: %p \n", dst); + // DIE("done"); + // } + // assert (dst_de); + HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), dd); + ES_add(s, HM_HH_getSuspects(dhh), dst); + } } - uint32_t sd = srcHH->depth; + else { + if (!isPinned(dst)) { + #ifdef DETECT_ENTANGLEMENT + manage_entangled(s, dst, getThreadCurrent(s)->decheckState); + #endif + } + // assert (isPinned(dst)); + // assert (ES_contains(NULL, dst)); + manage_entangled (s, src, HM_getChunkOf(dstp)->decheckState); + } + + + // if (!dst_de) { + // assert (ES_contains(NULL, dst)); + // } + /* Depth comparisons make sense only when src && dst are on the same root-to-leaf path, * checking this maybe expensive, so we approximate here. * If both dst_de && src_de hold, they are on the same path * Otherwise, we assume they are on different paths. 
*/ - bool snapshotted = dst_de && - ((dd > sd) || /* up pointer (snapshotted by the closure) */ - ((HM_HH_getConcurrentPack(srcHH)->ccstate != CC_UNREG) - && dd == sd) /* internal (within a chain) pointer to a snapshotted heap */ - ); - - if (snapshotted) { - return; - } - /* otherwise pin*/ - // bool primary_down_ptr = dst_de && dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG); - uint32_t unpinDepth = dst_de ? dd : - (uint32_t)lcaHeapDepth(HM_getChunkOf(srcp)->decheckState, - HM_getChunkOf(dstp)->decheckState); - enum PinType pt = dst_de ? PIN_DOWN : PIN_ANY; - - bool success = pinObject(src, unpinDepth, pt); - if (success || (dst_de && dd == unpinDepthOf (src))) { - objptr fromObj = pt == PIN_DOWN ? dst : BOGUS_OBJPTR; - struct HM_remembered remElem_ = {.object = src, .from = fromObj}; - HM_remembered remElem = &remElem_; - - HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), sd); - assert(NULL != shh); - assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); - - HM_HH_rememberAtLevel(shh, remElem, false); - LOG(LM_HH_PROMOTION, LL_INFO, - "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, - dstHH->depth, srcHH->depth, - dst, src); - } - /*add dst to the suspect set*/ - if (dd > 0 && dst_de && !ES_contains(NULL, dst)) { - /*if dst is not a suspect, it must be disentangled*/ - // if (!dst_de) { - // printf("problematix: %p \n", dst); - // DIE("done"); - // } - // assert (dst_de); - HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), dd); - ES_add(s, HM_HH_getSuspects(dhh), dst); - } + + + // /* otherwise pin*/ + // // bool primary_down_ptr = dst_de && dd < sd && (HM_HH_getConcurrentPack(dstHH)->ccstate == CC_UNREG); + // = dst_de ? dd : + // (uint32_t)lcaHeapDepth(HM_getChunkOf(srcp)->decheckState, + // HM_getChunkOf(dstp)->decheckState); + // enum PinType pt = dst_de ? PIN_DOWN : PIN_ANY; + + // bool success = pinTemp(s, src, unpinDepth, pt); + // if (success || (dst_de && dd == unpinDepthOf (src))) { + // objptr fromObj = pt == PIN_DOWN ? 
dst : BOGUS_OBJPTR; + // struct HM_remembered remElem_ = {.object = src, .from = fromObj}; + // HM_remembered remElem = &remElem_; + + // HM_HierarchicalHeap shh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), sd); + // assert(NULL != shh); + // assert(HM_HH_getConcurrentPack(shh)->ccstate == CC_UNREG); + + // HM_HH_rememberAtLevel(shh, remElem, false); + // LOG(LM_HH_PROMOTION, LL_INFO, + // "remembered downptr %"PRIu32"->%"PRIu32" from "FMTOBJPTR" to "FMTOBJPTR, + // dstHH->depth, srcHH->depth, + // dst, src); + // } + + // /*add dst to the suspect set*/ + // if (dd > 0 && dst_de && !ES_contains(NULL, dst)) { + // /*if dst is not a suspect, it must be disentangled*/ + // // if (!dst_de) { + // // printf("problematix: %p \n", dst); + // // DIE("done"); + // // } + // // assert (dst_de); + // HM_HierarchicalHeap dhh = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), dd); + // ES_add(s, HM_HH_getSuspects(dhh), dst); + // } + } else { + assert (isPinned(src)); + assert (!hasFwdPtr(src)); + assert (pinType(getHeader(src)) == PIN_ANY); } diff --git a/runtime/gc/assign.h b/runtime/gc/assign.h index 9409fa403..73fc5805a 100644 --- a/runtime/gc/assign.h +++ b/runtime/gc/assign.h @@ -22,7 +22,7 @@ PRIVATE objptr Assignable_readBarrier( GC_state s, objptr dst, objptr* field ); -PRIVATE void Assignable_decheckObjptr(objptr dst, objptr src); +PRIVATE objptr Assignable_decheckObjptr(objptr dst, objptr src); #endif /* MLTON_GC_INTERNAL_BASIS */ From 4cd9593861e094e3260b373950b389df00cc2626 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 1 Jun 2022 13:38:48 -0400 Subject: [PATCH 044/114] manage_entangled proper --- runtime/gc/decheck.c | 188 +++++++++++++++++++++++++++++++++++-------- runtime/gc/decheck.h | 8 +- 2 files changed, 161 insertions(+), 35 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 4dad0ec9e..14a74ad04 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -316,32 +316,105 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) { #endif #ifdef DETECT_ENTANGLEMENT -void manage_entangled(GC_state s, objptr ptr) ; -void mark_mutable_frontier( +#ifdef ASSERT +void traverseAndCheck( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + __attribute__((unused)) void *rawArgs) +{ + GC_header header = getHeader(objptrToPointer(op, NULL)); + assert (pinType(header) == PIN_ANY); + assert (!isFwdHeader(header)); + if (isMutableH(s, header)) { + assert (ES_contains(NULL, op)); + } + else { + struct GC_foreachObjptrClosure echeckClosure = + {.fun = traverseAndCheck, .env = NULL}; + foreachObjptrInObject(s, op, &trueObjptrPredicateClosure, &echeckClosure, FALSE); + } +} +#else +void traverseAndCheck( GC_state s, __attribute__((unused)) objptr *opp, objptr op, - __attribute__((unused)) void *rawArgs) + __attribute__((unused)) void *rawArgs) { - if (isMutable(s, op)) { - manage_entangled (s, op); - } - // else if (ES_contains(NULL, op)) { + (void)s; + (void)op; + return; +} +#endif + +void make_entangled( + GC_state s, + objptr *opp, + objptr ptr, + void *rawArgs) +{ + + struct ManageEntangledArgs* mea = (struct ManageEntangledArgs*) rawArgs; + + HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); + decheck_tid_t allocator = chunk->decheckState; + + // if (!decheckIsOrdered(mea->root, allocator)) { + // // while managing entanglement, we stay ordered wrt the root of the entanglement // return; // } - else { - struct GC_foreachObjptrClosure emanageClosure = - {.fun = mark_mutable_frontier, .env = NULL}; - foreachObjptrInObject(s, op, 
&trueObjptrPredicateClosure, &emanageClosure, FALSE); - // ES_mark (NULL, op); + + GC_header header = getRacyHeader(objptrToPointer(ptr, NULL)); + bool mutable = isMutableH(s, header); + bool headerChange = false, pinChange = false; + objptr new_ptr = ptr; + uint32_t unpinDepth = lcaHeapDepth(mea->reader, allocator); + + if (pinType(header) != PIN_ANY || unpinDepthOfH(header) > unpinDepth) + { + if (mutable) { + new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + } + else + { + struct GC_foreachObjptrClosure emanageClosure = + {.fun = make_entangled, .env = rawArgs}; + foreachObjptrInObject(s, ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); + new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + } + if (pinChange) + { + struct HM_remembered remElem_ = {.object = new_ptr, .from = BOGUS_OBJPTR}; + HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); + } + } + assert (!hasFwdPtr(new_ptr)); + + if (ptr != new_ptr) { + // Help LGC move along + assert(hasFwdPtr(ptr)); + *opp = new_ptr; + } + + if (mutable && !ES_contains(NULL, new_ptr)) { + HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, getThreadCurrent(s), unpinDepth); + ES_add(s, HM_HH_getSuspects(lcaHeap), new_ptr); + assert(ES_contains(NULL, new_ptr)); } + + assert (!mutable || ES_contains(NULL, new_ptr)); } -void manage_entangled(GC_state s, objptr ptr) { +objptr manage_entangled( + GC_state s, + objptr ptr, + decheck_tid_t reader) +{ - GC_thread thread = getThreadCurrent(s); - decheck_tid_t tid = thread->decheckState; + // GC_thread thread = getThreadCurrent(s); + // decheck_tid_t tid = thread->decheckState; HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); decheck_tid_t allocator = chunk->decheckState; @@ -351,36 +424,83 @@ void manage_entangled(GC_state s, objptr ptr) { printf("Allocator tree depth: %d\n", tree_depth(allocator)); printf("Allocator path: 0x%x\n", allocator.internal.path); printf("Allocator dag depth: %d\n", dag_depth(allocator)); - printf("Reader tree depth: %d\n", tree_depth(tid)); - printf("Reader path: 0x%x\n", tid.internal.path); - printf("Reader dag depth: %d\n", dag_depth(tid)); + printf("Reader tree depth: %d\n", tree_depth(allocator)); + printf("Reader path: 0x%x\n", allocator.internal.path); + printf("Reader dag depth: %d\n", dag_depth(allocator)); exit(-1); } + uint32_t unpinDepth = lcaHeapDepth(reader, allocator); + GC_header header = getHeader(objptrToPointer (ptr, NULL)); - uint32_t unpinDepth = lcaHeapDepth(tid, allocator); - bool headerChange, pinChange; - pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); - if (pinChange) - { - struct HM_remembered remElem_ = {.object = ptr, .from = BOGUS_OBJPTR}; - HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); - } - if (isMutable(s, objptrToPointer(ptr, NULL)) && !ES_contains(NULL, ptr)) - { - HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); - ES_add(s, HM_HH_getSuspects(lcaHeap), ptr); + bool manage = isFwdHeader(header) || + pinType (header) != PIN_ANY || + unpinDepthOfH(header) > unpinDepth; + + if (manage) { + struct ManageEntangledArgs mea = { + .reader = reader, + .root = allocator + }; + make_entangled(s, &ptr, ptr, (void*) &mea); } - else if (!isMutable(s, objptrToPointer(ptr, NULL)) && headerChange) { - mark_mutable_frontier (s, &ptr, ptr, NULL); + else { + if (isMutableH(s, header) && !ES_contains(NULL, ptr)) { + HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, 
getThreadCurrent(s), unpinDepth); + ES_add(s, HM_HH_getSuspects(lcaHeap), ptr); + assert(ES_contains(NULL, ptr)); + } + traverseAndCheck(s, &ptr, ptr, NULL); } + + + traverseAndCheck(s, &ptr, ptr, NULL); + return ptr; + // GC_header header = getRacyHeader(objptrToPointer(ptr, NULL)); + // bool mutable = isMutableH(s, header); + // bool headerChange = false, pinChange = false; + // objptr new_ptr = ptr; + // if (pinType(header) != PIN_ANY || unpinDepthOfH(header) > unpinDepth) + // { + // if (mutable) + // { + // new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + // if (!ES_contains(NULL, new_ptr)) { + // HM_HierarchicalHeap lcaHeap = HM_HH_getHeapAtDepth(s, thread, unpinDepth); + // ES_add(s, HM_HH_getSuspects(lcaHeap), new_ptr); + // } + // } + // else + // { + // struct GC_foreachObjptrClosure emanageClosure = + // {.fun = manage_entangled, .env = NULL}; + // foreachObjptrInObject(s, ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); + // new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + // } + // if (pinChange) + // { + // struct HM_remembered remElem_ = {.object = new_ptr, .from = BOGUS_OBJPTR}; + // HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); + // } + // } + // else + // { + // if (!mutable) + // { + // traverseAndCheck(s, &new_ptr, new_ptr, NULL); + // } + // } + + // traverseAndCheck(s, &new_ptr, new_ptr, NULL); + // return new_ptr; } #else -void manage_entangled(GC_state s, objptr ptr) { +objptr manage_entangled(GC_state s, objptr ptr, decheck_tid_t reader) { (void)s; (void)ptr; - return; + (void)reader; + return ptr; } #endif diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index b0d15ec2b..d283e1e2c 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -20,6 +20,12 @@ typedef union { uint64_t bits; } decheck_tid_t; +struct ManageEntangledArgs +{ + decheck_tid_t reader; + decheck_tid_t root; +}; + #define DECHECK_BOGUS_BITS ((uint64_t)0) #define DECHECK_BOGUS_TID ((decheck_tid_t){ .bits = DECHECK_BOGUS_BITS }) @@ -44,7 +50,7 @@ bool decheck(GC_state s, objptr ptr); bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1); int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2); void disentangleObject(GC_state s, objptr ptr); -void manage_entangled(GC_state s, objptr ptr); +objptr manage_entangled(GC_state s, objptr ptr, decheck_tid_t reader); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ #endif /* _DECHECK_H_ */ From 36fb7a41ee9dc9478a4858253d9303008ccb7185 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 1 Jun 2022 13:39:16 -0400 Subject: [PATCH 045/114] forward ptr and foreach --- runtime/gc/foreach.c | 2 +- runtime/gc/forward.c | 6 +++++- runtime/gc/forward.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/runtime/gc/foreach.c b/runtime/gc/foreach.c index 8c16ab249..da5812e41 100644 --- a/runtime/gc/foreach.c +++ b/runtime/gc/foreach.c @@ -125,7 +125,7 @@ pointer foreachObjptrInObject (GC_state s, pointer p, bool skip = !pred->fun(s, p, pred->env); - header = getHeader (p); + header = getRacyHeader (p); splitHeader(s, header, &tag, NULL, &bytesNonObjptrs, &numObjptrs); if (DEBUG_DETAILED) fprintf (stderr, diff --git a/runtime/gc/forward.c b/runtime/gc/forward.c index 664d94ec7..f94777f5a 100644 --- a/runtime/gc/forward.c +++ b/runtime/gc/forward.c @@ -25,10 +25,14 @@ objptr getFwdPtr (pointer p) { return *(getFwdPtrp(p)); } +bool isFwdHeader (GC_header h) { + return (not (GC_VALID_HEADER_MASK & h)); +} + /* hasFwdPtr (p) * * Returns 
true if the object pointed to by p has a valid forwarding pointer. */ bool hasFwdPtr (pointer p) { - return (not (GC_VALID_HEADER_MASK & getHeader(p))); + return isFwdHeader (getHeader(p)); } diff --git a/runtime/gc/forward.h b/runtime/gc/forward.h index 154381d81..364b49cef 100644 --- a/runtime/gc/forward.h +++ b/runtime/gc/forward.h @@ -15,5 +15,6 @@ static inline objptr* getFwdPtrp (pointer p); static inline objptr getFwdPtr (pointer p); static inline bool hasFwdPtr (pointer p); +static inline bool isFwdHeader (GC_header h); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ From c584b0a4f5837317c2dadce9ed7799e385334c8a Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 1 Jun 2022 13:40:25 -0400 Subject: [PATCH 046/114] delete object --- runtime/gc/hierarchical-heap-collection.c | 64 +++++++++++++++-------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 7fb82c651..2a45213c9 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -85,6 +85,8 @@ pointer copyObject(pointer p, size_t copySize, HM_HierarchicalHeap tgtHeap); +void delLastObj(objptr op, size_t objectSize, HM_HierarchicalHeap tgtHeap); + /** * ObjptrPredicateFunction for skipping stacks and threads in the hierarchical * heap. @@ -459,7 +461,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { toSpaceStart[i] = HM_getChunkFrontier(toSpaceList->lastChunk); toSpaceStartChunk[i] = toSpaceList->lastChunk; - assert(HM_getChunkOf(toSpaceStart[i]) == toSpaceList->lastChunk); + // assert(HM_getChunkOf(toSpaceStart[i]) == toSpaceList->lastChunk); } } } @@ -1059,11 +1061,13 @@ objptr relocateObject( { if (!__sync_bool_compare_and_swap(getFwdPtrp(p), header, newPointer)) { + delLastObj(newPointer, objectBytes, tgtHeap); assert(isPinned(op)); *relocSuccess = false; return op; } } + assert(hasFwdPtr(p)); args->bytesCopied += copyBytes; @@ -1237,10 +1241,13 @@ void markAndAdd( if (hasFwdPtr(p)) { *opp = getFwdPtr(p); - assert(CC_isPointerMarked(objptrToPointer(getFwdPtr(p), NULL))); } - else if (isPinned(op)) + else if (pinType(getHeader(p)) == PIN_DOWN) { + // it is okay to not trace PIN_DOWN header objects because the remembered set will have them + // and we will trace it from there. + // it is dangerous to skip PIN_ANY objects here because the remSet entry for them might be created + // concurrently to LGC and LGC may miss them. return; } else @@ -1599,27 +1606,28 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) /* otherwise, object stays pinned, and we have to scavenge this remembered * entry into the toSpace. 
*/ - HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); - uint32_t fromDepth = HM_getObjptrDepth(remElem->from); - if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { - HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); - uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); - /* if this is a down-ptr completely inside the scope, - * no need to in-place things reachable from it - */ - if (!chunk->pinnedDuringCollection) - { - chunk->pinnedDuringCollection = TRUE; - HM_unlinkChunkPreserveLevelHead( - HM_HH_getChunkList(args->fromSpace[opDepth]), - chunk); - HM_appendChunk(&(args->pinned[opDepth]), chunk); + if (remElem->from != BOGUS_OBJPTR) { + uint32_t fromDepth = HM_getObjptrDepth(remElem->from); + if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { + HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); + uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + /* if this is a down-ptr completely inside the scope, + * no need to in-place things reachable from it + */ + if (!chunk->pinnedDuringCollection) + { + chunk->pinnedDuringCollection = TRUE; + HM_unlinkChunkPreserveLevelHead( + HM_HH_getChunkList(args->fromSpace[opDepth]), + chunk); + HM_appendChunk(&(args->pinned[opDepth]), chunk); + } + return; } } - else { - LGC_markAndScan(s, &op, op, rawArgs); - } + + LGC_markAndScan(s, &op, op, rawArgs); // LGC_markAndScan(s, &(remElem->from), rawArgs); @@ -1741,7 +1749,8 @@ void forwardHHObjptr( if (isObjptrInToSpace(op, args)) { assert(!hasFwdPtr(objptrToPointer(op, NULL))); - assert(!isPinned(op)); + // to space objects may be pinned now. + // assert(!isPinned(op)); return; } else if (HM_getLevelHead(HM_getChunkOf(objptrToPointer(op, NULL))) != @@ -1757,7 +1766,7 @@ void forwardHHObjptr( assert(!hasFwdPtr(objptrToPointer(fop, NULL))); assert(isObjptrInToSpace(fop, args)); assert(HM_getObjptrDepth(fop) == opDepth); - assert(!isPinned(fop)); + // assert(!isPinned(fop)); // assert(!CC_isPointerMarked(fop)); *opp = fop; return; @@ -1913,6 +1922,15 @@ pointer copyObject(pointer p, return frontier; } + +void delLastObj(objptr op, size_t objectSize, HM_HierarchicalHeap tgtHeap) +{ + HM_chunkList tgtChunkList = HM_HH_getChunkList(tgtHeap); + HM_chunk chunk = HM_getChunkOf(op); + assert(listContainsChunk(tgtChunkList, chunk)); + HM_updateChunkFrontierInList(tgtChunkList, chunk, HM_getChunkFrontier(chunk) - objectSize); +} + #endif /* MLTON_GC_INTERNAL_FUNCS */ GC_objectTypeTag computeObjectCopyParameters(GC_state s, pointer p, From 24aa306548daa3c98c0bf63bd7da19e91018bd6d Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 1 Jun 2022 13:40:44 -0400 Subject: [PATCH 047/114] some asserts --- runtime/gc/hierarchical-heap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index b1d5e8553..47db50893 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -455,8 +455,10 @@ void HM_HH_promoteChunks( assert(HM_HH_getDepth(hh) == currentDepth-1); } + assert(hh == thread->hierarchicalHeap); ES_clear(s, thread->hierarchicalHeap); + assert(hh == thread->hierarchicalHeap); #if ASSERT assert(hh == thread->hierarchicalHeap); @@ -1347,8 +1349,7 @@ void assertInvariants(GC_thread thread) NULL != chunk; chunk = chunk->nextChunk) { - assert(HM_getLevelHead(chunk) == cursor); - } + assert(HM_getLevelHead(chunk) == cursor); } } /* check sorted by depth */ From 8f23cfe60e9f4e2f212cca0565a4a7c7927aa579 Mon Sep 17 00:00:00 2001 
From: Jatin Arora Date: Wed, 1 Jun 2022 13:41:13 -0400 Subject: [PATCH 048/114] header ops --- runtime/gc/object.c | 17 +++++++++++++++-- runtime/gc/object.h | 4 +++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/runtime/gc/object.c b/runtime/gc/object.c index 2cd646c6a..c13d98f14 100644 --- a/runtime/gc/object.c +++ b/runtime/gc/object.c @@ -39,6 +39,15 @@ GC_header getHeader (pointer p) { return *(getHeaderp(p)); } +GC_header getRacyHeader (pointer ptr) { + GC_header header = getHeader(ptr); + while (isFwdHeader(header)) { + ptr = (pointer) header; + header = getHeader(ptr); + } + return header; +} + /* * Build the header for an object, given the index to its type info. */ @@ -90,8 +99,7 @@ void splitHeader(GC_state s, GC_header header, *numObjptrsRet = numObjptrs; } -static inline bool isMutable(GC_state s, pointer p) { - GC_header header = getHeader(p); +static inline bool isMutableH(GC_state s, GC_header header) { GC_objectTypeTag tag; uint16_t bytesNonObjptrs; uint16_t numObjptrs; @@ -100,6 +108,11 @@ static inline bool isMutable(GC_state s, pointer p) { return hasIdentity; } +static inline bool isMutable(GC_state s, pointer p) { + GC_header header = getHeader(p); + return isMutableH(s, header); +} + /* advanceToObjectData (s, p) * * If p points at the beginning of an object, then advanceToObjectData diff --git a/runtime/gc/object.h b/runtime/gc/object.h index a8579832d..065b5eb9b 100644 --- a/runtime/gc/object.h +++ b/runtime/gc/object.h @@ -82,7 +82,8 @@ COMPILE_TIME_ASSERT(sizeof_objptr__eq__sizeof_header, static inline GC_header* getHeaderp (pointer p); static inline GC_header getHeader (pointer p); -static inline GC_header buildHeaderFromTypeIndex (uint32_t t); +static inline GC_header getRacyHeader (pointer p); +static inline GC_header buildHeaderFromTypeIndex(uint32_t t); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ @@ -181,6 +182,7 @@ static inline void splitHeader (GC_state s, GC_header header, GC_objectTypeTag *tagRet, bool *hasIdentityRet, uint16_t *bytesNonObjptrsRet, uint16_t *numObjptrsRet); static inline bool isMutable(GC_state s, pointer p); +static inline bool isMutableH(GC_state s, GC_header h); static inline pointer advanceToObjectData (GC_state s, pointer p); static inline size_t objectSize(GC_state s, pointer p); From 580b8b69a0b562e1b7a8e7c8dac76c676d1522e6 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 1 Jun 2022 13:41:44 -0400 Subject: [PATCH 049/114] header aware pinning --- runtime/gc/pin.c | 50 +++++++++++++++++++++++++++++------------------- runtime/gc/pin.h | 4 +++- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index 8912f6123..f319a1c24 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -42,41 +42,51 @@ static inline GC_header getRep(enum PinType pt) return h; } -bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt) { +uint32_t unpinDepthOfH(GC_header h) +{ + return (h & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; +} + +bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt) +{ bool a, b; pinObjectInfo(op, unpinDepth, pt, &a, &b); return a; } -void pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, - bool* headerChange, bool* pinChange) +objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, + bool *headerChange, bool *pinChange) { pointer p = objptrToPointer(op, NULL); - assert (pt != PIN_NONE); + assert(pt != PIN_NONE); /*initialize with false*/ *headerChange = false; *pinChange = false; uint32_t maxUnpinDepth = 
TWOPOWER(UNPIN_DEPTH_BITS) - 1; - if (unpinDepth > maxUnpinDepth) { - DIE("unpinDepth %"PRIu32" exceeds max possible value %"PRIu32, + if (unpinDepth > maxUnpinDepth) + { + DIE("unpinDepth %" PRIu32 " exceeds max possible value %" PRIu32, unpinDepth, maxUnpinDepth); - return; + return op; } assert( - ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT - == (UNPIN_DEPTH_MASK & ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT) - ); + ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT == (UNPIN_DEPTH_MASK & ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT)); - while (true) { + while (true) + { GC_header header = getHeader(p); - uint32_t newUnpinDepth; - if(isPinned(op)) { - uint32_t previousUnpinDepth = - (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + if (isFwdHeader(header)) + { + assert(pt != PIN_DOWN); + return pinObjectInfo((objptr)header, unpinDepth, pt, headerChange, pinChange); + } + else if (isPinned(op)) + { + uint32_t previousUnpinDepth = unpinDepthOfH(header); newUnpinDepth = min(previousUnpinDepth, unpinDepth); } else { newUnpinDepth = unpinDepth; @@ -89,18 +99,18 @@ void pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, | getRep(nt); // setup the pin type if(newHeader == header) { - return; + return op; } else { if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { *headerChange = true; *pinChange = (nt != pinType(header)); - return; + return op; } } } DIE("should be impossible to reach here"); - return; + return op; } void unpinObject(objptr op) { @@ -128,14 +138,14 @@ bool isPinned(objptr op) { uint32_t unpinDepthOf(objptr op) { pointer p = objptrToPointer(op, NULL); - uint32_t d = (getHeader(p) & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + uint32_t d = unpinDepthOfH(getHeader(p)); return d; } bool tryUnpinWithDepth(objptr op, uint32_t opDepth) { pointer p = objptrToPointer(op, NULL); GC_header header = getHeader(p); - uint32_t d = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + uint32_t d = unpinDepthOfH(header); if (d >= opDepth) { GC_header newHeader = diff --git a/runtime/gc/pin.h b/runtime/gc/pin.h index aa2adb07f..4ce451db3 100644 --- a/runtime/gc/pin.h +++ b/runtime/gc/pin.h @@ -40,7 +40,7 @@ enum PinType */ bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt); -void pinObjectInfo(objptr op, +objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, bool* headerChange, @@ -60,6 +60,8 @@ void unpinObject(objptr op); /* read the current unpin-depth of an object */ uint32_t unpinDepthOf(objptr op); +uint32_t unpinDepthOfH(GC_header header); + /* unpin an object if its depth allows. Because the unpinDepth can change * concurrently, we want to make sure we use the logic in this function. 
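[Aside, not part of the patch series.] The "header aware pinning" change above makes pinObjectInfo follow a forwarding header and return the objptr it actually pinned. A minimal sketch of the calling pattern this enables, assuming the runtime's objptr/GC_header types and the pin.c/forward.c helpers shown in the surrounding diffs; the function name below is illustrative only, not something the patches add.

/* Sketch: pin an object that may have been forwarded by a racing LGC, and
 * publish the forwarded pointer so later readers skip the stale copy. */
static void sketch_pinThroughForwarding(objptr *opp, uint32_t unpinDepth)
{
  bool headerChange = false;
  bool pinChange = false;
  objptr op = *opp;
  objptr pinned = pinObjectInfo(op, unpinDepth, PIN_ANY, &headerChange, &pinChange);
  if (pinned != op)
  {
    /* op carried a forwarding header; the pin landed on the copy */
    assert(hasFwdPtr(objptrToPointer(op, NULL)));
    *opp = pinned;
  }
  assert(isPinned(pinned));
  assert(!hasFwdPtr(objptrToPointer(pinned, NULL)));
}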
From 18467c73512bb725fcf2c72d3a8e692e63540a29 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 3 Jun 2022 00:05:51 -0400 Subject: [PATCH 050/114] progress, but need to fix PIN_DOWN after PIN_ANY --- runtime/gc/assign.c | 10 +-- runtime/gc/concurrent-collection.c | 7 +- runtime/gc/decheck.c | 32 ++++++-- runtime/gc/decheck.h | 1 + runtime/gc/hierarchical-heap-collection.c | 92 +++++++++++++++-------- runtime/gc/hierarchical-heap.c | 3 +- runtime/gc/object.c | 3 +- runtime/gc/pin.c | 14 +++- 8 files changed, 109 insertions(+), 53 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 815512471..b5037aab0 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -93,9 +93,10 @@ void Assignable_writeBarrier( { assert(isObjptr(dst)); pointer dstp = objptrToPointer(dst, NULL); + pointer srcp = objptrToPointer(src, NULL); - assert (!hasFwdPtr(dst)); - assert (!isObjptr(src) || !hasFwdPtr(src)); + assert (!hasFwdPtr(dstp)); + assert (!isObjptr(src) || !hasFwdPtr(srcp)); // #if ASSERT // // check that field is actually inside this object @@ -154,7 +155,6 @@ void Assignable_writeBarrier( } uint32_t dd = dstHH->depth; - pointer srcp = objptrToPointer(src, NULL); bool src_de = (HM_getLevelHead(HM_getChunkOf(srcp)) == getThreadCurrent(s)->hierarchicalHeap) || decheck(s, src); if (src_de) { bool dst_de = (dd == 1) || decheck(s, dst); @@ -268,8 +268,8 @@ void Assignable_writeBarrier( // } } else { assert (isPinned(src)); - assert (!hasFwdPtr(src)); - assert (pinType(getHeader(src)) == PIN_ANY); + assert (!hasFwdPtr(srcp)); + assert (pinType(getHeader(srcp)) == PIN_ANY); } diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index c6f1f5921..e56d4a288 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -787,9 +787,14 @@ void CC_tryUnpinOrKeepPinned( /* otherwise, object stays pinned, and we have to keep this remembered * entry into the toSpace. 
*/ + } else { + GC_header header = getHeader(objptrToPointer(remElem->object, NULL)); + if (pinType(header) == PIN_ANY && + unpinDepthOfH(header) > HM_HH_getDepth(args->tgtHeap)) { + return; + } } - /* grossly overapproximate for entangled objects */ HM_remember(args->newRemSet, remElem, false); diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 14a74ad04..53c0a23a0 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -270,13 +270,15 @@ int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2) uint32_t p1mask = (1 << tree_depth(t1)) - 1; uint32_t p2 = norm_path(t2); uint32_t p2mask = (1 << tree_depth(t2)) - 1; - assert(p1 != p2); uint32_t shared_mask = p1mask & p2mask; uint32_t shared_upper_bit = shared_mask+1; uint32_t x = ((p1 ^ p2) & shared_mask) | shared_upper_bit; uint32_t lca_bit = x & -x; // uint32_t lca_mask = lca_bit-1; int llen = bitIndex(lca_bit); + if (p1 == p2) { + return tree_depth(t1) + 1; + } assert(llen == lcaLen(p1, p2)); return llen+1; } @@ -325,6 +327,7 @@ void traverseAndCheck( __attribute__((unused)) void *rawArgs) { GC_header header = getHeader(objptrToPointer(op, NULL)); + pointer p = objptrToPointer (op, NULL); assert (pinType(header) == PIN_ANY); assert (!isFwdHeader(header)); if (isMutableH(s, header)) { @@ -333,7 +336,7 @@ void traverseAndCheck( else { struct GC_foreachObjptrClosure echeckClosure = {.fun = traverseAndCheck, .env = NULL}; - foreachObjptrInObject(s, op, &trueObjptrPredicateClosure, &echeckClosure, FALSE); + foreachObjptrInObject(s, p, &trueObjptrPredicateClosure, &echeckClosure, FALSE); } } #else @@ -365,12 +368,14 @@ void make_entangled( // // while managing entanglement, we stay ordered wrt the root of the entanglement // return; // } - - GC_header header = getRacyHeader(objptrToPointer(ptr, NULL)); + pointer p_ptr = objptrToPointer(ptr, NULL); + GC_header header = getRacyHeader(p_ptr); + assert(!isFwdHeader(header)); bool mutable = isMutableH(s, header); bool headerChange = false, pinChange = false; objptr new_ptr = ptr; - uint32_t unpinDepth = lcaHeapDepth(mea->reader, allocator); + // unpin depth according to the caller + uint32_t unpinDepth = mea->unpinDepth; if (pinType(header) != PIN_ANY || unpinDepthOfH(header) > unpinDepth) { @@ -381,8 +386,11 @@ void make_entangled( { struct GC_foreachObjptrClosure emanageClosure = {.fun = make_entangled, .env = rawArgs}; - foreachObjptrInObject(s, ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); + // the unpinDepth of reachable reachab maybe smaller. 
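      /* (illustrative aside, not from the patch) objects reachable from ptr may
       * already carry a shallower unpin depth, so the recursion below runs with
       * unpinDepth = min(caller's unpinDepth, unpinDepthOfH(header));
       * mea->unpinDepth is restored to the caller's value once the children
       * have been visited. */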
+ mea->unpinDepth = min(unpinDepth, unpinDepthOfH(header)); + foreachObjptrInObject(s, p_ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + assert(pinType(getHeader(new_ptr)) == PIN_ANY); } if (pinChange) { @@ -390,11 +398,16 @@ void make_entangled( HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); } } + mea->unpinDepth = unpinDepth; + assert (!hasFwdPtr(new_ptr)); if (ptr != new_ptr) { - // Help LGC move along + // Help LGC move along--because this reader might traverse this pointer + // and it shouldn't see the forwarded one assert(hasFwdPtr(ptr)); + assert(!hasFwdPtr(new_ptr)); + assert(isPinned(new_ptr)); *opp = new_ptr; } @@ -404,6 +417,8 @@ void make_entangled( assert(ES_contains(NULL, new_ptr)); } + traverseAndCheck(s, &new_ptr, new_ptr, NULL); + assert (!mutable || ES_contains(NULL, new_ptr)); } @@ -440,7 +455,8 @@ objptr manage_entangled( if (manage) { struct ManageEntangledArgs mea = { .reader = reader, - .root = allocator + .root = allocator, + .unpinDepth = min(unpinDepthOfH(header), unpinDepth) }; make_entangled(s, &ptr, ptr, (void*) &mea); } diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index d283e1e2c..8df48aa18 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -24,6 +24,7 @@ struct ManageEntangledArgs { decheck_tid_t reader; decheck_tid_t root; + uint32_t unpinDepth; }; #define DECHECK_BOGUS_BITS ((uint64_t)0) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 2a45213c9..57162da1b 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -75,7 +75,8 @@ bool hhContainsChunk(HM_HierarchicalHeap hh, HM_chunk theChunk); * * @return the tag of the object */ -GC_objectTypeTag computeObjectCopyParameters(GC_state s, pointer p, +GC_objectTypeTag computeObjectCopyParameters(GC_state s, GC_header header, + pointer p, size_t *objectSize, size_t *copySize, size_t *metaDataSize); @@ -449,7 +450,6 @@ void HM_HHC_collectLocal(uint32_t desiredScope) HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); } forwardHHObjptrArgs.concurrent = false; - CC_workList_free(s, &(forwardHHObjptrArgs.worklist)); forwardHHObjptrArgs.toDepth = HM_HH_INVALID_DEPTH; for (uint32_t i = 0; i <= maxDepth; i++) @@ -759,6 +759,8 @@ void HM_HHC_collectLocal(uint32_t desiredScope) HM_appendChunkList(HM_HH_getChunkList(fromSpaceLevel), &(pinned[depth])); } + CC_workList_free(s, &(forwardHHObjptrArgs.worklist)); + /* Build the toSpace hh */ HM_HierarchicalHeap hhToSpace = NULL; for (uint32_t i = 0; i <= maxDepth; i++) @@ -984,7 +986,7 @@ bool isObjptrInToSpace(objptr op, struct ForwardHHObjptrArgs *args) HM_chunk c = HM_getChunkOf(objptrToPointer(op, NULL)); HM_HierarchicalHeap levelHead = HM_getLevelHeadPathCompress(c); uint32_t depth = HM_HH_getDepth(levelHead); - assert(depth <= args->maxDepth); + // assert(depth <= args->maxDepth); assert(NULL != levelHead); return args->toSpace[depth] == levelHead; @@ -1007,6 +1009,7 @@ objptr relocateObject( HM_chunkList tgtChunkList = HM_HH_getChunkList(tgtHeap); GC_header header = getHeader(p); + assert (!isFwdHeader(header)); if (pinType(header) != PIN_NONE) { @@ -1023,6 +1026,7 @@ objptr relocateObject( /* compute object size and bytes to be copied */ computeObjectCopyParameters(s, + header, p, &objectBytes, ©Bytes, @@ -1030,6 +1034,9 @@ objptr relocateObject( if (!HM_getChunkOf(p)->mightContainMultipleObjects) { + /// SAM_UNSAFE 
:: potential bug here because race with reader + // what if the reader tries to access levelHead + /* This chunk contains *only* this object, so no need to copy. Instead, * just move the chunk. Don't forget to update the levelHead, too! */ HM_chunk chunk = HM_getChunkOf(p); @@ -1059,13 +1066,15 @@ objptr relocateObject( } else { - if (!__sync_bool_compare_and_swap(getFwdPtrp(p), header, newPointer)) + bool success = __sync_bool_compare_and_swap(getFwdPtrp(p), header, newPointer); + if (!success) { delLastObj(newPointer, objectBytes, tgtHeap); assert(isPinned(op)); *relocSuccess = false; return op; } + assert (getFwdPtr(p) == newPointer); } assert(hasFwdPtr(p)); @@ -1229,18 +1238,24 @@ void markAndAdd( pointer p = objptrToPointer(op, NULL); HM_chunk chunk = HM_getChunkOf(p); uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + bool isInToSpace = isObjptrInToSpace(op, args); if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) { return; } - else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk) && !isInToSpace) { return; } if (hasFwdPtr(p)) { - *opp = getFwdPtr(p); + objptr fop = getFwdPtr(p); + assert(!hasFwdPtr(objptrToPointer(fop, NULL))); + assert(isObjptrInToSpace(fop, args)); + assert(HM_getObjptrDepth(fop) == opDepth); + *opp = fop; // SAM_UNSAFE :: potential bug here because race with reader + return; } else if (pinType(getHeader(p)) == PIN_DOWN) { @@ -1250,7 +1265,7 @@ void markAndAdd( // concurrently to LGC and LGC may miss them. return; } - else + else if (!isInToSpace) { assert(!hasFwdPtr(p)); assert(args->concurrent); @@ -1268,13 +1283,14 @@ void markAndAdd( { chunk->retireChunk = true; *opp = op_new; + assert(!hasFwdPtr(objptrToPointer(op_new, NULL))); // markObj(objptrToPointer(op_new, NULL)); CC_workList_push(s, &(args->worklist), op_new); } else { // markObj(p); - CC_workList_push(s, &(args->worklist), op); + assert (isPinned(op)); if (!chunk->pinnedDuringCollection) { chunk->pinnedDuringCollection = TRUE; @@ -1288,6 +1304,7 @@ void markAndAdd( chunk); HM_appendChunk(&(args->pinned[opDepth]), chunk); } + CC_workList_push(s, &(args->worklist), op); } } // else if () @@ -1397,9 +1414,10 @@ void LGC_markAndScan( HM_chunk chunk = HM_getChunkOf(p); struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); - assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); + assert(!hasFwdPtr(p)); CC_workList_push(s, &(args->worklist), op); - if (!chunk->pinnedDuringCollection) + + if (!isObjptrInToSpace(op, args) && !chunk->pinnedDuringCollection) { chunk->pinnedDuringCollection = TRUE; @@ -1417,6 +1435,13 @@ void LGC_markAndScan( // markAndAdd (s, opp, op, rawArgs); // CC_workList_push(s, &(args->worklist), op); phaseLoop(s, rawArgs, &markClosure); + assert(CC_workList_isEmpty(s, &(args->worklist))); + + struct GC_foreachObjptrClosure checkClosure = + {.fun = checkFun, .env = (void *)args}; + + CC_workList_push(s, &(args->worklist), op); + phaseLoop(s, rawArgs, &checkClosure); // } } // void LGC_markAndScan( @@ -1607,25 +1632,25 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) /* otherwise, object stays pinned, and we have to scavenge this remembered * entry into the toSpace. 
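    /* (illustrative aside, not from the patch) the case split relies on how the
     * two pin types are produced: PIN_DOWN objects were remembered by the write
     * barrier when the down-pointer was created, so tryUnpinOrKeepPinned reaches
     * them through the remembered set; PIN_ANY entries can be added concurrently
     * by manage_entangled, so LGC cannot count on seeing them and must trace the
     * object here instead. */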
*/ HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); - if (remElem->from != BOGUS_OBJPTR) { - uint32_t fromDepth = HM_getObjptrDepth(remElem->from); - if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { - HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); - uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); - /* if this is a down-ptr completely inside the scope, - * no need to in-place things reachable from it - */ - if (!chunk->pinnedDuringCollection) - { - chunk->pinnedDuringCollection = TRUE; - HM_unlinkChunkPreserveLevelHead( - HM_HH_getChunkList(args->fromSpace[opDepth]), - chunk); - HM_appendChunk(&(args->pinned[opDepth]), chunk); - } - return; - } - } + // if (remElem->from != BOGUS_OBJPTR) { + // uint32_t fromDepth = HM_getObjptrDepth(remElem->from); + // if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { + // HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); + // uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); + // /* if this is a down-ptr completely inside the scope, + // * no need to in-place things reachable from it + // */ + // if (!chunk->pinnedDuringCollection) + // { + // chunk->pinnedDuringCollection = TRUE; + // HM_unlinkChunkPreserveLevelHead( + // HM_HH_getChunkList(args->fromSpace[opDepth]), + // chunk); + // HM_appendChunk(&(args->pinned[opDepth]), chunk); + // } + // return; + // } + // } LGC_markAndScan(s, &op, op, rawArgs); @@ -1819,6 +1844,7 @@ void forwardHHObjptr( /* compute object size and bytes to be copied */ tag = computeObjectCopyParameters(s, + getHeader(p), p, &objectBytes, ©Bytes, @@ -1926,23 +1952,23 @@ pointer copyObject(pointer p, void delLastObj(objptr op, size_t objectSize, HM_HierarchicalHeap tgtHeap) { HM_chunkList tgtChunkList = HM_HH_getChunkList(tgtHeap); - HM_chunk chunk = HM_getChunkOf(op); + HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); assert(listContainsChunk(tgtChunkList, chunk)); HM_updateChunkFrontierInList(tgtChunkList, chunk, HM_getChunkFrontier(chunk) - objectSize); } #endif /* MLTON_GC_INTERNAL_FUNCS */ -GC_objectTypeTag computeObjectCopyParameters(GC_state s, pointer p, +GC_objectTypeTag computeObjectCopyParameters(GC_state s, + GC_header header, + pointer p, size_t *objectSize, size_t *copySize, size_t *metaDataSize) { - GC_header header; GC_objectTypeTag tag; uint16_t bytesNonObjptrs; uint16_t numObjptrs; - header = getHeader(p); splitHeader(s, header, &tag, NULL, &bytesNonObjptrs, &numObjptrs); /* Compute the space taken by the metadata and object body. */ diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 47db50893..a7c0e996f 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -391,7 +391,8 @@ void HM_HH_promoteChunks( thread->hierarchicalHeap = parent; hh = parent; } - else { + else + { assert(HM_getLevelHead(thread->currentChunk) == hh); #if ASSERT diff --git a/runtime/gc/object.c b/runtime/gc/object.c index c13d98f14..91d6ccf91 100644 --- a/runtime/gc/object.c +++ b/runtime/gc/object.c @@ -36,7 +36,8 @@ GC_header* getHeaderp (pointer p) { * Returns the header for the object pointed to by p. 
*/ GC_header getHeader (pointer p) { - return *(getHeaderp(p)); + GC_header h = *(getHeaderp(p)); + return h; } GC_header getRacyHeader (pointer ptr) { diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index f319a1c24..884f4ad59 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -74,7 +74,6 @@ objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, } assert( ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT == (UNPIN_DEPTH_MASK & ((GC_header)unpinDepth) << UNPIN_DEPTH_SHIFT)); - while (true) { GC_header header = getHeader(p); @@ -82,9 +81,11 @@ objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, if (isFwdHeader(header)) { assert(pt != PIN_DOWN); - return pinObjectInfo((objptr)header, unpinDepth, pt, headerChange, pinChange); + op = getFwdPtr(p); + p = objptrToPointer(op, NULL); + continue; } - else if (isPinned(op)) + else if (pinType(header) != PIN_NONE) { uint32_t previousUnpinDepth = unpinDepthOfH(header); newUnpinDepth = min(previousUnpinDepth, unpinDepth); @@ -99,12 +100,15 @@ objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, | getRep(nt); // setup the pin type if(newHeader == header) { + assert (!hasFwdPtr(p)); return op; } else { if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { *headerChange = true; *pinChange = (nt != pinType(header)); + assert (!hasFwdPtr(p)); + assert(pinType(newHeader) == nt); return op; } } @@ -132,8 +136,10 @@ bool isPinned(objptr op) { * (otherwise, there could be a forward pointer in this spot) * ...and then check the mark */ - return (1 == (h & GC_VALID_HEADER_MASK)) && + bool result = (1 == (h & GC_VALID_HEADER_MASK)) && (((h & PIN_MASK) >> PIN_SHIFT) > 0); + assert (result == (pinType(h) != PIN_NONE)); + return result; } uint32_t unpinDepthOf(objptr op) { From 146045379bccf4d18edc03074b0abb19c928723d Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 5 Jun 2022 12:45:03 -0400 Subject: [PATCH 051/114] fix read barrier ref calls --- include/c-chunk.h | 8 ++-- runtime/gc/assign.c | 72 +++++++++++++++--------------- runtime/gc/concurrent-collection.c | 3 +- 3 files changed, 41 insertions(+), 42 deletions(-) diff --git a/include/c-chunk.h b/include/c-chunk.h index aa92743a8..c0a74b34f 100644 --- a/include/c-chunk.h +++ b/include/c-chunk.h @@ -78,8 +78,8 @@ Real64 ArrayR64_cas(Real64* a, Word64 i, Real64 x, Real64 y) { static inline Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { - Objptr result = __sync_val_compare_and_swap(r, x, y); - return Assignable_decheckObjptr(r, result); + Objptr res = __sync_val_compare_and_swap(r, x, y); + return Assignable_decheckObjptr(r, res); } #define ArrayW8_cas(a, i, x, y) __sync_val_compare_and_swap(((Word8*)(a)) + (i), (x), (y)) @@ -95,8 +95,8 @@ Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { static inline Objptr ArrayP_cas(Objptr* a, Word64 i, Objptr x, Objptr y) { - Objptr result = __sync_val_compare_and_swap(a + i, x, y); - return Assignable_decheckObjptr(a, result); + Objptr res = __sync_val_compare_and_swap(a + i, x, y); + return Assignable_decheckObjptr(a, res); } static inline void GC_writeBarrier(CPointer s, Objptr obj, CPointer dst, Objptr src) { diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index b5037aab0..14aac6767 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -13,30 +13,24 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src) GC_state s = pthread_getspecific(gcstate_key); s->cumulativeStatistics->numDisentanglementChecks++; objptr new_src = src; - HM_HierarchicalHeap dstHH = 
HM_getLevelHead(HM_getChunkOf(dst)); - if (!isObjptr(src) || HM_HH_getDepth(dstHH) == 0 || !ES_contains(NULL, dst)) + pointer dstp = objptrToPointer(dst, NULL); + HM_HierarchicalHeap dstHH = HM_getLevelHead(HM_getChunkOf(dstp)); + + if (!isObjptr(src) || HM_HH_getDepth(dstHH) == 0) { return src; } + assert(ES_contains(NULL, dst)); HM_EBR_leaveQuiescentState(s); if (!decheck(s, src)) { - assert (isMutable(s, dst)); - // if (!ES_contains(NULL, dst)) { - // if (!decheck(s, dst)) - // { - // assert(false); - // } - // assert (isPinned(src)); - // assert (!hasFwdPtr(src)); - // assert (pinType(getHeader(src)) == PIN_ANY); - // } - objptr new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); + assert (isMutable(s, dstp)); + new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); assert (isPinned(new_src)); } HM_EBR_enterQuiescentState(s); - assert (!hasFwdPtr(new_src)); + assert (!hasFwdPtr(objptrToPointer(new_src, NULL))); return new_src; } @@ -49,11 +43,13 @@ objptr Assignable_readBarrier( s->cumulativeStatistics->numDisentanglementChecks++; objptr ptr = *field; - HM_HierarchicalHeap objHH = HM_getLevelHead(HM_getChunkOf(obj)); - if (!isObjptr(ptr) || HM_HH_getDepth(objHH) == 0 || !ES_contains(NULL, obj)) + pointer objp = objptrToPointer(obj, NULL); + HM_HierarchicalHeap objHH = HM_getLevelHead(HM_getChunkOf(objp)); + if (!isObjptr(ptr) || HM_HH_getDepth(objHH) == 0) { return ptr; } + assert (ES_contains(NULL, obj)); HM_EBR_leaveQuiescentState(s); if (!decheck(s, ptr)) { @@ -68,23 +64,31 @@ objptr Assignable_readBarrier( // assert(pinType(getHeader(ptr)) == PIN_ANY); // } ptr = manage_entangled(s, ptr, getThreadCurrent(s)->decheckState); - // assert (isPinned(ptr)); } HM_EBR_enterQuiescentState(s); - assert (!hasFwdPtr(ptr)); + assert (!hasFwdPtr(objptrToPointer(ptr, NULL))); return ptr; } #else -objptr Assignable_decheckObjptr(objptr dst, objptr src) {return src;} + +objptr Assignable_decheckObjptr(objptr dst, objptr src) { + (void) dst; + return src; +} + objptr Assignable_readBarrier( - GC_state s, - objptr obj, - objptr *field) - {return *field;} + GC_state s, + objptr obj, + objptr *field) { + (void)s; + (void)obj; + return *field; +} #endif + void Assignable_writeBarrier( GC_state s, objptr dst, @@ -206,14 +210,8 @@ void Assignable_writeBarrier( ES_add(s, HM_HH_getSuspects(dhh), dst); } } - else { - if (!isPinned(dst)) { - #ifdef DETECT_ENTANGLEMENT - manage_entangled(s, dst, getThreadCurrent(s)->decheckState); - #endif - } - // assert (isPinned(dst)); - // assert (ES_contains(NULL, dst)); + else if(dstHH->depth != 0) { + traverseAndCheck(s, &dst, dst, NULL); manage_entangled (s, src, HM_getChunkOf(dstp)->decheckState); } @@ -267,9 +265,10 @@ void Assignable_writeBarrier( // ES_add(s, HM_HH_getSuspects(dhh), dst); // } } else { - assert (isPinned(src)); - assert (!hasFwdPtr(srcp)); - assert (pinType(getHeader(srcp)) == PIN_ANY); + // assert (isPinned(src)); + // assert (!hasFwdPtr(srcp)); + // assert (pinType(getHeader(srcp)) == PIN_ANY); + traverseAndCheck(s, &src, src, NULL); } @@ -390,8 +389,9 @@ void Assignable_writeBarrier( // Assignable_writeBarrier(s, dst, field, src, false); // // *field = src; // } -// void Assignable_casBarrier (GC_state s, objptr dst, objptr* field, objptr src) { -// Assignable_writeBarrier(s, dst, field, src, true); +// void Assignable_casBarrier (objptr dst, objptr* field, objptr src) { +// GC_state s = pthread_getspecific(gcstate_key); +// Assignable_writeBarrier(s, dst, field, src); // // cas(field, (*field), dst); //return? 
// } diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index e56d4a288..fe3eee11b 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -790,14 +790,13 @@ void CC_tryUnpinOrKeepPinned( } else { GC_header header = getHeader(objptrToPointer(remElem->object, NULL)); if (pinType(header) == PIN_ANY && - unpinDepthOfH(header) > HM_HH_getDepth(args->tgtHeap)) { + unpinDepthOfH(header) >= HM_HH_getDepth(args->tgtHeap)) { return; } } HM_remember(args->newRemSet, remElem, false); - /** SAM_NOTE: The goal of the following was to filter remset entries * to only keep the "shallowest" entries. But this is really tricky, * because all three of the unpinDepth, opDepth, and fromDepth can From e31a01b37a0c6383dc57ab1158ed1de1ba77101d Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 5 Jun 2022 12:47:28 -0400 Subject: [PATCH 052/114] fix pin change issue --- runtime/gc/decheck.c | 28 ++++++++++++++-------------- runtime/gc/decheck.h | 7 ++++++- runtime/gc/pin.c | 8 +++++--- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 53c0a23a0..1d07d2e77 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -362,12 +362,11 @@ void make_entangled( struct ManageEntangledArgs* mea = (struct ManageEntangledArgs*) rawArgs; HM_chunk chunk = HM_getChunkOf(objptrToPointer(ptr, NULL)); - decheck_tid_t allocator = chunk->decheckState; - // if (!decheckIsOrdered(mea->root, allocator)) { // // while managing entanglement, we stay ordered wrt the root of the entanglement // return; // } + pointer p_ptr = objptrToPointer(ptr, NULL); GC_header header = getRacyHeader(p_ptr); assert(!isFwdHeader(header)); @@ -386,28 +385,27 @@ void make_entangled( { struct GC_foreachObjptrClosure emanageClosure = {.fun = make_entangled, .env = rawArgs}; - // the unpinDepth of reachable reachab maybe smaller. + // the unpinDepth of reachable maybe smaller. 
mea->unpinDepth = min(unpinDepth, unpinDepthOfH(header)); foreachObjptrInObject(s, p_ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); - assert(pinType(getHeader(new_ptr)) == PIN_ANY); + assert(pinType(getHeader(objptrToPointer(new_ptr, NULL))) == PIN_ANY); } if (pinChange) { struct HM_remembered remElem_ = {.object = new_ptr, .from = BOGUS_OBJPTR}; - HM_HH_rememberAtLevel(HM_getLevelHeadPathCompress(chunk), &(remElem_), true); + HM_HH_rememberAtLevel(HM_getLevelHead(chunk), &(remElem_), true); } } mea->unpinDepth = unpinDepth; - assert (!hasFwdPtr(new_ptr)); + assert(!hasFwdPtr(objptrToPointer(new_ptr, NULL))); + assert(isPinned(new_ptr)); if (ptr != new_ptr) { // Help LGC move along--because this reader might traverse this pointer // and it shouldn't see the forwarded one - assert(hasFwdPtr(ptr)); - assert(!hasFwdPtr(new_ptr)); - assert(isPinned(new_ptr)); + assert(hasFwdPtr(objptrToPointer(ptr, NULL))); *opp = new_ptr; } @@ -617,12 +615,14 @@ void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uint32_t st } #endif -void disentangleObject(GC_state s, objptr ptr) { - if (isPinned(ptr)) { - unpinObject(ptr); - if (ES_contains(NULL, ptr)) { - ES_unmark(s, ptr); +bool disentangleObject(GC_state s, objptr op, uint32_t opDepth) { + if (isPinned(op)) { + bool success = tryUnpinWithDepth(op, opDepth); + if (success && ES_contains(NULL, op)) { + ES_unmark(s, op); + return true; } } + return false; } diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index 8df48aa18..6b8040d22 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -50,8 +50,13 @@ void decheckInit(GC_state s); bool decheck(GC_state s, objptr ptr); bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1); int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2); -void disentangleObject(GC_state s, objptr ptr); +bool disentangleObject(GC_state s, objptr op, uint32_t opDepth); objptr manage_entangled(GC_state s, objptr ptr, decheck_tid_t reader); +void traverseAndCheck( + GC_state s, + __attribute__((unused)) objptr *opp, + objptr op, + __attribute__((unused)) void *rawArgs); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ #endif /* _DECHECK_H_ */ diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index 884f4ad59..5c07fb718 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -92,7 +92,10 @@ objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, } else { newUnpinDepth = unpinDepth; } - enum PinType nt = maxPT(pt, pinType(header)); + + /* if we are changing the unpinDepth, then the new pinType (nt) is + * equal to the function argument pt. Otherwise its the max. */ + enum PinType nt = newUnpinDepth < unpinDepthOfH(header) ? 
pt : maxPT(pt, pinType(header)); GC_header newHeader = (header & (~UNPIN_DEPTH_MASK)) // clear unpin bits @@ -149,6 +152,7 @@ uint32_t unpinDepthOf(objptr op) { } bool tryUnpinWithDepth(objptr op, uint32_t opDepth) { + pointer p = objptrToPointer(op, NULL); GC_header header = getHeader(p); uint32_t d = unpinDepthOfH(header); @@ -161,9 +165,7 @@ bool tryUnpinWithDepth(objptr op, uint32_t opDepth) { return __sync_bool_compare_and_swap(getHeaderp(p), header, newHeader); } - return false; - } From 2a76a346045f846a9d9d9ec37c70fa053aaaaf13 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 5 Jun 2022 12:53:28 -0400 Subject: [PATCH 053/114] add mark&sweep phased for entangled objects, add unpinnning in mark and add --- runtime/gc/hierarchical-heap-collection.c | 158 +++++++++++++++------- 1 file changed, 111 insertions(+), 47 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 57162da1b..93170a8bb 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -44,7 +44,7 @@ void tryUnpinOrKeepPinned( HM_remembered remElem, void *rawArgs); -void LGC_markAndScan(GC_state s, objptr *opp, objptr op, void *rawArgs); +void LGC_markAndScan(GC_state s, HM_remembered remElem, void *rawArgs); void unmark(GC_state s, objptr *opp, objptr op, void *rawArgs); void copySuspect(GC_state s, objptr *opp, objptr op, void *rawArghh); @@ -54,8 +54,11 @@ void forwardObjptrsOfRemembered( HM_remembered remElem, void *rawArgs); -void unmarkWrapper(GC_state s, HM_remembered remElem, void *rawArgs); - +void unmarkWrapper( + __attribute__((unused)) GC_state s, + HM_remembered remElem, + __attribute__((unused)) void *rawArgs); +void addEntangledToRemSet(GC_state s, objptr op, uint32_t opDepth, struct ForwardHHObjptrArgs *args); // void scavengeChunkOfPinnedObject(GC_state s, objptr op, void* rawArgs); #if ASSERT @@ -593,9 +596,11 @@ void HM_HHC_collectLocal(uint32_t desiredScope) /* use the remembered (pinned) entries at this level as extra roots */ /* forward the from-elements of the down-ptrs */ - struct HM_foreachDownptrClosure closure = - {.fun = forwardObjptrsOfRemembered, .env = (void *)&forwardHHObjptrArgs}; - HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + // struct HM_foreachDownptrClosure closure = + // {.fun = forwardObjptrsOfRemembered, .env = (void *)&forwardHHObjptrArgs}; + // HM_foreachRemembered pops the public remSet into private. So it interferes + // with the unmarking phase of GC. 
+ // HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); if (NULL != HM_HH_getChunkList(toSpaceLevel)->firstChunk) { @@ -735,16 +740,23 @@ void HM_HHC_collectLocal(uint32_t desiredScope) continue; } - // HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; - // if (toSpaceLevel != NULL) { + HM_HierarchicalHeap toSpaceLevel = toSpace[depth]; + // if (fromSpaceLevel != NULL) { // struct HM_foreachDownptrClosure closure = - // {.fun = unmarkWrapper, .env = (void *)&forwardHHObjptrArgs}; - // HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + // {.fun = tryUnpinOrKeepPinned, .env = (void *)&forwardHHObjptrArgs}; + // // HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); // /*go through the public of fromSpaceLevel, they will be joined later anyway*/ - // assert((HM_HH_getRemSet(fromSpaceLevel)->private).firstChunk == NULL); + // // assert((HM_HH_getRemSet(fromSpaceLevel)->private).firstChunk == NULL); + // forwardHHObjptrArgs.toDepth = depth; // HM_foreachRemembered(s, HM_HH_getRemSet(fromSpaceLevel), &closure); // } + if (toSpaceLevel != NULL) { + struct HM_foreachDownptrClosure unmarkClosure = + {.fun = unmarkWrapper, .env = NULL}; + HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure); + } + /* unset the flags on pinned chunks and update their HH pointer */ for (HM_chunk chunkCursor = pinned[depth].firstChunk; chunkCursor != NULL; @@ -753,6 +765,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) assert(chunkCursor->levelHead == HM_HH_getUFNode(fromSpaceLevel)); assert(chunkCursor->pinnedDuringCollection); chunkCursor->pinnedDuringCollection = FALSE; + chunkCursor->retireChunk = FALSE; } /* put the pinned chunks into the toSpace */ @@ -1062,7 +1075,10 @@ objptr relocateObject( objptr newPointer = pointerToObjptr(copyPointer + metaDataBytes, NULL); if (!args->concurrent) { + assert(!isPinned(op)); + assert (__sync_bool_compare_and_swap(getFwdPtrp(p), header, newPointer)); *(getFwdPtrp(p)) = newPointer; + // printf("forwarding %p from %p", p, header); } else { @@ -1074,9 +1090,8 @@ objptr relocateObject( *relocSuccess = false; return op; } - assert (getFwdPtr(p) == newPointer); } - + assert (getFwdPtr(p) == newPointer); assert(hasFwdPtr(p)); args->bytesCopied += copyBytes; @@ -1243,7 +1258,11 @@ void markAndAdd( { return; } - else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk) && !isInToSpace) + else if (isInToSpace) { + assert(!hasFwdPtr(p)); + return; + } + else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { return; } @@ -1257,16 +1276,27 @@ void markAndAdd( *opp = fop; // SAM_UNSAFE :: potential bug here because race with reader return; } - else if (pinType(getHeader(p)) == PIN_DOWN) + else if (CC_isPointerMarked(p)) { + assert (pinType(getHeader(p)) == PIN_ANY); + return; + } + + if (pinType(getHeader(p)) == PIN_DOWN && unpinDepthOf(op) >= opDepth) { + // try unpinning, if it succeeds we will trace this object now. + disentangleObject(s, op, opDepth); + } + + if (pinType(getHeader(p)) == PIN_DOWN) { - // it is okay to not trace PIN_DOWN header objects because the remembered set will have them - // and we will trace it from there. + // it is okay to not trace PIN_DOWN objects because the remembered set will have them + // and we will definitely trace; this relies on the failure of unpinning. // it is dangerous to skip PIN_ANY objects here because the remSet entry for them might be created // concurrently to LGC and LGC may miss them. 
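    /* (illustrative aside, not from the patch) the PIN_ANY case is picked up just
     * below: markAndAdd calls addEntangledToRemSet, which marks the object and
     * records a from = BOGUS_OBJPTR entry in the to-space remembered set so that
     * the later unmarking pass (unmarkWrapper over the public remSet) can clear
     * the mark bits again. */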
return; } - else if (!isInToSpace) + else { + assert (!CC_isPointerMarked(p)); assert(!hasFwdPtr(p)); assert(args->concurrent); HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; @@ -1289,8 +1319,13 @@ void markAndAdd( } else { - // markObj(p); assert (isPinned(op)); + GC_header header = getHeader(p); + + if (pinType(header) == PIN_ANY) { + addEntangledToRemSet(s, op, opDepth, args); + } + if (!chunk->pinnedDuringCollection) { chunk->pinnedDuringCollection = TRUE; @@ -1404,18 +1439,43 @@ void phaseLoop(GC_state s, void *rawArgs, GC_foreachObjptrClosure fClosure) assert(CC_workList_isEmpty(s, worklist)); } +void addEntangledToRemSet( + GC_state s, + objptr op, + uint32_t opDepth, + struct ForwardHHObjptrArgs *args) { + pointer p = objptrToPointer(op, NULL); + GC_header header = getHeader(p); + + if (pinType(header) == PIN_ANY && !CC_isPointerMarked(p)) + { + markObj(p); + + if (NULL == args->toSpace[opDepth]) + { + args->toSpace[opDepth] = HM_HH_new(s, opDepth); + } + + struct HM_remembered remElem_ = {.object = op, .from = BOGUS_OBJPTR}; + HM_remember (HM_HH_getRemSet(args->toSpace[opDepth]), &remElem_, true); + + traverseAndCheck(s, &op, op, NULL); + } +} + void LGC_markAndScan( GC_state s, - objptr *opp, - objptr op, + HM_remembered remElem, void *rawArgs) { + objptr op = remElem->object; pointer p = objptrToPointer(op, NULL); HM_chunk chunk = HM_getChunkOf(p); struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; uint32_t opDepth = HM_HH_getDepth(HM_getLevelHead(chunk)); assert(!hasFwdPtr(p)); - CC_workList_push(s, &(args->worklist), op); + assert(isPinned(op)); + addEntangledToRemSet(s, op, opDepth, args); if (!isObjptrInToSpace(op, args) && !chunk->pinnedDuringCollection) { @@ -1430,19 +1490,12 @@ void LGC_markAndScan( chunk); HM_appendChunk(&(args->pinned[opDepth]), chunk); } + + CC_workList_push(s, &(args->worklist), op); struct GC_foreachObjptrClosure markClosure = {.fun = markAndAdd, .env = (void *)args}; - // markAndAdd (s, opp, op, rawArgs); - // CC_workList_push(s, &(args->worklist), op); phaseLoop(s, rawArgs, &markClosure); assert(CC_workList_isEmpty(s, &(args->worklist))); - - struct GC_foreachObjptrClosure checkClosure = - {.fun = checkFun, .env = (void *)args}; - - CC_workList_push(s, &(args->worklist), op); - phaseLoop(s, rawArgs, &checkClosure); - // } } // void LGC_markAndScan( // GC_state s, @@ -1518,15 +1571,22 @@ void LGC_markAndScan( // } // } -void unmarkWrapper(GC_state s, HM_remembered remElem, void *rawArgs) +void unmarkWrapper( + __attribute__((unused)) GC_state s, + HM_remembered remElem, + __attribute__((unused)) void *rawArgs) { - struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; - struct GC_foreachObjptrClosure unmarkClosure = - {.fun = unmarkAndAdd, .env = args}; - - unmarkAndAdd(s, &(remElem->object), remElem->object, rawArgs); - CC_workList_push(s, &(args->worklist), remElem->object); - phaseLoop(s, rawArgs, &unmarkClosure); + objptr op = remElem->object; + pointer p = objptrToPointer (op, NULL); + assert (pinType(getHeader(p)) == PIN_ANY); + if (CC_isPointerMarked(p)) {markObj(p);} + // struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArgs; + // struct GC_foreachObjptrClosure unmarkClosure = + // {.fun = unmarkAndAdd, .env = args}; + + // unmarkAndAdd(s, &(remElem->object), remElem->object, rawArgs); + // CC_workList_push(s, &(args->worklist), remElem->object); + // phaseLoop(s, rawArgs, &unmarkClosure); // unmark(s, &(remElem->object), remElem->object, rawArgs); } @@ -1567,8 +1627,8 @@ 
void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) args->toSpace[opDepth] = HM_HH_new(s, opDepth); } - HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); #if ASSERT + HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); assert(opDepth <= args->maxDepth); HM_HierarchicalHeap hh = HM_getLevelHead(chunk); assert(args->fromSpace[opDepth] == hh); @@ -1576,6 +1636,8 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) assert(listContainsChunk(&(args->pinned[opDepth]), chunk)); else assert(hhContainsChunk(args->fromSpace[opDepth], chunk)); + assert(HM_getObjptrDepth(op) == opDepth); + assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); #endif #if 0 @@ -1594,8 +1656,6 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) } #endif - assert(HM_getObjptrDepth(op) == opDepth); - assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); bool unpin = tryUnpinWithDepth(op, opDepth); @@ -1630,8 +1690,13 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) } /* otherwise, object stays pinned, and we have to scavenge this remembered - * entry into the toSpace. */ - HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); + * entry into the toSpace. + * Entangled entries are added later because we use mark&sweep on them + * and use the rememebered set later for unmarking. + */ + if (remElem->from != BOGUS_OBJPTR) { + HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); + } // if (remElem->from != BOGUS_OBJPTR) { // uint32_t fromDepth = HM_getObjptrDepth(remElem->from); // if ((fromDepth <= args->maxDepth) && (fromDepth >= args->minDepth)) { @@ -1651,8 +1716,7 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) // return; // } // } - - LGC_markAndScan(s, &op, op, rawArgs); + LGC_markAndScan(s, remElem, rawArgs); // LGC_markAndScan(s, &(remElem->from), rawArgs); @@ -1819,7 +1883,7 @@ void forwardHHObjptr( } else { - disentangleObject(s, op); + disentangleObject(s, op, opDepth); // This object should have been previously unpinned // unpinObject(op); } From 90e28a3da3f049bb2fbcff5f22eb63760c0c2030 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 5 Jun 2022 12:56:43 -0400 Subject: [PATCH 054/114] make concurrent remSet very cautious, to be fixed --- runtime/gc/concurrent-list.c | 9 ++++++--- runtime/gc/remembered-set.c | 24 ++++++++++++++++++------ runtime/gc/remembered-set.h | 1 + 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index 1553a423b..c4ece3b1a 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -10,9 +10,9 @@ void allocateChunkInConcList( HM_chunk lastChunk) { GC_state s = pthread_getspecific(gcstate_key); - pthread_mutex_lock(&concList->mutex); + // pthread_mutex_lock(&concList->mutex); if(concList->lastChunk != lastChunk) { - pthread_mutex_unlock(&concList->mutex); + // pthread_mutex_unlock(&concList->mutex); return; } @@ -39,11 +39,12 @@ void allocateChunkInConcList( { concList->firstChunk = chunk; } - pthread_mutex_unlock(&concList->mutex); + // pthread_mutex_unlock(&concList->mutex); } pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ assert(concList != NULL); + pthread_mutex_lock(&concList->mutex); while(TRUE) { HM_chunk chunk = concList->lastChunk; if (NULL == chunk) { @@ -61,10 +62,12 @@ pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ bool success = 
__sync_bool_compare_and_swap(&(chunk->frontier), frontier, new_frontier); if (success) { memcpy(frontier, p, objSize); + pthread_mutex_unlock(&concList->mutex); return frontier; } } } + pthread_mutex_unlock(&concList->mutex); DIE("should never come here"); return NULL; } diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index edc52035a..07504d16b 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -37,27 +37,39 @@ void HM_foreachPrivate( } } -void HM_foreachRemembered( +void HM_foreachPublic ( GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f) { - assert(remSet != NULL); - HM_foreachPrivate(s, &(remSet->private), f); - while (TRUE) { + while (TRUE) + { struct HM_chunkList _chunkList; HM_chunkList chunkList = &(_chunkList); CC_popAsChunkList(&(remSet->public), chunkList); - if (chunkList->firstChunk == NULL) { + if (chunkList->firstChunk == NULL) + { break; } - else { + else + { HM_foreachPrivate(s, chunkList, f); HM_appendChunkList(&(remSet->private), chunkList); } } } +void HM_foreachRemembered( + GC_state s, + HM_remSet remSet, + HM_foreachDownptrClosure f) +{ + assert(remSet != NULL); + HM_foreachPrivate(s, &(remSet->private), f); + HM_foreachPublic(s, remSet, f); +} + + size_t HM_numRemembered(HM_remSet remSet) { assert(remSet != NULL); size_t count = 0; diff --git a/runtime/gc/remembered-set.h b/runtime/gc/remembered-set.h index 96df9b7f2..b67e5fa61 100644 --- a/runtime/gc/remembered-set.h +++ b/runtime/gc/remembered-set.h @@ -54,6 +54,7 @@ void HM_remember(HM_remSet remSet, HM_remembered remElem, bool conc); void HM_appendRemSet(HM_remSet r1, HM_remSet r2); void HM_foreachRemembered(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); size_t HM_numRemembered(HM_remSet remSet); +void HM_foreachPublic(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); #endif /* defined (MLTON_GC_INTERNAL_BASIS) */ From 4b2090c1b659cf64a56d4e1ec5c0c9c8e6834753 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 5 Jun 2022 16:01:41 -0400 Subject: [PATCH 055/114] cleanup logic a bit --- runtime/gc/hierarchical-heap-collection.c | 33 ++++++++++------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 93170a8bb..da59eea76 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -1018,9 +1018,6 @@ objptr relocateObject( pointer p = objptrToPointer(op, NULL); assert(!hasFwdPtr(p)); assert(HM_HH_isLevelHead(tgtHeap)); - - HM_chunkList tgtChunkList = HM_HH_getChunkList(tgtHeap); - GC_header header = getHeader(p); assert (!isFwdHeader(header)); @@ -1033,6 +1030,8 @@ objptr relocateObject( return op; } + HM_chunkList tgtChunkList = HM_HH_getChunkList(tgtHeap); + size_t metaDataBytes; size_t objectBytes; size_t copyBytes; @@ -1047,13 +1046,10 @@ objptr relocateObject( if (!HM_getChunkOf(p)->mightContainMultipleObjects) { - /// SAM_UNSAFE :: potential bug here because race with reader - // what if the reader tries to access levelHead - /* This chunk contains *only* this object, so no need to copy. Instead, * just move the chunk. Don't forget to update the levelHead, too! 
*/ HM_chunk chunk = HM_getChunkOf(p); - HM_unlinkChunk(HM_HH_getChunkList(HM_getLevelHead(chunk)), chunk); + HM_unlinkChunkPreserveLevelHead(HM_HH_getChunkList(HM_getLevelHead(chunk)), chunk); HM_appendChunk(tgtChunkList, chunk); chunk->levelHead = HM_HH_getUFNode(tgtHeap); @@ -1066,6 +1062,7 @@ objptr relocateObject( return op; } + /* Otherwise try copying the object */ pointer copyPointer = copyObject(p - metaDataBytes, objectBytes, copyBytes, @@ -1078,7 +1075,6 @@ objptr relocateObject( assert(!isPinned(op)); assert (__sync_bool_compare_and_swap(getFwdPtrp(p), header, newPointer)); *(getFwdPtrp(p)) = newPointer; - // printf("forwarding %p from %p", p, header); } else { @@ -1256,6 +1252,7 @@ void markAndAdd( bool isInToSpace = isObjptrInToSpace(op, args); if ((opDepth > args->maxDepth) || (opDepth < args->minDepth)) { + /*object is outside the scope of collection*/ return; } else if (isInToSpace) { @@ -1264,6 +1261,7 @@ void markAndAdd( } else if (args->fromSpace[opDepth] != HM_getLevelHead(chunk)) { + /*object is outside the scope of collection*/ return; } @@ -1281,15 +1279,13 @@ void markAndAdd( return; } - if (pinType(getHeader(p)) == PIN_DOWN && unpinDepthOf(op) >= opDepth) { - // try unpinning, if it succeeds we will trace this object now. - disentangleObject(s, op, opDepth); - } + disentangleObject(s, op, opDepth); + enum PinType pt = pinType(getHeader(p)); - if (pinType(getHeader(p)) == PIN_DOWN) + if (pt == PIN_DOWN) { // it is okay to not trace PIN_DOWN objects because the remembered set will have them - // and we will definitely trace; this relies on the failure of unpinning. + // and we will definitely trace; this relies on the failure of unpinning in disentangleObject. // it is dangerous to skip PIN_ANY objects here because the remSet entry for them might be created // concurrently to LGC and LGC may miss them. return; @@ -1320,11 +1316,10 @@ void markAndAdd( else { assert (isPinned(op)); - GC_header header = getHeader(p); - - if (pinType(header) == PIN_ANY) { - addEntangledToRemSet(s, op, opDepth, args); - } + assert (pinType(getHeader(p)) == PIN_ANY); + // this is purely an optimization to prevent retracing of PIN_ANY objects + // so it is okay if this header read is racy. worst case the object is retraced. 
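    /* Stand-alone sketch of the pattern relied on above (invented names, not
     * the runtime's API): an unsynchronized "already marked?" read is fine
     * when the real marking step is an atomic read-modify-write and the work
     * it guards is idempotent; a stale read only costs a redundant retrace. */
    static inline bool sketch_seems_marked(unsigned *header, unsigned mark_bit) {
      return (*header & mark_bit) != 0;              /* racy read, may be stale */
    }
    static inline bool sketch_mark_once(unsigned *header, unsigned mark_bit) {
      unsigned old = __sync_fetch_and_or(header, mark_bit);  /* atomic, decisive */
      return (old & mark_bit) == 0;             /* true for exactly one caller */
    }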
+ addEntangledToRemSet(s, op, opDepth, args); if (!chunk->pinnedDuringCollection) { From 283ecd956e0ca6f51b711788237d6bab5bb2a010 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 5 Jun 2022 16:19:51 -0400 Subject: [PATCH 056/114] cleanup 2 --- runtime/gc/hierarchical-heap-collection.c | 64 +++++------------------ 1 file changed, 14 insertions(+), 50 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index da59eea76..846739a3f 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -59,6 +59,15 @@ void unmarkWrapper( HM_remembered remElem, __attribute__((unused)) void *rawArgs); void addEntangledToRemSet(GC_state s, objptr op, uint32_t opDepth, struct ForwardHHObjptrArgs *args); + +static inline HM_HierarchicalHeap toSpaceHH (GC_state s, struct ForwardHHObjptrArgs *args, uint32_t depth) { + if (args->toSpace[depth] == NULL) + { + /* Level does not exist, so create it */ + args->toSpace[depth] = HM_HH_new(s, depth); + } + return args->toSpace[depth]; +} // void scavengeChunkOfPinnedObject(GC_state s, objptr op, void* rawArgs); #if ASSERT @@ -1227,11 +1236,7 @@ void copySuspect( return; } uint32_t opDepth = args->toDepth; - if (NULL == args->toSpace[opDepth]) - { - args->toSpace[opDepth] = HM_HH_new(s, opDepth); - } - HM_storeInchunkList(HM_HH_getSuspects(args->toSpace[opDepth]), &new_ptr, sizeof(objptr)); + HM_storeInchunkList(HM_HH_getSuspects(toSpaceHH(s, args, opDepth)), &new_ptr, sizeof(objptr)); } bool headerForwarded(GC_header h) @@ -1295,13 +1300,7 @@ void markAndAdd( assert (!CC_isPointerMarked(p)); assert(!hasFwdPtr(p)); assert(args->concurrent); - HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; - if (tgtHeap == NULL) - { - /* Level does not exist, so create it */ - tgtHeap = HM_HH_new(s, opDepth); - args->toSpace[opDepth] = tgtHeap; - } + HM_HierarchicalHeap tgtHeap = toSpaceHH(s, args, opDepth); assert(p == objptrToPointer(op, NULL)); bool relocateSuccess; objptr op_new = relocateObject(s, op, tgtHeap, args, &relocateSuccess); @@ -1445,14 +1444,8 @@ void addEntangledToRemSet( if (pinType(header) == PIN_ANY && !CC_isPointerMarked(p)) { markObj(p); - - if (NULL == args->toSpace[opDepth]) - { - args->toSpace[opDepth] = HM_HH_new(s, opDepth); - } - struct HM_remembered remElem_ = {.object = op, .from = BOGUS_OBJPTR}; - HM_remember (HM_HH_getRemSet(args->toSpace[opDepth]), &remElem_, true); + HM_remember (HM_HH_getRemSet(toSpaceHH(s, args, opDepth)), &remElem_, true); traverseAndCheck(s, &op, op, NULL); } @@ -1616,12 +1609,6 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) * (getLevelHead, etc.), but this should be faster. The toDepth field * is set by the loop that calls this function */ uint32_t opDepth = args->toDepth; - - if (NULL == args->toSpace[opDepth]) - { - args->toSpace[opDepth] = HM_HH_new(s, opDepth); - } - #if ASSERT HM_chunk chunk = HM_getChunkOf(objptrToPointer(op, NULL)); assert(opDepth <= args->maxDepth); @@ -1635,23 +1622,6 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) assert(HM_getLevelHead(chunk) == args->fromSpace[opDepth]); #endif -#if 0 - /** If it's not in our from-space, then it's entangled. - * KEEP THE ENTRY but don't do any of the other nasty stuff. - * - * SAM_NOTE: pinned chunks still have their HH set to the from-space, - * despite living in separate chunklists. 
- */ - if (opDepth > args->maxDepth || args->fromSpace[opDepth] != hh) { - assert(s->controls->manageEntanglement); - /** TODO: assert entangled here */ - - HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem); - return; - } -#endif - - bool unpin = tryUnpinWithDepth(op, opDepth); if (unpin) @@ -1690,7 +1660,7 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) * and use the rememebered set later for unmarking. */ if (remElem->from != BOGUS_OBJPTR) { - HM_remember(HM_HH_getRemSet(args->toSpace[opDepth]), remElem, false); + HM_remember(HM_HH_getRemSet(toSpaceHH(s, args, opDepth)), remElem, false); } // if (remElem->from != BOGUS_OBJPTR) { // uint32_t fromDepth = HM_getObjptrDepth(remElem->from); @@ -1923,13 +1893,7 @@ void forwardHHObjptr( break; } - HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; - if (tgtHeap == NULL) - { - /* Level does not exist, so create it */ - tgtHeap = HM_HH_new(s, opDepth); - args->toSpace[opDepth] = tgtHeap; - } + HM_HierarchicalHeap tgtHeap = toSpaceHH(s, args, opDepth); assert(p == objptrToPointer(op, NULL)); /* use the forwarding pointer */ From 62af8e99d8e3809c392408aae44db8a63ed71b29 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 6 Jun 2022 09:23:40 -0400 Subject: [PATCH 057/114] forward fromObjs separately --- runtime/gc/hierarchical-heap-collection.c | 68 +++++++---------------- 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 846739a3f..b33919636 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -49,7 +49,7 @@ void unmark(GC_state s, objptr *opp, objptr op, void *rawArgs); void copySuspect(GC_state s, objptr *opp, objptr op, void *rawArghh); -void forwardObjptrsOfRemembered( +void forwardFromObjsOfRemembered( GC_state s, HM_remembered remElem, void *rawArgs); @@ -603,13 +603,12 @@ void HM_HHC_collectLocal(uint32_t desiredScope) depth, HM_numRemembered(HM_HH_getRemSet(toSpaceLevel))); - /* use the remembered (pinned) entries at this level as extra roots */ /* forward the from-elements of the down-ptrs */ - // struct HM_foreachDownptrClosure closure = - // {.fun = forwardObjptrsOfRemembered, .env = (void *)&forwardHHObjptrArgs}; + struct HM_foreachDownptrClosure closure = + {.fun = forwardFromObjsOfRemembered, .env = (void *)&forwardHHObjptrArgs}; // HM_foreachRemembered pops the public remSet into private. So it interferes - // with the unmarking phase of GC. - // HM_foreachRemembered(s, HM_HH_getRemSet(toSpaceLevel), &closure); + // with the unmarking phase of GC. So use HM_foreachPrivate instead. 
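        /* Simplified model of the private/public split mentioned above
         * (stand-in types, not the real HM_remSet): the public side is
         * appended to concurrently by other processors, and the destructive
         * walk drains it into the private side; a pass that must leave the
         * public entries in place for a later phase walks only the private
         * side. */
        struct sketch_rementry { struct sketch_rementry *next; /* object, from, ... */ };
        struct sketch_remset {
          struct sketch_rementry *priv;  /* owned by the collecting processor        */
          struct sketch_rementry *publ;  /* appended to concurrently by other procs  */
        };
        static void sketch_foreach_private(struct sketch_remset *rs,
                                           void (*f)(struct sketch_rementry *, void *),
                                           void *env) {
          for (struct sketch_rementry *e = rs->priv; e != NULL; e = e->next)
            f(e, env);  /* publ is deliberately left untouched */
        }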
+ HM_foreachPrivate(s, &(HM_HH_getRemSet(toSpaceLevel)->private), &closure); if (NULL != HM_HH_getChunkList(toSpaceLevel)->firstChunk) { @@ -1309,7 +1308,6 @@ void markAndAdd( chunk->retireChunk = true; *opp = op_new; assert(!hasFwdPtr(objptrToPointer(op_new, NULL))); - // markObj(objptrToPointer(op_new, NULL)); CC_workList_push(s, &(args->worklist), op_new); } else @@ -1336,33 +1334,6 @@ void markAndAdd( CC_workList_push(s, &(args->worklist), op); } } - // else if () - // { - // if (!CC_isPointerMarked(p)) - // { - // assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); - // markObj(p); - - // } else { - // assert(args->fromSpace[opDepth] == HM_getLevelHead(chunk)); - // assert(chunk->pinnedDuringCollection); - // } - // } - // else if (!hasFwdPtr(p)) { - // assert(args->concurrent); - // HM_HierarchicalHeap tgtHeap = args->toSpace[opDepth]; - // if (tgtHeap == NULL) - // { - // /* Level does not exist, so create it */ - // tgtHeap = HM_HH_new(s, opDepth); - // args->toSpace[opDepth] = tgtHeap; - // } - // assert(p == objptrToPointer(op, NULL)); - - // /* use the forwarding pointer */ - // *opp = relocateObject(s, op, tgtHeap, args); - - // } return; } @@ -1447,7 +1418,7 @@ void addEntangledToRemSet( struct HM_remembered remElem_ = {.object = op, .from = BOGUS_OBJPTR}; HM_remember (HM_HH_getRemSet(toSpaceHH(s, args, opDepth)), &remElem_, true); - traverseAndCheck(s, &op, op, NULL); + // traverseAndCheck(s, &op, op, NULL); } } @@ -1710,25 +1681,23 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) // JATIN_TODO: CHANGE NAME TO FORWARD FROM /// COULD BE HEKLPFUL FOR DEGBUGGING TO FORWARD THE OBJECTS ANYWAY -void forwardObjptrsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs) +void forwardFromObjsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs) { objptr op = remElem->object; assert(isPinned(op)); - struct GC_foreachObjptrClosure closure = - {.fun = forwardHHObjptr, .env = rawArgs}; - - foreachObjptrInObject( - s, - objptrToPointer(op, NULL), - &trueObjptrPredicateClosure, - &closure, - FALSE); - if (remElem->from != BOGUS_OBJPTR) - { - forwardHHObjptr(s, &(remElem->from), remElem->from, rawArgs); - } + // struct GC_foreachObjptrClosure closure = + // {.fun = forwardHHObjptr, .env = rawArgs}; + + // foreachObjptrInObject( + // s, + // objptrToPointer(op, NULL), + // &trueObjptrPredicateClosure, + // &closure, + // FALSE); + assert (remElem->from != BOGUS_OBJPTR); + forwardHHObjptr(s, &(remElem->from), remElem->from, rawArgs); } /* ========================================================================= */ @@ -2092,6 +2061,7 @@ void checkRememberedEntry( HM_remembered remElem, void *args) { + return; objptr object = remElem->object; HM_HierarchicalHeap hh = (HM_HierarchicalHeap)args; From bbe250bc40b97418a519303fc6fa67b4cf137fc6 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 6 Jun 2022 09:24:19 -0400 Subject: [PATCH 058/114] comm out --- runtime/gc/assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 14aac6767..135610193 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -211,7 +211,7 @@ void Assignable_writeBarrier( } } else if(dstHH->depth != 0) { - traverseAndCheck(s, &dst, dst, NULL); + // traverseAndCheck(s, &dst, dst, NULL); manage_entangled (s, src, HM_getChunkOf(dstp)->decheckState); } From 07b4e0b42b70d186a376f07e4103a88edd8a90ff Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 6 Jun 2022 09:24:37 -0400 Subject: [PATCH 059/114] de 
only if depth --- runtime/gc/decheck.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 1d07d2e77..920926268 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -615,14 +615,16 @@ void GC_HH_copySyncDepthsFromThread(GC_state s, objptr victimThread, uint32_t st } #endif +// returns true if the object is unpinned. bool disentangleObject(GC_state s, objptr op, uint32_t opDepth) { - if (isPinned(op)) { + if (isPinned(op) && unpinDepthOf(op) >= opDepth) { bool success = tryUnpinWithDepth(op, opDepth); if (success && ES_contains(NULL, op)) { ES_unmark(s, op); return true; } + return false; } - return false; + return true; } From 06604bb6ccf70354c8ac9ef709de1284579dff06 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 6 Jun 2022 09:24:53 -0400 Subject: [PATCH 060/114] new funx --- runtime/gc/remembered-set.h | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/gc/remembered-set.h b/runtime/gc/remembered-set.h index b67e5fa61..b84dfa8c8 100644 --- a/runtime/gc/remembered-set.h +++ b/runtime/gc/remembered-set.h @@ -55,6 +55,7 @@ void HM_appendRemSet(HM_remSet r1, HM_remSet r2); void HM_foreachRemembered(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); size_t HM_numRemembered(HM_remSet remSet); void HM_foreachPublic(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); +void HM_foreachPrivate(GC_state s, HM_chunkList list,HM_foreachDownptrClosure f); #endif /* defined (MLTON_GC_INTERNAL_BASIS) */ From b2eb0480bc473f3f2eaa6f708d3875f6cbdaa6c4 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sat, 11 Jun 2022 14:08:06 -0400 Subject: [PATCH 061/114] de check opt --- runtime/gc/assign.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 135610193..7ed604512 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -88,6 +88,11 @@ objptr Assignable_readBarrier( } #endif +static inline bool decheck_opt_fast (GC_state s, pointer p) { + HM_HierarchicalHeap hh = HM_getLevelHead(HM_getChunkOf(p)); + return (hh->depth == 1) || hh == getThreadCurrent(s)->hierarchicalHeap; +} + void Assignable_writeBarrier( GC_state s, @@ -158,18 +163,18 @@ void Assignable_writeBarrier( return; } + HM_HierarchicalHeap srcHH = HM_getLevelHead(HM_getChunkOf(srcp)); + if (srcHH == dstHH) { + /* internal pointers are always traced */ + return; + } + uint32_t dd = dstHH->depth; - bool src_de = (HM_getLevelHead(HM_getChunkOf(srcp)) == getThreadCurrent(s)->hierarchicalHeap) || decheck(s, src); + bool src_de = decheck_opt_fast(s, srcp) || decheck(s, src); if (src_de) { - bool dst_de = (dd == 1) || decheck(s, dst); + bool dst_de = decheck_opt_fast(s, dstp) || decheck(s, dst); if (dst_de) { - HM_HierarchicalHeap srcHH = HM_getLevelHeadPathCompress(HM_getChunkOf(srcp)); - if (srcHH == dstHH) { - /* internal pointers are always traced */ - return; - } uint32_t sd = srcHH->depth; - /* up pointer (snapshotted by the closure) * or internal (within a chain) pointer to a snapshotted heap */ From 2fcab07ff1dfe632af220a389d3cbfe1a91c7e19 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sat, 11 Jun 2022 14:19:06 -0400 Subject: [PATCH 062/114] merge prob --- runtime/gc/hierarchical-heap-collection.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 694a6b18a..8a14434c4 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ 
b/runtime/gc/hierarchical-heap-collection.c @@ -44,12 +44,8 @@ void tryUnpinOrKeepPinned( HM_remembered remElem, void *rawArgs); -<<<<<<< HEAD void LGC_markAndScan(GC_state s, HM_remembered remElem, void *rawArgs); void unmark(GC_state s, objptr *opp, objptr op, void *rawArgs); -======= -void copySuspect(GC_state s, objptr *opp, objptr op, void *rawArghh); ->>>>>>> 8be0c306b8060889b99715b867139e446c8c273f void copySuspect(GC_state s, objptr *opp, objptr op, void *rawArghh); @@ -1218,17 +1214,10 @@ void unfreezeDisentangledDepthAfter( /* ========================================================================= */ void copySuspect( -<<<<<<< HEAD - GC_state s, - __attribute__((unused)) objptr *opp, - objptr op, - void *rawArghh) -======= GC_state s, __attribute__((unused)) objptr *opp, objptr op, void *rawArghh) ->>>>>>> 8be0c306b8060889b99715b867139e446c8c273f { struct ForwardHHObjptrArgs *args = (struct ForwardHHObjptrArgs *)rawArghh; assert(isObjptr(op)); From ddae81082983c4de580a013bdff394ce9f6f9097 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sat, 11 Jun 2022 14:20:13 -0400 Subject: [PATCH 063/114] merge prob --- runtime/gc/entanglement-suspects.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index e42cea1f7..53de88b80 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -6,7 +6,6 @@ static inline bool mark_suspect(objptr op) { pointer p = objptrToPointer(op, NULL); GC_header header = __sync_fetch_and_or(getHeaderp(p), SUSPECT_MASK); -<<<<<<< HEAD assert(1 == (header & GC_VALID_HEADER_MASK)); // while (TRUE) // { @@ -31,9 +30,6 @@ static inline bool mark_suspect(objptr op) // } // DIE("should be impossible to reach here"); /*return true if this call marked the header, false if someone else did*/ -======= - assert (1 == (header & GC_VALID_HEADER_MASK)); ->>>>>>> 8be0c306b8060889b99715b867139e446c8c273f return !suspicious_header(header); } From 62d23614458022609106438104c6326be3933d64 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 12 Jun 2022 16:45:14 -0400 Subject: [PATCH 064/114] wrong assert --- runtime/gc/assign.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 7ed604512..fdce227de 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -20,11 +20,11 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src) { return src; } - assert(ES_contains(NULL, dst)); HM_EBR_leaveQuiescentState(s); if (!decheck(s, src)) { + assert(ES_contains(NULL, dst)); assert (isMutable(s, dstp)); new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); assert (isPinned(new_src)); @@ -49,10 +49,10 @@ objptr Assignable_readBarrier( { return ptr; } - assert (ES_contains(NULL, obj)); HM_EBR_leaveQuiescentState(s); if (!decheck(s, ptr)) { + assert (ES_contains(NULL, obj)); // assert (isMutable(s, obj)); // if (!ES_contains(NULL, obj)) // { @@ -90,7 +90,7 @@ objptr Assignable_readBarrier( static inline bool decheck_opt_fast (GC_state s, pointer p) { HM_HierarchicalHeap hh = HM_getLevelHead(HM_getChunkOf(p)); - return (hh->depth == 1) || hh == getThreadCurrent(s)->hierarchicalHeap; + return (hh->depth <= 1) || hh == getThreadCurrent(s)->hierarchicalHeap; } From 40823b0fb6ffb487622cddaeb1e21117acefae37 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 24 Jun 2022 10:03:56 -0400 Subject: [PATCH 065/114] force depth 1 eagerly --- basis-library/schedulers/shh/Scheduler.sml | 1 + 1 file changed, 1 
insertion(+) diff --git a/basis-library/schedulers/shh/Scheduler.sml b/basis-library/schedulers/shh/Scheduler.sml index a97c85312..ca5c5f5c8 100644 --- a/basis-library/schedulers/shh/Scheduler.sml +++ b/basis-library/schedulers/shh/Scheduler.sml @@ -568,6 +568,7 @@ struct if HH.getDepth originalThread = 0 then () else die (fn _ => "scheduler bug: root depth <> 0") val _ = HH.setDepth (originalThread, 1) + val _ = HH.forceLeftHeap (myWorkerId(), originalThread) (* implicitly attaches worker child heaps *) val _ = MLton.Parallel.initializeProcessors () From 99d06e785ea163a51e75c63f2ae0de634b284ae2 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 24 Jun 2022 10:04:33 -0400 Subject: [PATCH 066/114] cas enters RB without candidate, so skip such cases --- runtime/gc/assign.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index fdce227de..08e856722 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -16,7 +16,7 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src) pointer dstp = objptrToPointer(dst, NULL); HM_HierarchicalHeap dstHH = HM_getLevelHead(HM_getChunkOf(dstp)); - if (!isObjptr(src) || HM_HH_getDepth(dstHH) == 0) + if (!isObjptr(src) || HM_HH_getDepth(dstHH) == 0 || !ES_contains(NULL, dst)) { return src; } @@ -24,7 +24,6 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src) HM_EBR_leaveQuiescentState(s); if (!decheck(s, src)) { - assert(ES_contains(NULL, dst)); assert (isMutable(s, dstp)); new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); assert (isPinned(new_src)); @@ -45,7 +44,7 @@ objptr Assignable_readBarrier( objptr ptr = *field; pointer objp = objptrToPointer(obj, NULL); HM_HierarchicalHeap objHH = HM_getLevelHead(HM_getChunkOf(objp)); - if (!isObjptr(ptr) || HM_HH_getDepth(objHH) == 0) + if (!isObjptr(ptr) || HM_HH_getDepth(objHH) == 0 || !ES_contains(NULL, obj)) { return ptr; } From 7ff262193a84e998ef36d4b1fbebc8a9438df408 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 24 Jun 2022 10:39:17 -0400 Subject: [PATCH 067/114] fishy commit and only add to concurrent remset if you are the root of entanglement; i don't like any of this but i will live with it --- runtime/gc/concurrent-collection.c | 6 +- runtime/gc/decheck.c | 16 +++- runtime/gc/decheck.h | 1 + runtime/gc/hierarchical-heap-collection.c | 8 +- runtime/gc/remembered-set.c | 106 ++++++++++++++++++++-- runtime/gc/remembered-set.h | 4 +- 6 files changed, 120 insertions(+), 21 deletions(-) diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index fe3eee11b..833d81225 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -853,7 +853,7 @@ void CC_filterPinned( /** Save "valid" entries to newRemSet, throw away old entries, and store * valid entries back into the main remembered set. */ - HM_foreachRemembered(s, oldRemSet, &closure); + HM_foreachRemembered(s, oldRemSet, &closure, false); struct CC_chunkInfo info = {.initialDepth = initialDepth, @@ -986,7 +986,7 @@ size_t CC_collectWithRoots( struct HM_foreachDownptrClosure forwardPinnedClosure = {.fun = forwardPinned, .env = (void*)&lists}; - HM_foreachRemembered(s, HM_HH_getRemSet(targetHH), &forwardPinnedClosure); + HM_foreachRemembered(s, HM_HH_getRemSet(targetHH), &forwardPinnedClosure, false); // forward closures, stack and deque? 
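  /* The {.fun = ..., .env = ...} structs used above are the usual C
   * "closure as a struct" idiom; a minimal stand-alone version of it, with
   * invented names, looks like this: */
  struct sketch_closure {
    void (*fun)(void *elem, void *env);  /* callback applied to each element */
    void *env;                           /* extra state threaded through     */
  };
  static void sketch_apply(struct sketch_closure *c, void *elem) {
    c->fun(elem, c->env);
  }
  /* usage:
   *   struct sketch_closure cl = {.fun = visitEntry, .env = &args};
   *   sketch_apply(&cl, entry); */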
forceForward(s, &(cp->snapLeft), &lists); @@ -1046,7 +1046,7 @@ size_t CC_collectWithRoots( struct HM_foreachDownptrClosure unmarkPinnedClosure = {.fun = unmarkPinned, .env = &lists}; - HM_foreachRemembered(s, HM_HH_getRemSet(targetHH), &unmarkPinnedClosure); + HM_foreachRemembered(s, HM_HH_getRemSet(targetHH), &unmarkPinnedClosure, false); forceUnmark(s, &(cp->snapLeft), &lists); forceUnmark(s, &(cp->snapRight), &lists); diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 920926268..77be7114d 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -378,20 +378,22 @@ void make_entangled( if (pinType(header) != PIN_ANY || unpinDepthOfH(header) > unpinDepth) { + bool addToRemSet = mea->firstCall; if (mutable) { new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); } else { + mea->firstCall = false; struct GC_foreachObjptrClosure emanageClosure = {.fun = make_entangled, .env = rawArgs}; // the unpinDepth of reachable maybe smaller. - mea->unpinDepth = min(unpinDepth, unpinDepthOfH(header)); + mea->unpinDepth = pinType(header) == PIN_NONE ? unpinDepth : min(unpinDepth, unpinDepthOfH(header)); foreachObjptrInObject(s, p_ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); assert(pinType(getHeader(objptrToPointer(new_ptr, NULL))) == PIN_ANY); } - if (pinChange) + if (pinChange && addToRemSet) { struct HM_remembered remElem_ = {.object = new_ptr, .from = BOGUS_OBJPTR}; HM_HH_rememberAtLevel(HM_getLevelHead(chunk), &(remElem_), true); @@ -446,15 +448,23 @@ objptr manage_entangled( uint32_t unpinDepth = lcaHeapDepth(reader, allocator); GC_header header = getHeader(objptrToPointer (ptr, NULL)); + bool manage = isFwdHeader(header) || pinType (header) != PIN_ANY || unpinDepthOfH(header) > unpinDepth; + if (pinType(header) != PIN_NONE && unpinDepthOfH(header) == 0) + { + return ptr; + } + if (manage) { + uint32_t newUnpinDepth = pinType(header) == PIN_NONE ? 
unpinDepth : min(unpinDepthOfH(header), unpinDepth); struct ManageEntangledArgs mea = { .reader = reader, .root = allocator, - .unpinDepth = min(unpinDepthOfH(header), unpinDepth) + .unpinDepth = newUnpinDepth, + .firstCall = true }; make_entangled(s, &ptr, ptr, (void*) &mea); } diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index 6b8040d22..b43a91ef3 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -25,6 +25,7 @@ struct ManageEntangledArgs decheck_tid_t reader; decheck_tid_t root; uint32_t unpinDepth; + bool firstCall; }; #define DECHECK_BOGUS_BITS ((uint64_t)0) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 8a14434c4..c189c0928 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -459,7 +459,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) struct HM_foreachDownptrClosure closure = {.fun = tryUnpinOrKeepPinned, .env = (void *)&forwardHHObjptrArgs}; - HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); + HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure, true); } forwardHHObjptrArgs.concurrent = false; forwardHHObjptrArgs.toDepth = HM_HH_INVALID_DEPTH; @@ -762,7 +762,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) if (toSpaceLevel != NULL) { struct HM_foreachDownptrClosure unmarkClosure = {.fun = unmarkWrapper, .env = NULL}; - HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure); + HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure, true); } /* unset the flags on pinned chunks and update their HH pointer */ @@ -877,7 +877,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) { struct HM_foreachDownptrClosure closure = {.fun = checkRememberedEntry, .env = (void *)cursor}; - HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure); + HM_foreachRemembered(s, HM_HH_getRemSet(cursor), &closure, false); } // make sure that original representatives haven't been messed up @@ -1417,8 +1417,6 @@ void addEntangledToRemSet( markObj(p); struct HM_remembered remElem_ = {.object = op, .from = BOGUS_OBJPTR}; HM_remember (HM_HH_getRemSet(toSpaceHH(s, args, opDepth)), &remElem_, true); - - // traverseAndCheck(s, &op, op, NULL); } } diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index 07504d16b..d67798801 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -37,36 +37,126 @@ void HM_foreachPrivate( } } +typedef struct FishyChunk +{ + HM_chunk chunk; + pointer scanned; +} FishyChunk; + +void makeChunkFishy(FishyChunk * fc, HM_chunk chunk, pointer frontier, int* numFishyChunks) { + (fc[*numFishyChunks]).chunk = chunk; + (fc[*numFishyChunks]).scanned = frontier; + *numFishyChunks = *numFishyChunks + 1; + return; +} + +void checkFishyChunks(GC_state s, + FishyChunk * fishyChunks, + int numFishyChunks, + HM_foreachDownptrClosure f) +{ + if (fishyChunks == NULL) { + return; + } + while (TRUE) { + int i = numFishyChunks - 1; + bool changed = false; + while (i >= 0) + { + HM_chunk chunk = fishyChunks[i].chunk; + pointer p = fishyChunks[i].scanned; + pointer frontier = HM_getChunkFrontier(chunk); + while (p < frontier) + { + f->fun(s, (HM_remembered)p, f->env); + p += sizeof(struct HM_remembered); + } + if (p != fishyChunks[i].scanned) { + fishyChunks[i].scanned = p; + changed = true; + } + i --; + } + if (!changed) { + return; + } + } +} + void HM_foreachPublic ( GC_state s, HM_remSet remSet, - HM_foreachDownptrClosure f) + HM_foreachDownptrClosure f, + bool trackFishyChunks) { - 
while (TRUE) - { + + if ((remSet->public).firstChunk == NULL) { + return; + } + + if (!trackFishyChunks) { struct HM_chunkList _chunkList; HM_chunkList chunkList = &(_chunkList); CC_popAsChunkList(&(remSet->public), chunkList); - if (chunkList->firstChunk == NULL) + HM_foreachPrivate(s, chunkList, f); + HM_appendChunkList(&(remSet->private), chunkList); + return; + } + + HM_chunk chunk = (remSet->public).firstChunk; + FishyChunk* fishyChunks = malloc(sizeof(struct FishyChunk) * 2 * s->numberOfProcs); + int numFishyChunks = 0; + while (TRUE) + { + if (chunk == NULL) { break; } else { - HM_foreachPrivate(s, chunkList, f); - HM_appendChunkList(&(remSet->private), chunkList); + while (chunk != NULL) + { + pointer p = HM_getChunkStart(chunk); + pointer frontier = HM_getChunkFrontier(chunk); + while (p < frontier) + { + f->fun(s, (HM_remembered)p, f->env); + p += sizeof(struct HM_remembered); + } + if ((chunk->retireChunk || chunk->nextChunk == NULL)) + { + if (numFishyChunks < 2 * s->numberOfProcs) { + makeChunkFishy(fishyChunks, chunk, p, &numFishyChunks); + } + else { + DIE("Fishy chunk limit exceeded"); + } + } + chunk = chunk->nextChunk; + } + checkFishyChunks(s, fishyChunks, numFishyChunks, f); + HM_chunk lastChunk = (remSet->public).lastChunk; + if (lastChunk != fishyChunks[numFishyChunks - 1].chunk) { + chunk = lastChunk; + } } + struct HM_chunkList _chunkList; + HM_chunkList chunkList = &(_chunkList); + CC_popAsChunkList(&(remSet->public), chunkList); + HM_appendChunkList(&(remSet->private), chunkList); } + free(fishyChunks); } void HM_foreachRemembered( GC_state s, HM_remSet remSet, - HM_foreachDownptrClosure f) + HM_foreachDownptrClosure f, + bool trackFishyChunks) { assert(remSet != NULL); HM_foreachPrivate(s, &(remSet->private), f); - HM_foreachPublic(s, remSet, f); + HM_foreachPublic(s, remSet, f, trackFishyChunks); } diff --git a/runtime/gc/remembered-set.h b/runtime/gc/remembered-set.h index b84dfa8c8..ce0cfad40 100644 --- a/runtime/gc/remembered-set.h +++ b/runtime/gc/remembered-set.h @@ -52,9 +52,9 @@ void HM_initRemSet(HM_remSet remSet); void HM_freeRemSetWithInfo(GC_state s, HM_remSet remSet, void* info); void HM_remember(HM_remSet remSet, HM_remembered remElem, bool conc); void HM_appendRemSet(HM_remSet r1, HM_remSet r2); -void HM_foreachRemembered(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); +void HM_foreachRemembered(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f, bool trackFishyChunks); size_t HM_numRemembered(HM_remSet remSet); -void HM_foreachPublic(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f); +void HM_foreachPublic(GC_state s, HM_remSet remSet, HM_foreachDownptrClosure f, bool trackFishyChunks); void HM_foreachPrivate(GC_state s, HM_chunkList list,HM_foreachDownptrClosure f); #endif /* defined (MLTON_GC_INTERNAL_BASIS) */ From 76ac4d4e32edae9401bcec8970075974fdc9313b Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 24 Jun 2022 10:40:08 -0400 Subject: [PATCH 068/114] lock-free for the most part --- runtime/gc/concurrent-list.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index c4ece3b1a..1b3c3ae65 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -10,9 +10,9 @@ void allocateChunkInConcList( HM_chunk lastChunk) { GC_state s = pthread_getspecific(gcstate_key); - // pthread_mutex_lock(&concList->mutex); + pthread_mutex_lock(&concList->mutex); if(concList->lastChunk != lastChunk) { - // 
pthread_mutex_unlock(&concList->mutex); + pthread_mutex_unlock(&concList->mutex); return; } @@ -39,12 +39,12 @@ void allocateChunkInConcList( { concList->firstChunk = chunk; } - // pthread_mutex_unlock(&concList->mutex); + pthread_mutex_unlock(&concList->mutex); } pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ assert(concList != NULL); - pthread_mutex_lock(&concList->mutex); + // pthread_mutex_lock(&concList->mutex); while(TRUE) { HM_chunk chunk = concList->lastChunk; if (NULL == chunk) { @@ -62,12 +62,12 @@ pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ bool success = __sync_bool_compare_and_swap(&(chunk->frontier), frontier, new_frontier); if (success) { memcpy(frontier, p, objSize); - pthread_mutex_unlock(&concList->mutex); + // pthread_mutex_unlock(&concList->mutex); return frontier; } } } - pthread_mutex_unlock(&concList->mutex); + // pthread_mutex_unlock(&concList->mutex); DIE("should never come here"); return NULL; } @@ -166,6 +166,7 @@ void CC_appendConcList(CC_concList concList1, CC_concList concList2) { return; } + pthread_mutex_lock(&concList1->mutex); if (concList1->lastChunk == NULL) { concList1->firstChunk = firstChunk; @@ -173,6 +174,7 @@ void CC_appendConcList(CC_concList concList1, CC_concList concList2) { } else { concList1->lastChunk->nextChunk = firstChunk; + concList1->lastChunk->retireChunk = true; firstChunk->prevChunk = concList1->lastChunk; concList1->lastChunk = lastChunk; } From ce9b69a0cee450aee9bf70b80bcde9bb5bba1bfc Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Fri, 24 Jun 2022 16:55:01 -0400 Subject: [PATCH 069/114] bddm dm dm another one bites the dust --- runtime/gc/decheck.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 77be7114d..88b7a67b0 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -352,6 +352,13 @@ void traverseAndCheck( } #endif +static inline objptr getRacyFwdPtr(pointer p) { + while (isFwdHeader(getHeader(p))) { + p = objptrToPointer(getFwdPtr(p), NULL); + } + return pointerToObjptr(p, NULL); +} + void make_entangled( GC_state s, objptr *opp, @@ -372,10 +379,10 @@ void make_entangled( assert(!isFwdHeader(header)); bool mutable = isMutableH(s, header); bool headerChange = false, pinChange = false; - objptr new_ptr = ptr; // unpin depth according to the caller uint32_t unpinDepth = mea->unpinDepth; + objptr new_ptr; if (pinType(header) != PIN_ANY || unpinDepthOfH(header) > unpinDepth) { bool addToRemSet = mea->firstCall; @@ -399,6 +406,10 @@ void make_entangled( HM_HH_rememberAtLevel(HM_getLevelHead(chunk), &(remElem_), true); } } + else { + new_ptr = getRacyFwdPtr(ptr); + } + mea->unpinDepth = unpinDepth; assert(!hasFwdPtr(objptrToPointer(new_ptr, NULL))); @@ -555,7 +566,6 @@ bool decheck(GC_state s, objptr ptr) { return false; #if 0 - s->cumulativeStatistics->numEntanglementsDetected++; /** set the chunk's disentangled depth. This synchronizes with GC, if there * is GC happening by the owner of this chunk. 
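The getRacyFwdPtr helper added above follows forwarding pointers without any
synchronization; what it leans on is that a forwarding pointer, once installed,
never changes. A minimal stand-alone model of that idea (simplified stand-in
types, not the runtime's real header encoding) is:

#include <stddef.h>
#include <stdio.h>

/* A forwarded object points at its newer copy; fwd is written at most once,
 * going from NULL to its final value, and is never cleared afterwards. */
struct sketch_obj {
  struct sketch_obj *fwd;   /* NULL means "not forwarded" */
  int payload;
};

/* Follow forwarding pointers until reaching a copy with no forward link.
 * Each fwd field changes at most once, so the walk is well defined even if a
 * concurrent collector installs new links while it runs; at worst the copy
 * returned here is forwarded again immediately afterwards. */
static struct sketch_obj *sketch_chase_fwd(struct sketch_obj *p) {
  while (p->fwd != NULL)
    p = p->fwd;
  return p;
}

int main(void) {
  struct sketch_obj newest = { NULL, 42 };
  struct sketch_obj older  = { &newest, 0 };
  struct sketch_obj oldest = { &older, 0 };
  printf("%d\n", sketch_chase_fwd(&oldest)->payload);  /* prints 42 */
  return 0;
}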
From 9fb56c288b80b0d6115793741100dd064bc56856 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sat, 25 Jun 2022 13:44:25 -0400 Subject: [PATCH 070/114] race factor sampling at lgc --- basis-library/mpl/gc.sig | 2 +- basis-library/mpl/gc.sml | 4 ++-- basis-library/primitive/prim-mlton.sml | 2 +- runtime/gc/gc_state.c | 7 ++++--- runtime/gc/gc_state.h | 2 +- runtime/gc/hierarchical-heap-collection.c | 18 ++++++++++++++++++ runtime/gc/hierarchical-heap-collection.h | 1 + runtime/gc/statistics.c | 2 +- runtime/gc/statistics.h | 2 +- 9 files changed, 30 insertions(+), 10 deletions(-) diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index 8d0a6a87c..8020fd306 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -19,7 +19,7 @@ sig (* How many times entanglement has been detected at a read barrier. *) - val numberEntanglementsDetected: unit -> IntInf.int + val approxRaceFactor: unit -> Real32.real val numberSuspectsMarked: unit -> IntInf.int val numberSuspectsCleared: unit -> IntInf.int diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index 54e3e4021..e055d7a26 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -40,8 +40,8 @@ struct fun numberDisentanglementChecks () = C_UIntmax.toLargeInt (GC.numberDisentanglementChecks (gcState ())) - fun numberEntanglementsDetected () = - C_UIntmax.toLargeInt (GC.numberEntanglementsDetected (gcState ())) + fun approxRaceFactor () = + (GC.approxRaceFactor (gcState ())) fun numberSuspectsMarked () = C_UIntmax.toLargeInt (GC.numberSuspectsMarked (gcState ())) diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index d3df153d1..3224afac0 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -166,7 +166,7 @@ structure GC = val numberDisentanglementChecks = _import "GC_numDisentanglementChecks" runtime private: GCState.t -> C_UIntmax.t; - val numberEntanglementsDetected = _import "GC_numEntanglementsDetected" runtime private: GCState.t -> C_UIntmax.t; + val approxRaceFactor = _import "GC_approxRaceFactor" runtime private: GCState.t -> Real32.real; val numberSuspectsMarked = _import "GC_numSuspectsMarked" runtime private: GCState.t -> C_UIntmax.t; diff --git a/runtime/gc/gc_state.c b/runtime/gc/gc_state.c index 968b370c2..2afce5eb7 100644 --- a/runtime/gc/gc_state.c +++ b/runtime/gc/gc_state.c @@ -235,10 +235,11 @@ uintmax_t GC_numSuspectsCleared(GC_state s) return count; } -uintmax_t GC_numEntanglementsDetected(GC_state s) { - uintmax_t count = 0; +float GC_approxRaceFactor(GC_state s) +{ + float count = 0; for (uint32_t p = 0; p < s->numberOfProcs; p++) { - count += s->procStates[p].cumulativeStatistics->numEntanglementsDetected; + count = max(count, s->procStates[p].cumulativeStatistics->approxRaceFactor); } return count; } diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 80e991943..104b3c32b 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -139,7 +139,7 @@ PRIVATE uintmax_t GC_getInternalCCMillisecondsOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_getRootCCBytesReclaimedOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_getInternalCCBytesReclaimedOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_numDisentanglementChecks(GC_state s); -PRIVATE uintmax_t GC_numEntanglementsDetected(GC_state s); +PRIVATE float GC_approxRaceFactor(GC_state s); PRIVATE uintmax_t GC_numChecksSkipped(GC_state s); PRIVATE uintmax_t GC_numSuspectsMarked(GC_state s); PRIVATE 
uintmax_t GC_numSuspectsCleared(GC_state s); diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index c189c0928..2539fc669 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -309,6 +309,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) .pinned = NULL, .containingObject = BOGUS_OBJPTR, .bytesCopied = 0, + .entangledBytes = 0, .objectsCopied = 0, .stacksCopied = 0, .bytesMoved = 0, @@ -903,6 +904,10 @@ void HM_HHC_collectLocal(uint32_t desiredScope) thread->bytesSurvivedLastCollection = forwardHHObjptrArgs.bytesMoved + forwardHHObjptrArgs.bytesCopied; + float new_rf = forwardHHObjptrArgs.entangledBytes; + + s->cumulativeStatistics->approxRaceFactor = max(s->cumulativeStatistics->approxRaceFactor, new_rf); + thread->bytesAllocatedSinceLastCollection = 0; // sizes info and stats @@ -1417,6 +1422,19 @@ void addEntangledToRemSet( markObj(p); struct HM_remembered remElem_ = {.object = op, .from = BOGUS_OBJPTR}; HM_remember (HM_HH_getRemSet(toSpaceHH(s, args, opDepth)), &remElem_, true); + + size_t metaDataBytes; + size_t objectBytes; + size_t copyBytes; + + /* compute object size and bytes to be copied */ + computeObjectCopyParameters(s, + header, + p, + &objectBytes, + ©Bytes, + &metaDataBytes); + args->entangledBytes += copyBytes; } } diff --git a/runtime/gc/hierarchical-heap-collection.h b/runtime/gc/hierarchical-heap-collection.h index 9377789b4..a5014f559 100644 --- a/runtime/gc/hierarchical-heap-collection.h +++ b/runtime/gc/hierarchical-heap-collection.h @@ -40,6 +40,7 @@ struct ForwardHHObjptrArgs objptr containingObject; size_t bytesCopied; + size_t entangledBytes; uint64_t objectsCopied; uint64_t stacksCopied; diff --git a/runtime/gc/statistics.c b/runtime/gc/statistics.c index 176908d4d..b8eff6bbe 100644 --- a/runtime/gc/statistics.c +++ b/runtime/gc/statistics.c @@ -73,7 +73,7 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->numChecksSkipped = 0; cumulativeStatistics->numSuspectsMarked = 0; cumulativeStatistics->numSuspectsCleared = 0; - cumulativeStatistics->numEntanglementsDetected = 0; + cumulativeStatistics->approxRaceFactor = 0; cumulativeStatistics->timeLocalGC.tv_sec = 0; cumulativeStatistics->timeLocalGC.tv_nsec = 0; diff --git a/runtime/gc/statistics.h b/runtime/gc/statistics.h index 349aaf7f7..b93154268 100644 --- a/runtime/gc/statistics.h +++ b/runtime/gc/statistics.h @@ -69,7 +69,7 @@ struct GC_cumulativeStatistics { uintmax_t numChecksSkipped; uintmax_t numSuspectsMarked; uintmax_t numSuspectsCleared; - uintmax_t numEntanglementsDetected; + float approxRaceFactor; struct timespec timeLocalGC; struct timespec timeLocalPromo; From 2ed27e8db579e35eb0b831febb11d6d43e37c39f Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 26 Jun 2022 22:50:48 -0400 Subject: [PATCH 071/114] hijack root collected cc to report R* --- basis-library/mpl/gc.sml | 2 +- runtime/gc/concurrent-collection.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index e055d7a26..c87b6f718 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -163,7 +163,7 @@ struct fun rootBytesReclaimed () = C_UIntmax.toLargeInt - (sumAllProcs C_UIntmax.+ getRootCCBytesReclaimedOfProc) + (sumAllProcs C_UIntmax.max getRootCCBytesReclaimedOfProc) fun internalBytesReclaimed () = C_UIntmax.toLargeInt diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 
833d81225..321993b28 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -1189,7 +1189,7 @@ size_t CC_collectWithRoots( if (isConcurrent) { timespec_add(&(s->cumulativeStatistics->timeRootCC), &stopTime); s->cumulativeStatistics->numRootCCs++; - s->cumulativeStatistics->bytesReclaimedByRootCC += bytesScanned-bytesSaved; + s->cumulativeStatistics->bytesReclaimedByRootCC = max(s->cumulativeStatistics->bytesReclaimedByRootCC, bytesSaved); } else { timespec_add(&(s->cumulativeStatistics->timeInternalCC), &stopTime); s->cumulativeStatistics->numInternalCCs++; From 364ab74b4eca9d834b7274555fbfa6d44c8f1031 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 26 Jun 2022 22:58:42 -0400 Subject: [PATCH 072/114] fix concurrency bug b/w collector and remset writer --- runtime/gc/concurrent-list.c | 14 +++++- runtime/gc/concurrent-list.h | 1 + runtime/gc/remembered-set.c | 91 ++++++++++++++++++++---------------- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index 1b3c3ae65..ad299f150 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -58,10 +58,12 @@ pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ allocateChunkInConcList(concList, objSize, chunk); continue; } + + memcpy(frontier, p, objSize); pointer new_frontier = frontier + objSize; bool success = __sync_bool_compare_and_swap(&(chunk->frontier), frontier, new_frontier); - if (success) { - memcpy(frontier, p, objSize); + if (memcmp(frontier, p, objSize) == 0) + { // pthread_mutex_unlock(&concList->mutex); return frontier; } @@ -152,6 +154,14 @@ void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList) { pthread_mutex_unlock(&concList->mutex); } +HM_chunk CC_getLastChunk (CC_concList concList) { + HM_chunk c; + pthread_mutex_lock(&concList->mutex); + c = concList->lastChunk; + pthread_mutex_unlock(&concList->mutex); + return c; +} + void CC_appendConcList(CC_concList concList1, CC_concList concList2) { HM_chunk firstChunk, lastChunk; diff --git a/runtime/gc/concurrent-list.h b/runtime/gc/concurrent-list.h index a2d2392ff..99c1cd30f 100644 --- a/runtime/gc/concurrent-list.h +++ b/runtime/gc/concurrent-list.h @@ -30,6 +30,7 @@ pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize); // void CC_foreachRemInConc(GC_state s, CC_concList concList, struct HM_foreachDownptrClosure* f); void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList); +HM_chunk CC_getLastChunk (CC_concList concList); void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info); void CC_appendConcList(CC_concList concList1, CC_concList concList2); diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index d67798801..f7766b7be 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -50,6 +50,16 @@ void makeChunkFishy(FishyChunk * fc, HM_chunk chunk, pointer frontier, int* numF return; } +FishyChunk * resizeFishyArray (FishyChunk * fishyChunks, int * currentSize) { + int cs = *currentSize; + int new_size = 2 * cs; + FishyChunk * fc = malloc(sizeof(struct FishyChunk) * new_size); + memcpy(fc, fishyChunks, sizeof(struct FishyChunk) * cs); + *currentSize = new_size; + free(fishyChunks); + return fc; +} + void checkFishyChunks(GC_state s, FishyChunk * fishyChunks, int numFishyChunks, @@ -58,18 +68,26 @@ void checkFishyChunks(GC_state s, if (fishyChunks == NULL) { return; } - while (TRUE) { + bool changed = true; + 
while (changed) { int i = numFishyChunks - 1; - bool changed = false; + changed = false; while (i >= 0) { HM_chunk chunk = fishyChunks[i].chunk; pointer p = fishyChunks[i].scanned; pointer frontier = HM_getChunkFrontier(chunk); - while (p < frontier) + while (TRUE) { - f->fun(s, (HM_remembered)p, f->env); - p += sizeof(struct HM_remembered); + while (p < frontier) + { + f->fun(s, (HM_remembered)p, f->env); + p += sizeof(struct HM_remembered); + } + frontier = HM_getChunkFrontier(chunk); + if (p >= frontier) { + break; + } } if (p != fishyChunks[i].scanned) { fishyChunks[i].scanned = p; @@ -77,9 +95,6 @@ void checkFishyChunks(GC_state s, } i --; } - if (!changed) { - return; - } } } @@ -104,48 +119,42 @@ void HM_foreachPublic ( } HM_chunk chunk = (remSet->public).firstChunk; - FishyChunk* fishyChunks = malloc(sizeof(struct FishyChunk) * 2 * s->numberOfProcs); + HM_chunk lastChunk = CC_getLastChunk (&(remSet->public)); + int array_size = 2 * s->numberOfProcs; + FishyChunk* fishyChunks = malloc(sizeof(struct FishyChunk) * array_size); int numFishyChunks = 0; - while (TRUE) + while (chunk != NULL) { - if (chunk == NULL) - { - break; - } - else + while (chunk != NULL) { - while (chunk != NULL) + pointer p = HM_getChunkStart(chunk); + pointer frontier = HM_getChunkFrontier(chunk); + while (p < frontier) { - pointer p = HM_getChunkStart(chunk); - pointer frontier = HM_getChunkFrontier(chunk); - while (p < frontier) - { - f->fun(s, (HM_remembered)p, f->env); - p += sizeof(struct HM_remembered); - } - if ((chunk->retireChunk || chunk->nextChunk == NULL)) - { - if (numFishyChunks < 2 * s->numberOfProcs) { - makeChunkFishy(fishyChunks, chunk, p, &numFishyChunks); - } - else { - DIE("Fishy chunk limit exceeded"); - } - } - chunk = chunk->nextChunk; + f->fun(s, (HM_remembered)p, f->env); + p += sizeof(struct HM_remembered); } - checkFishyChunks(s, fishyChunks, numFishyChunks, f); - HM_chunk lastChunk = (remSet->public).lastChunk; - if (lastChunk != fishyChunks[numFishyChunks - 1].chunk) { - chunk = lastChunk; + if ((chunk->retireChunk || chunk->nextChunk == NULL)) + { + if (numFishyChunks >= array_size) { + fishyChunks = resizeFishyArray(fishyChunks, &array_size); + } + makeChunkFishy(fishyChunks, chunk, p, &numFishyChunks); } + chunk = chunk->nextChunk; + } + checkFishyChunks(s, fishyChunks, numFishyChunks, f); + lastChunk = CC_getLastChunk(&(remSet->public)); + if (lastChunk != fishyChunks[numFishyChunks - 1].chunk) { + assert (chunk->nextChunk != NULL); + chunk = chunk->nextChunk; } - struct HM_chunkList _chunkList; - HM_chunkList chunkList = &(_chunkList); - CC_popAsChunkList(&(remSet->public), chunkList); - HM_appendChunkList(&(remSet->private), chunkList); } free(fishyChunks); + struct HM_chunkList _chunkList; + HM_chunkList chunkList = &(_chunkList); + CC_popAsChunkList(&(remSet->public), chunkList); + HM_appendChunkList(&(remSet->private), chunkList); } void HM_foreachRemembered( From 807e41eebd469964e58f81b51c625aea4cfb13f1 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 26 Jun 2022 22:59:17 -0400 Subject: [PATCH 073/114] make remset changes in cc concurrent --- runtime/gc/concurrent-collection.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 321993b28..71dddd8d6 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -863,8 +863,12 @@ void CC_filterPinned( struct writeFreedBlockInfoFnClosure infoc = {.fun = CC_writeFreeChunkInfo, .env = &info}; - 
HM_freeRemSetWithInfo(s, oldRemSet, &infoc); - *oldRemSet = newRemSet; // this moves all data into remset of hh + // HM_freeRemSetWithInfo(s, oldRemSet, &infoc); + // this reintializes the private remset + HM_freeChunksInListWithInfo(s, &(oldRemSet->private), &infoc); + assert (newRemSet.public.firstChunk == NULL); + // this moves all data into remset of hh + HM_appendRemSet(oldRemSet, &newRemSet); // assert(HM_HH_getRemSet(hh)->firstChunk == newRemSet.firstChunk); // assert(HM_HH_getRemSet(hh)->lastChunk == newRemSet.lastChunk); From 1af991c7bf38e69b01c6511367aa70867c367b75 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Sun, 26 Jun 2022 23:00:10 -0400 Subject: [PATCH 074/114] brand new optimization that skips addition to concurrent remset --- runtime/gc/decheck.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 88b7a67b0..d437d6a1a 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -404,10 +404,11 @@ void make_entangled( { struct HM_remembered remElem_ = {.object = new_ptr, .from = BOGUS_OBJPTR}; HM_HH_rememberAtLevel(HM_getLevelHead(chunk), &(remElem_), true); + assert (HM_HH_getDepth(HM_getLevelHead(chunk)) != 1); } } else { - new_ptr = getRacyFwdPtr(ptr); + new_ptr = getRacyFwdPtr(p_ptr); } mea->unpinDepth = unpinDepth; @@ -460,22 +461,24 @@ objptr manage_entangled( GC_header header = getHeader(objptrToPointer (ptr, NULL)); + uint32_t current_ud = unpinDepthOfH(header); + enum PinType current_pt = pinType(header); bool manage = isFwdHeader(header) || - pinType (header) != PIN_ANY || - unpinDepthOfH(header) > unpinDepth; + current_pt != PIN_ANY || + current_ud > unpinDepth; - if (pinType(header) != PIN_NONE && unpinDepthOfH(header) == 0) + if (current_pt != PIN_NONE && current_ud == 0) { return ptr; } if (manage) { - uint32_t newUnpinDepth = pinType(header) == PIN_NONE ? unpinDepth : min(unpinDepthOfH(header), unpinDepth); + uint32_t newUnpinDepth = current_pt == PIN_NONE ? unpinDepth : min(current_ud, unpinDepth); struct ManageEntangledArgs mea = { .reader = reader, .root = allocator, .unpinDepth = newUnpinDepth, - .firstCall = true + .firstCall = !(current_pt == PIN_DOWN && current_ud == 1) }; make_entangled(s, &ptr, ptr, (void*) &mea); } From 3918e0e44e1ffac6f910c92ef95a348ab020aeba Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 27 Jun 2022 22:30:37 -0400 Subject: [PATCH 075/114] fix pinning bug --- runtime/gc/pin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index 5c07fb718..cea8a6d26 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -96,9 +96,10 @@ objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, /* if we are changing the unpinDepth, then the new pinType (nt) is * equal to the function argument pt. Otherwise its the max. */ enum PinType nt = newUnpinDepth < unpinDepthOfH(header) ? 
pt : maxPT(pt, pinType(header)); + GC_header unpinnedHeader = header & (~UNPIN_DEPTH_MASK) & (~PIN_MASK); GC_header newHeader = - (header & (~UNPIN_DEPTH_MASK)) // clear unpin bits + unpinnedHeader | ((GC_header)newUnpinDepth << UNPIN_DEPTH_SHIFT) // put in new unpinDepth | getRep(nt); // setup the pin type From 9ca04cb001f8ba8722ccb414d2037bd910fe4688 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 27 Jun 2022 22:31:16 -0400 Subject: [PATCH 076/114] initialize chunk before adding to concurrent list --- runtime/gc/concurrent-list.c | 7 +++++-- runtime/gc/remembered-set.c | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index ad299f150..01aa193ef 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -39,9 +39,12 @@ void allocateChunkInConcList( { concList->firstChunk = chunk; } + + memset((void *)HM_getChunkStart(chunk), '\0', HM_getChunkLimit(chunk) - HM_getChunkStart(chunk)); pthread_mutex_unlock(&concList->mutex); } + pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ assert(concList != NULL); // pthread_mutex_lock(&concList->mutex); @@ -59,11 +62,11 @@ pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ continue; } - memcpy(frontier, p, objSize); pointer new_frontier = frontier + objSize; bool success = __sync_bool_compare_and_swap(&(chunk->frontier), frontier, new_frontier); - if (memcmp(frontier, p, objSize) == 0) + if (success) { + memcpy(frontier, p, objSize); // pthread_mutex_unlock(&concList->mutex); return frontier; } diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index f7766b7be..209b833c0 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -79,7 +79,7 @@ void checkFishyChunks(GC_state s, pointer frontier = HM_getChunkFrontier(chunk); while (TRUE) { - while (p < frontier) + while (p < frontier && ((HM_remembered)p)->object != 0) { f->fun(s, (HM_remembered)p, f->env); p += sizeof(struct HM_remembered); @@ -129,7 +129,7 @@ void HM_foreachPublic ( { pointer p = HM_getChunkStart(chunk); pointer frontier = HM_getChunkFrontier(chunk); - while (p < frontier) + while (p < frontier && ((HM_remembered)p)->object != 0) { f->fun(s, (HM_remembered)p, f->env); p += sizeof(struct HM_remembered); From 0af5dab9c71c5cc6dc59e4f4bbfc945ca3617f64 Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Mon, 27 Jun 2022 22:31:43 -0400 Subject: [PATCH 077/114] don't need fishy chunks for unmarking phase --- runtime/gc/hierarchical-heap-collection.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 2539fc669..7bbc672da 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -763,7 +763,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) if (toSpaceLevel != NULL) { struct HM_foreachDownptrClosure unmarkClosure = {.fun = unmarkWrapper, .env = NULL}; - HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure, true); + HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure, false); } /* unset the flags on pinned chunks and update their HH pointer */ From a182c624b6ed2dab2dcfb5862c7f2c034730356f Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Tue, 28 Jun 2022 20:03:20 -0400 Subject: [PATCH 078/114] need true for unmarking --- runtime/gc/hierarchical-heap-collection.c | 2 +- runtime/gc/remembered-set.c | 2 +- 2 files changed, 2 
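/* The pin.c change above ("fix pinning bug") rebuilds the header by clearing
 * both the old unpin-depth bits and the old pin-type bits before OR-ing in
 * the new values; the previous code cleared only the depth bits, which could
 * leave a stale pin type behind. A self-contained sketch with an assumed bit
 * layout (the real masks, shifts, and getRep live in the runtime's header
 * definitions, not here): */
#include <stdint.h>

typedef uint64_t Header;

#define DEPTH_SHIFT 25
#define DEPTH_MASK  ((Header)0x1f << DEPTH_SHIFT)   /* assumed 5-bit depth field */
#define PIN_SHIFT   30
#define PIN_MASK    ((Header)0x3  << PIN_SHIFT)     /* assumed 2-bit pin type    */

static Header recomposeHeader(Header old, uint32_t newDepth, Header newPinRep)
{
  Header cleared = old & ~DEPTH_MASK & ~PIN_MASK;   /* drop both old fields */
  return cleared
       | ((Header)newDepth << DEPTH_SHIFT)          /* install the new depth */
       | (newPinRep << PIN_SHIFT);                  /* install the new pin type */
}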
insertions(+), 2 deletions(-) diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 7bbc672da..2539fc669 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -763,7 +763,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) if (toSpaceLevel != NULL) { struct HM_foreachDownptrClosure unmarkClosure = {.fun = unmarkWrapper, .env = NULL}; - HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure, false); + HM_foreachPublic(s, HM_HH_getRemSet(toSpaceLevel), &unmarkClosure, true); } /* unset the flags on pinned chunks and update their HH pointer */ diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index 209b833c0..7bab8924f 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -28,7 +28,7 @@ void HM_foreachPrivate( { pointer p = HM_getChunkStart(chunk); pointer frontier = HM_getChunkFrontier(chunk); - while (p < frontier) + while (p < frontier && ((HM_remembered)p)->object != 0) { f->fun(s, (HM_remembered)p, f->env); p += sizeof(struct HM_remembered); From ca719703a7d9ce104bc0f140b746a01fc1ae3f9e Mon Sep 17 00:00:00 2001 From: Jatin Arora Date: Wed, 29 Jun 2022 10:14:02 -0400 Subject: [PATCH 079/114] fix perf bug and potential deadlock --- runtime/gc/concurrent-list.c | 42 ++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index 01aa193ef..839b3323c 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -10,9 +10,9 @@ void allocateChunkInConcList( HM_chunk lastChunk) { GC_state s = pthread_getspecific(gcstate_key); - pthread_mutex_lock(&concList->mutex); + // pthread_mutex_lock(&concList->mutex); if(concList->lastChunk != lastChunk) { - pthread_mutex_unlock(&concList->mutex); + // pthread_mutex_unlock(&concList->mutex); return; } @@ -30,18 +30,38 @@ void allocateChunkInConcList( assert(chunk != NULL); chunk->prevChunk = lastChunk; - if (lastChunk != NULL) - { - lastChunk->nextChunk = chunk; + // if (lastChunk != NULL) + // { + // lastChunk->nextChunk = chunk; + // } + // concList->lastChunk = chunk; + + memset((void *)HM_getChunkStart(chunk), '\0', HM_getChunkLimit(chunk) - HM_getChunkStart(chunk)); + + bool success = false; + if(concList->lastChunk == lastChunk) { + pthread_mutex_lock(&concList->mutex); + if (concList->lastChunk == lastChunk) { + if (concList->firstChunk == NULL) { + concList->firstChunk = chunk; + } + if(lastChunk != NULL) { + lastChunk->nextChunk = chunk; + } + concList->lastChunk = chunk; + success = true; + } + pthread_mutex_unlock(&concList->mutex); } - concList->lastChunk = chunk; - if (concList->firstChunk == NULL) - { - concList->firstChunk = chunk; + if (!success) { + HM_freeChunk(s, chunk); } - memset((void *)HM_getChunkStart(chunk), '\0', HM_getChunkLimit(chunk) - HM_getChunkStart(chunk)); - pthread_mutex_unlock(&concList->mutex); + // if (!__sync_bool_compare_and_swap(&(concList->lastChunk), lastChunk, chunk)) { + // HM_freeChunk(s, chunk); + // } else if (lastChunk != NULL) { + // lastChunk->nextChunk = chunk; + // } } From 25990e62260e472f7ab34759db60e7fae1b05c0f Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 5 Jul 2022 22:56:31 -0400 Subject: [PATCH 080/114] add bytes pinned entangled stat --- basis-library/mpl/gc.sig | 1 + basis-library/mpl/gc.sml | 3 +++ basis-library/primitive/prim-mlton.sml | 2 ++ runtime/gc/assign.c | 2 +- runtime/gc/decheck.c | 4 ++-- 
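/* The concurrent-list changes above follow two rules worth seeing in
 * isolation: (1) an element slot is first reserved by a CAS on the chunk
 * frontier and only then written, and chunks are zero-filled up front, so a
 * reader that races ahead sees an all-zero slot (never a half-written one)
 * and simply skips entries whose object field is still 0; (2) a fresh chunk
 * is prepared outside the lock and linked in only if the tail is still the
 * one that was observed, otherwise it is discarded. This is a simplified
 * standalone sketch; ConcChunk, ConcList, and the helpers below are
 * placeholders, not the runtime's HM_chunk / CC_concList API. */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#define CHUNK_BYTES 4096

typedef struct ConcChunk {
  struct ConcChunk *next;
  char *frontier;                  /* advanced by CAS; data behind it is valid */
  char *limit;
  char data[];
} ConcChunk;

typedef struct ConcList {
  ConcChunk *first, *last;
  pthread_mutex_t lock;            /* taken only to link in a fresh chunk */
} ConcList;

static ConcChunk *newZeroedChunk(void)
{
  ConcChunk *c = calloc(1, CHUNK_BYTES);   /* zero-filled: lets readers detect */
  if (c == NULL) abort();                  /* slots that are not written yet   */
  c->frontier = c->data;
  c->limit = (char *)c + CHUNK_BYTES;
  return c;
}

/* Rule (2): double-checked append of a fresh chunk at the observed tail. */
static void appendChunk(ConcList *l, ConcChunk *seenLast)
{
  if (l->last != seenLast)
    return;                                /* someone already extended the list */
  ConcChunk *c = newZeroedChunk();         /* allocated outside the lock        */
  bool linked = false;
  pthread_mutex_lock(&l->lock);
  if (l->last == seenLast) {               /* re-check under the lock           */
    if (l->first == NULL) l->first = c;
    if (seenLast != NULL) seenLast->next = c;
    l->last = c;
    linked = true;
  }
  pthread_mutex_unlock(&l->lock);
  if (!linked)
    free(c);                               /* lost the race: discard the chunk  */
}

/* Rule (1): reserve space with a CAS on the frontier, then fill it in. */
static void *store(ConcList *l, const void *obj, size_t size)
{
  for (;;) {
    ConcChunk *c = l->last;
    char *old = (c == NULL) ? NULL : c->frontier;
    if (c == NULL || (size_t)(c->limit - old) < size) {
      appendChunk(l, c);                   /* not enough room: grow, then retry */
      continue;
    }
    if (__sync_bool_compare_and_swap(&c->frontier, old, old + size)) {
      memcpy(old, obj, size);              /* the slot is ours; now write it    */
      return old;
    }
  }
}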
runtime/gc/gc_state.c | 10 ++++++++++ runtime/gc/gc_state.h | 1 + runtime/gc/pin.c | 19 ++++++++++++++----- runtime/gc/pin.h | 6 ++++-- runtime/gc/statistics.c | 1 + runtime/gc/statistics.h | 1 + 11 files changed, 40 insertions(+), 10 deletions(-) diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index 8020fd306..767e0f67a 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -23,6 +23,7 @@ sig val numberSuspectsMarked: unit -> IntInf.int val numberSuspectsCleared: unit -> IntInf.int + val bytesPinnedEntangled: unit -> IntInf.int (* The following are all cumulative statistics (initially 0, and only * increase throughout execution). diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index c87b6f718..0e0beaa07 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -48,6 +48,9 @@ struct fun numberSuspectsCleared () = C_UIntmax.toLargeInt (GC.numberSuspectsCleared (gcState ())) + + fun bytesPinnedEntangled () = + C_UIntmax.toLargeInt (GC.bytesPinnedEntangled (gcState ())) end exception NotYetImplemented of string diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index 3224afac0..1f7b5116e 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -171,6 +171,8 @@ structure GC = val numberSuspectsMarked = _import "GC_numSuspectsMarked" runtime private: GCState.t -> C_UIntmax.t; val numberSuspectsCleared = _import "GC_numSuspectsCleared" runtime private: GCState.t -> C_UIntmax.t; + + val bytesPinnedEntangled = _import "GC_bytesPinnedEntangled" runtime private: GCState.t -> C_UIntmax.t; end structure HM = diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 08e856722..ccf7325af 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -185,7 +185,7 @@ void Assignable_writeBarrier( } uint32_t unpinDepth = dd; - bool success = pinObject(src, unpinDepth, PIN_DOWN); + bool success = pinObject(s, src, unpinDepth, PIN_DOWN); if (success || dd == unpinDepthOf(src)) { diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index d437d6a1a..1f2fa462e 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -387,7 +387,7 @@ void make_entangled( { bool addToRemSet = mea->firstCall; if (mutable) { - new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + new_ptr = pinObjectInfo(s, ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); } else { @@ -397,7 +397,7 @@ void make_entangled( // the unpinDepth of reachable maybe smaller. mea->unpinDepth = pinType(header) == PIN_NONE ? 
unpinDepth : min(unpinDepth, unpinDepthOfH(header)); foreachObjptrInObject(s, p_ptr, &trueObjptrPredicateClosure, &emanageClosure, FALSE); - new_ptr = pinObjectInfo(ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); + new_ptr = pinObjectInfo(s, ptr, unpinDepth, PIN_ANY, &headerChange, &pinChange); assert(pinType(getHeader(objptrToPointer(new_ptr, NULL))) == PIN_ANY); } if (pinChange && addToRemSet) diff --git a/runtime/gc/gc_state.c b/runtime/gc/gc_state.c index 2afce5eb7..909336808 100644 --- a/runtime/gc/gc_state.c +++ b/runtime/gc/gc_state.c @@ -235,6 +235,16 @@ uintmax_t GC_numSuspectsCleared(GC_state s) return count; } +uintmax_t GC_bytesPinnedEntangled(GC_state s) +{ + uintmax_t count = 0; + for (uint32_t p = 0; p < s->numberOfProcs; p++) + { + count += s->procStates[p].cumulativeStatistics->bytesPinnedEntangled; + } + return count; +} + float GC_approxRaceFactor(GC_state s) { float count = 0; diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 104b3c32b..6811727d9 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -143,6 +143,7 @@ PRIVATE float GC_approxRaceFactor(GC_state s); PRIVATE uintmax_t GC_numChecksSkipped(GC_state s); PRIVATE uintmax_t GC_numSuspectsMarked(GC_state s); PRIVATE uintmax_t GC_numSuspectsCleared(GC_state s); +PRIVATE uintmax_t GC_bytesPinnedEntangled(GC_state s); PRIVATE pointer GC_getCallFromCHandlerThread (GC_state s); PRIVATE void GC_setCallFromCHandlerThreads (GC_state s, pointer p); diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index cea8a6d26..afcd48e02 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -47,15 +47,20 @@ uint32_t unpinDepthOfH(GC_header h) return (h & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; } -bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt) +bool pinObject(GC_state s, objptr op, uint32_t unpinDepth, enum PinType pt) { bool a, b; - pinObjectInfo(op, unpinDepth, pt, &a, &b); + pinObjectInfo(s, op, unpinDepth, pt, &a, &b); return a; } -objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, - bool *headerChange, bool *pinChange) +objptr pinObjectInfo( + GC_state s, + objptr op, + uint32_t unpinDepth, + enum PinType pt, + bool *headerChange, + bool *pinChange) { pointer p = objptrToPointer(op, NULL); assert(pt != PIN_NONE); @@ -110,7 +115,11 @@ objptr pinObjectInfo(objptr op, uint32_t unpinDepth, enum PinType pt, else { if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { *headerChange = true; - *pinChange = (nt != pinType(header)); + bool didPinChange = (nt != pinType(header)); + *pinChange = didPinChange; + if (nt == PIN_ANY && didPinChange) { + s->cumulativeStatistics->bytesPinnedEntangled += objectSize(s, p); + } assert (!hasFwdPtr(p)); assert(pinType(newHeader) == nt); return op; diff --git a/runtime/gc/pin.h b/runtime/gc/pin.h index 4ce451db3..7591d3044 100644 --- a/runtime/gc/pin.h +++ b/runtime/gc/pin.h @@ -38,9 +38,11 @@ enum PinType * Note that regardless of whether or not the object was previously * pinned, this does a writeMin on the unpinDepth of the object. 
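/* The comment above describes pinning as a "writeMin" on the object's unpin
 * depth: the stored depth only ever decreases, even when several processors
 * race to pin the same object. A standalone sketch of that retry loop, using
 * a plain uint32_t cell instead of the packed object header: */
#include <stdint.h>
#include <stdbool.h>

static bool writeMin(uint32_t *cell, uint32_t newDepth)
{
  for (;;) {
    uint32_t old = *cell;
    if (newDepth >= old)
      return false;                 /* already at least this shallow; nothing to do */
    if (__sync_bool_compare_and_swap(cell, old, newDepth))
      return true;                  /* we lowered the depth */
    /* lost a race: another processor changed the cell; retry with its value */
  }
}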
*/ -bool pinObject(objptr op, uint32_t unpinDepth, enum PinType pt); +bool pinObject(GC_state s, objptr op, uint32_t unpinDepth, enum PinType pt); -objptr pinObjectInfo(objptr op, +objptr pinObjectInfo( + GC_state s, + objptr op, uint32_t unpinDepth, enum PinType pt, bool* headerChange, diff --git a/runtime/gc/statistics.c b/runtime/gc/statistics.c index b8eff6bbe..4317611e6 100644 --- a/runtime/gc/statistics.c +++ b/runtime/gc/statistics.c @@ -73,6 +73,7 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->numChecksSkipped = 0; cumulativeStatistics->numSuspectsMarked = 0; cumulativeStatistics->numSuspectsCleared = 0; + cumulativeStatistics->bytesPinnedEntangled = 0; cumulativeStatistics->approxRaceFactor = 0; cumulativeStatistics->timeLocalGC.tv_sec = 0; diff --git a/runtime/gc/statistics.h b/runtime/gc/statistics.h index b93154268..d1abc974a 100644 --- a/runtime/gc/statistics.h +++ b/runtime/gc/statistics.h @@ -69,6 +69,7 @@ struct GC_cumulativeStatistics { uintmax_t numChecksSkipped; uintmax_t numSuspectsMarked; uintmax_t numSuspectsCleared; + uintmax_t bytesPinnedEntangled; float approxRaceFactor; struct timespec timeLocalGC; From 00973a4061d5890224ead3e5cfea5ce89412e1dd Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 3 Oct 2022 11:30:04 -0400 Subject: [PATCH 081/114] initial tracking of total blocks allocated and purposes --- runtime/gc/block-allocator.c | 112 +++++++++++++++++++++++++++----- runtime/gc/block-allocator.h | 31 +++++++-- runtime/gc/chunk.c | 1 + runtime/gc/garbage-collection.c | 4 ++ runtime/gc/init.c | 7 ++ runtime/gc/logger.c | 1 + runtime/gc/logger.h | 1 + 7 files changed, 137 insertions(+), 20 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 20821cdce..d1c6ebe5e 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -134,9 +134,11 @@ static void initBlockAllocator(GC_state s, BlockAllocator ball) { ball->completelyEmptyGroup.firstSuperBlock = NULL; - ball->numBlocks = 0; - ball->numBlocksInUse = 0; ball->firstFreedByOther = NULL; + ball->numBlocks = 0; + for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { + ball->numBlocksInUse[p] = 0; + } for (size_t i = 0; i < numMegaBlockSizeClasses; i++) { ball->megaBlockSizeClass[i].firstMegaBlock = NULL; @@ -246,7 +248,8 @@ static void mmapNewSuperBlocks( static Blocks allocateInSuperBlock( GC_state s, SuperBlock sb, - int sizeClass) + int sizeClass, + enum BlockPurpose purpose) { if ((size_t)sb->numBlocksFree == SUPERBLOCK_SIZE(s)) { // It's completely empty! We can reuse. 
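/* These block-allocator hunks thread an allocation "purpose" through the
 * allocator: every allocation bumps a per-purpose counter on its owner and
 * stamps the purpose on the block itself, so the matching free can decrement
 * the same counter without being told the purpose again. A self-contained
 * sketch of that invariant, with made-up names (Tag, Block, Counters) rather
 * than the runtime's Blocks / SuperBlock / BlockAllocator structures: */
#include <stddef.h>

enum Tag { TAG_A, TAG_B, NUM_TAGS };

typedef struct Block    { enum Tag tag; size_t n; } Block;
typedef struct Counters { size_t inUse[NUM_TAGS]; } Counters;

static void accountAlloc(Counters *c, Block *b, size_t n, enum Tag tag)
{
  b->tag = tag;               /* stamp the purpose on the block itself        */
  b->n = n;
  c->inUse[tag] += n;         /* credited to the purpose chosen by the caller */
}

static void accountFree(Counters *c, Block *b)
{
  c->inUse[b->tag] -= b->n;   /* the free side reads the stamp back           */
}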
@@ -262,10 +265,11 @@ static Blocks allocateInSuperBlock( sb->numBlocksFree -= (1 << sb->sizeClass); assert(sb->owner != NULL); - sb->owner->numBlocksInUse += (1 << sb->sizeClass); + sb->owner->numBlocksInUse[purpose] += (1 << sb->sizeClass); result->container = sb; result->numBlocks = 1 << sb->sizeClass; + result->purpose = purpose; return result; } @@ -276,11 +280,12 @@ static Blocks allocateInSuperBlock( sb->numBlocksFree -= (1 << sb->sizeClass); assert(sb->owner != NULL); - sb->owner->numBlocksInUse += (1 << sb->sizeClass); + sb->owner->numBlocksInUse[purpose] += (1 << sb->sizeClass); Blocks bs = (Blocks)result; bs->container = sb; bs->numBlocks = (1 << sb->sizeClass); + bs->purpose = purpose; return bs; } @@ -300,16 +305,20 @@ static void deallocateInSuperBlock( sb->firstFree = block; sb->numBlocksFree += (1 << sb->sizeClass); + enum BlockPurpose purpose = block->purpose; + assert( purpose < NUM_BLOCK_PURPOSES ); + assert(sb->owner != NULL); - assert(sb->owner->numBlocksInUse >= ((size_t)1 << sb->sizeClass)); - sb->owner->numBlocksInUse -= (1 << sb->sizeClass); + assert(sb->owner->numBlocksInUse[purpose] >= ((size_t)1 << sb->sizeClass)); + sb->owner->numBlocksInUse[purpose] -= (1 << sb->sizeClass); } static Blocks tryAllocateAndAdjustSuperBlocks( GC_state s, BlockAllocator ball, - int class) + int class, + enum BlockPurpose purpose) { SuperBlockList targetList = NULL; @@ -337,7 +346,7 @@ static Blocks tryAllocateAndAdjustSuperBlocks( SuperBlock sb = targetList->firstSuperBlock; assert( sb != NULL ); enum FullnessGroup fg = fullness(s, sb); - Blocks result = allocateInSuperBlock(s, sb, class); + Blocks result = allocateInSuperBlock(s, sb, class, purpose); enum FullnessGroup newfg = fullness(s, sb); if (fg != newfg) { @@ -401,11 +410,17 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { if (sizeClass >= s->controls->megablockThreshold) { size_t nb = mb->numBlocks; + enum BlockPurpose purpose = mb->purpose; GC_release((pointer)mb, s->controls->blockSize * mb->numBlocks); LOG(LM_CHUNK_POOL, LL_INFO, "Released large allocation of %zu blocks (unmap threshold: %zu)", nb, (size_t)1 << (s->controls->megablockThreshold - 1)); + + pthread_mutex_lock(&(global->megaBlockLock)); + global->numBlocks -= nb; + global->numBlocksInUse[purpose] -= nb; + pthread_mutex_unlock(&(global->megaBlockLock)); return; } @@ -414,6 +429,8 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { pthread_mutex_lock(&(global->megaBlockLock)); mb->nextMegaBlock = global->megaBlockSizeClass[mbClass].firstMegaBlock; global->megaBlockSizeClass[mbClass].firstMegaBlock = mb; + assert( global->numBlocksInUse[mb->purpose] >= mb->numBlocks ); + global->numBlocksInUse[mb->purpose] -= mb->numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); return; } @@ -422,7 +439,8 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { static MegaBlock tryFindMegaBlock( GC_state s, size_t numBlocksNeeded, - size_t sizeClass) + size_t sizeClass, + enum BlockPurpose purpose) { BlockAllocator global = s->blockAllocatorGlobal; assert(sizeClass >= s->controls->superblockThreshold); @@ -450,6 +468,7 @@ static MegaBlock tryFindMegaBlock( if (mb->numBlocks >= numBlocksNeeded) { *mbp = mb->nextMegaBlock; mb->nextMegaBlock = NULL; + global->numBlocksInUse[purpose] += mb->numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); LOG(LM_CHUNK_POOL, LL_INFO, @@ -468,7 +487,7 @@ static MegaBlock tryFindMegaBlock( } -static MegaBlock mmapNewMegaBlock(GC_state s, size_t numBlocks) +static 
MegaBlock mmapNewMegaBlock(GC_state s, size_t numBlocks, enum BlockPurpose purpose) { pointer start = GC_mmapAnon(NULL, s->controls->blockSize * numBlocks); if (MAP_FAILED == start) { @@ -478,14 +497,21 @@ static MegaBlock mmapNewMegaBlock(GC_state s, size_t numBlocks) DIE("whoops, mmap didn't align by the block-size."); } + BlockAllocator global = s->blockAllocatorGlobal; + pthread_mutex_lock(&(global->megaBlockLock)); + global->numBlocks += numBlocks; + global->numBlocksInUse[purpose] += numBlocks; + pthread_mutex_unlock(&(global->megaBlockLock)); + MegaBlock mb = (MegaBlock)start; mb->numBlocks = numBlocks; mb->nextMegaBlock = NULL; + mb->purpose = purpose; return mb; } -Blocks allocateBlocks(GC_state s, size_t numBlocks) { +Blocks allocateBlocksWithPurpose(GC_state s, size_t numBlocks, enum BlockPurpose purpose) { BlockAllocator local = s->blockAllocatorLocal; assertBlockAllocatorOkay(s, local); @@ -497,10 +523,10 @@ Blocks allocateBlocks(GC_state s, size_t numBlocks) { * fails, we're a bit screwed. */ - MegaBlock mb = tryFindMegaBlock(s, numBlocks, class); + MegaBlock mb = tryFindMegaBlock(s, numBlocks, class, purpose); if (NULL == mb) - mb = mmapNewMegaBlock(s, numBlocks); + mb = mmapNewMegaBlock(s, numBlocks, purpose); if (NULL == mb) DIE("ran out of space!"); @@ -510,6 +536,7 @@ Blocks allocateBlocks(GC_state s, size_t numBlocks) { Blocks bs = (Blocks)mb; bs->container = NULL; bs->numBlocks = actualNumBlocks; + bs->purpose = purpose; return bs; } @@ -517,31 +544,39 @@ Blocks allocateBlocks(GC_state s, size_t numBlocks) { assertBlockAllocatorOkay(s, local); /** Look in local first. */ - Blocks result = tryAllocateAndAdjustSuperBlocks(s, local, class); + Blocks result = tryAllocateAndAdjustSuperBlocks(s, local, class, purpose); if (result != NULL) { assertBlockAllocatorOkay(s, local); + assert( result->purpose == purpose ); return result; } /** If both local fails, we need to mmap new superchunks. 
*/ mmapNewSuperBlocks(s, local); - result = tryAllocateAndAdjustSuperBlocks(s, local, class); + result = tryAllocateAndAdjustSuperBlocks(s, local, class, purpose); if (result == NULL) { DIE("Ran out of space for new superblocks!"); } assertBlockAllocatorOkay(s, local); + assert( result->purpose == purpose ); return result; } +Blocks allocateBlocks(GC_state s, size_t numBlocks) { + return allocateBlocksWithPurpose(s, numBlocks, BLOCK_FOR_UNKNOWN); +} + + void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f) { BlockAllocator local = s->blockAllocatorLocal; assertBlockAllocatorOkay(s, local); size_t numBlocks = bs->numBlocks; SuperBlock sb = bs->container; + enum BlockPurpose purpose = bs->purpose; pointer blockStart = (pointer)bs; #if ASSERT @@ -586,6 +621,7 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f) { MegaBlock mb = (MegaBlock)blockStart; mb->numBlocks = numBlocks; mb->nextMegaBlock = NULL; + mb->purpose = purpose; freeMegaBlock(s, mb, sizeClass); return; } @@ -594,6 +630,7 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f) { FreeBlock elem = (FreeBlock)blockStart; elem->container = sb; + elem->purpose = purpose; BlockAllocator owner = sb->owner; assert(owner != NULL); assert(sb->sizeClass == computeSizeClass(numBlocks)); @@ -613,4 +650,47 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f) { } } + +void queryCurrentBlockUsage(GC_state s, size_t *numBlocks, size_t *blocksInUseArr) { + *numBlocks = 0; + for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { + blocksInUseArr[p] = 0; + } + + // query local allocators + for (uint32_t i = 0; i < s->numberOfProcs; i++) { + BlockAllocator ball = s->procStates[i].blockAllocatorLocal; + *numBlocks += ball->numBlocks; + for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { + blocksInUseArr[p] += ball->numBlocksInUse[p]; + } + } + + // query global allocator + BlockAllocator global = s->blockAllocatorGlobal; + *numBlocks += global->numBlocks; + for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { + blocksInUseArr[p] += global->numBlocksInUse[p]; + } +} + + +void logCurrentBlockUsage(GC_state s) { + size_t count; + size_t inUse[NUM_BLOCK_PURPOSES]; + queryCurrentBlockUsage(s, &count, (size_t*)inUse); + + struct timespec now; + timespec_now(&now); + LOG(LM_BLOCK_ALLOCATOR, LL_INFO, + "block-allocator(%zu.%.9zu):\n" + " total mmap'ed %zu\n" + " BLOCK_FOR_UNKNOWN %zu (%zu%%)", + now.tv_sec, + now.tv_nsec, + count, + inUse[BLOCK_FOR_UNKNOWN], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN] / (double)count)); +} + #endif diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index 76d277ebd..bf33e4689 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -16,6 +16,17 @@ #if (defined (MLTON_GC_INTERNAL_TYPES)) +enum BlockPurpose { + BLOCK_FOR_HEAP_CHUNK, + BLOCK_FOR_REMEMBERED_SET, + BLOCK_FOR_FORGOTTEN_SET, + BLOCK_FOR_HH_ALLOCATOR, + BLOCK_FOR_UF_ALLOCATOR, + BLOCK_FOR_UNKNOWN, + NUM_BLOCK_PURPOSES /** Hack to know statically how many there are. Make sure + * this comes last in the list. */ +}; + /** This is used for debugging, to write info about freed blocks when * s->controls->debugKeepFreedBlocks is enabled. 
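/* NUM_BLOCK_PURPOSES closes the enum above, so tables and loops indexed by
 * purpose can be sized statically and stay correct when a purpose is added
 * before it. A hypothetical name table and report loop (neither is part of
 * this patch; logCurrentBlockUsage above spells its labels out by hand): */
#include <stddef.h>
#include <stdio.h>

static const char *blockPurposeName[NUM_BLOCK_PURPOSES] = {
  [BLOCK_FOR_HEAP_CHUNK]     = "BLOCK_FOR_HEAP_CHUNK",
  [BLOCK_FOR_REMEMBERED_SET] = "BLOCK_FOR_REMEMBERED_SET",
  [BLOCK_FOR_FORGOTTEN_SET]  = "BLOCK_FOR_FORGOTTEN_SET",
  [BLOCK_FOR_HH_ALLOCATOR]   = "BLOCK_FOR_HH_ALLOCATOR",
  [BLOCK_FOR_UF_ALLOCATOR]   = "BLOCK_FOR_UF_ALLOCATOR",
  [BLOCK_FOR_UNKNOWN]        = "BLOCK_FOR_UNKNOWN"
};

static void printBlockUsage(size_t numBlocks,
                            const size_t inUse[NUM_BLOCK_PURPOSES])
{
  for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++)
    fprintf(stderr, "  %-26s %zu of %zu blocks\n",
            blockPurposeName[p], inUse[p], numBlocks);
}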
* @@ -48,6 +59,7 @@ typedef struct DebugKeptFreeBlock { typedef struct FreeBlock { struct FreeBlock *nextFree; struct SuperBlock *container; + enum BlockPurpose purpose; } *FreeBlock; @@ -103,6 +115,7 @@ typedef struct SuperBlockList { typedef struct MegaBlock { struct MegaBlock *nextMegaBlock; size_t numBlocks; + enum BlockPurpose purpose; } *MegaBlock; @@ -124,11 +137,8 @@ enum FullnessGroup { typedef struct BlockAllocator { - /** These are used for local to decide to move things to global, but - * are ignored in global. - */ size_t numBlocks; - size_t numBlocksInUse; + size_t numBlocksInUse[NUM_BLOCK_PURPOSES]; /** There are 3 fullness groups in each size class: * 0 is completely full, i.e. no free blocks available @@ -160,6 +170,7 @@ typedef struct BlockAllocator { typedef struct Blocks { SuperBlock container; size_t numBlocks; + enum BlockPurpose purpose; } *Blocks; #else @@ -179,6 +190,18 @@ void initLocalBlockAllocator(GC_state s, BlockAllocator globalAllocator); /** Get a pointer to the start of some number of free contiguous blocks. */ Blocks allocateBlocks(GC_state s, size_t numBlocks); +Blocks allocateBlocksWithPurpose(GC_state s, size_t numBlocks, enum BlockPurpose purpose); + +/** populate: + * *numBlocks := current total number of blocks mmap'ed + * blocksInUseArr[p] := current number of blocks allocated for purpose `p` + * + * The `blocksInUseArr` must be an array of length `NUM_BLOCK_PURPOSES` + */ +void queryCurrentBlockUsage(GC_state s, size_t *numBlocks, size_t *blocksInUseArr); + +void logCurrentBlockUsage(GC_state s); + /** Free a group of contiguous blocks. */ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f); diff --git a/runtime/gc/chunk.c b/runtime/gc/chunk.c index 3cf61731b..82567d940 100644 --- a/runtime/gc/chunk.c +++ b/runtime/gc/chunk.c @@ -199,6 +199,7 @@ void HM_freeChunkWithInfo( Blocks bs = (Blocks)chunk; bs->numBlocks = numBlocks; bs->container = container; + bs->purpose = BLOCK_FOR_UNKNOWN; freeBlocks(s, bs, &c); } diff --git a/runtime/gc/garbage-collection.c b/runtime/gc/garbage-collection.c index e4b4b1b4d..0903591ef 100644 --- a/runtime/gc/garbage-collection.c +++ b/runtime/gc/garbage-collection.c @@ -102,6 +102,10 @@ void GC_collect (GC_state s, size_t bytesRequested, bool force) { HM_EBR_leaveQuiescentState(s); HM_EBR_enterQuiescentState(s); + if (s->procNumber == 0) { + logCurrentBlockUsage(s); + } + // HM_HierarchicalHeap h = getThreadCurrent(s)->hierarchicalHeap; // while (h->nextAncestor != NULL) h = h->nextAncestor; // if (HM_HH_getDepth(h) == 0 && HM_getChunkListSize(HM_HH_getChunkList(h)) > 8192) { diff --git a/runtime/gc/init.c b/runtime/gc/init.c index 4c373b212..cf4e7335a 100644 --- a/runtime/gc/init.c +++ b/runtime/gc/init.c @@ -609,6 +609,13 @@ void GC_lateInit(GC_state s) { initLocalBlockAllocator(s, initGlobalBlockAllocator(s)); + struct timespec now; + timespec_now(&now); + LOG(LM_BLOCK_ALLOCATOR, LL_INFO, + "block-allocator(%zu.%.9zu): init", + now.tv_sec, + now.tv_nsec); + s->nextChunkAllocSize = s->controls->allocChunkSize; /* Initialize profiling. 
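/* One way debugging code might consume the query interface declared above.
 * queryCurrentBlockUsage and NUM_BLOCK_PURPOSES come from block-allocator.h
 * as added in this patch; the reporting function itself is illustrative and
 * guards the percentage against an allocator with no blocks yet: */
static void reportHeapChunkShare(GC_state s)
{
  size_t total;
  size_t inUse[NUM_BLOCK_PURPOSES];
  queryCurrentBlockUsage(s, &total, inUse);

  size_t pct = (total == 0) ? 0 : (100 * inUse[BLOCK_FOR_HEAP_CHUNK]) / total;
  LOG(LM_BLOCK_ALLOCATOR, LL_INFO,
      "heap chunks currently use %zu of %zu blocks (%zu%%)",
      inUse[BLOCK_FOR_HEAP_CHUNK], total, pct);
}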
This must occur after processing diff --git a/runtime/gc/logger.c b/runtime/gc/logger.c index d48a245ab..432ae0254 100644 --- a/runtime/gc/logger.c +++ b/runtime/gc/logger.c @@ -205,6 +205,7 @@ bool stringToLogModule(enum LogModule* module, const char* moduleString) { struct Conversion conversions[] = {{.string = "allocation", .module = LM_ALLOCATION}, + {.string = "block-allocator", .module = LM_BLOCK_ALLOCATOR}, {.string = "chunk", .module = LM_CHUNK}, {.string = "chunk-pool", .module = LM_CHUNK_POOL}, {.string = "dfs-mark", .module = LM_DFS_MARK}, diff --git a/runtime/gc/logger.h b/runtime/gc/logger.h index 5d7680a32..5a5cce8a7 100644 --- a/runtime/gc/logger.h +++ b/runtime/gc/logger.h @@ -18,6 +18,7 @@ enum LogModule { LM_ALLOCATION, + LM_BLOCK_ALLOCATOR, LM_CHUNK, LM_CHUNK_POOL, LM_DFS_MARK, From 56b0b6b87f4236cc6df4f82e1c905ee4c2767d2b Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 3 Oct 2022 11:52:33 -0400 Subject: [PATCH 082/114] block purpose plumbing --- runtime/gc/block-allocator.c | 8 ++--- runtime/gc/block-allocator.h | 2 +- runtime/gc/cc-work-list.c | 2 +- runtime/gc/chunk.c | 37 +++++++++++++++++------ runtime/gc/chunk.h | 4 +-- runtime/gc/concurrent-collection.c | 8 ++--- runtime/gc/concurrent-list.c | 2 +- runtime/gc/hierarchical-heap-collection.c | 4 +-- runtime/gc/remembered-set.c | 2 +- 9 files changed, 43 insertions(+), 26 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index d1c6ebe5e..10553db3c 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -566,7 +566,7 @@ Blocks allocateBlocksWithPurpose(GC_state s, size_t numBlocks, enum BlockPurpose Blocks allocateBlocks(GC_state s, size_t numBlocks) { - return allocateBlocksWithPurpose(s, numBlocks, BLOCK_FOR_UNKNOWN); + return allocateBlocksWithPurpose(s, numBlocks, BLOCK_FOR_UNKNOWN_PURPOSE); } @@ -685,12 +685,12 @@ void logCurrentBlockUsage(GC_state s) { LOG(LM_BLOCK_ALLOCATOR, LL_INFO, "block-allocator(%zu.%.9zu):\n" " total mmap'ed %zu\n" - " BLOCK_FOR_UNKNOWN %zu (%zu%%)", + " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)", now.tv_sec, now.tv_nsec, count, - inUse[BLOCK_FOR_UNKNOWN], - (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN] / (double)count)); + inUse[BLOCK_FOR_UNKNOWN_PURPOSE], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); } #endif diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index bf33e4689..602135896 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -22,7 +22,7 @@ enum BlockPurpose { BLOCK_FOR_FORGOTTEN_SET, BLOCK_FOR_HH_ALLOCATOR, BLOCK_FOR_UF_ALLOCATOR, - BLOCK_FOR_UNKNOWN, + BLOCK_FOR_UNKNOWN_PURPOSE, NUM_BLOCK_PURPOSES /** Hack to know statically how many there are. Make sure * this comes last in the list. 
*/ }; diff --git a/runtime/gc/cc-work-list.c b/runtime/gc/cc-work-list.c index 44e19ba99..324d4fae0 100644 --- a/runtime/gc/cc-work-list.c +++ b/runtime/gc/cc-work-list.c @@ -18,7 +18,7 @@ void CC_workList_free( CC_workList w) { HM_chunkList c = &(w->storage); - HM_freeChunksInListWithInfo(s, c, NULL); + HM_freeChunksInListWithInfo(s, c, NULL, BLOCK_FOR_UNKNOWN_PURPOSE); w->currentChunk = NULL; } diff --git a/runtime/gc/chunk.c b/runtime/gc/chunk.c index 82567d940..f484928f4 100644 --- a/runtime/gc/chunk.c +++ b/runtime/gc/chunk.c @@ -119,11 +119,11 @@ HM_chunk HM_initializeChunk(pointer start, pointer end) { } -HM_chunk HM_getFreeChunk(GC_state s, size_t bytesRequested) { +HM_chunk HM_getFreeChunkWithPurpose(GC_state s, size_t bytesRequested, enum BlockPurpose purpose) { size_t chunkWidth = align(bytesRequested + sizeof(struct HM_chunk), HM_BLOCK_SIZE); size_t numBlocks = chunkWidth / HM_BLOCK_SIZE; - Blocks start = allocateBlocks(s, numBlocks); + Blocks start = allocateBlocksWithPurpose(s, numBlocks, purpose); SuperBlock container = start->container; numBlocks = start->numBlocks; HM_chunk result = @@ -134,6 +134,11 @@ HM_chunk HM_getFreeChunk(GC_state s, size_t bytesRequested) { } +HM_chunk HM_getFreeChunk(GC_state s, size_t bytesRequested) { + return HM_getFreeChunkWithPurpose(s, bytesRequested, BLOCK_FOR_UNKNOWN_PURPOSE); +} + + struct writeChunkInfoArgs { writeFreedBlockInfoFn fun; void* env; @@ -174,7 +179,8 @@ void writeChunkInfo( void HM_freeChunkWithInfo( GC_state s, HM_chunk chunk, - writeFreedBlockInfoFnClosure f) + writeFreedBlockInfoFnClosure f, + enum BlockPurpose purpose) { struct writeChunkInfoArgs args; @@ -199,35 +205,40 @@ void HM_freeChunkWithInfo( Blocks bs = (Blocks)chunk; bs->numBlocks = numBlocks; bs->container = container; - bs->purpose = BLOCK_FOR_UNKNOWN; + bs->purpose = purpose; freeBlocks(s, bs, &c); } void HM_freeChunk(GC_state s, HM_chunk chunk) { - HM_freeChunkWithInfo(s, chunk, NULL); + HM_freeChunkWithInfo(s, chunk, NULL, BLOCK_FOR_UNKNOWN_PURPOSE); } void HM_freeChunksInListWithInfo( GC_state s, HM_chunkList list, - writeFreedBlockInfoFnClosure f) + writeFreedBlockInfoFnClosure f, + enum BlockPurpose purpose) { HM_chunk chunk = list->firstChunk; while (chunk != NULL) { HM_chunk next = chunk->nextChunk; - HM_freeChunkWithInfo(s, chunk, f); + HM_freeChunkWithInfo(s, chunk, f, purpose); chunk = next; } HM_initChunkList(list); } void HM_freeChunksInList(GC_state s, HM_chunkList list) { - HM_freeChunksInListWithInfo(s, list, NULL); + HM_freeChunksInListWithInfo(s, list, NULL, BLOCK_FOR_UNKNOWN_PURPOSE); } -HM_chunk HM_allocateChunk(HM_chunkList list, size_t bytesRequested) { +HM_chunk HM_allocateChunkWithPurpose( + HM_chunkList list, + size_t bytesRequested, + enum BlockPurpose purpose) +{ GC_state s = pthread_getspecific(gcstate_key); - HM_chunk chunk = HM_getFreeChunk(s, bytesRequested); + HM_chunk chunk = HM_getFreeChunkWithPurpose(s, bytesRequested, purpose); if (NULL == chunk) { DIE("Out of memory. 
Unable to allocate chunk of size %zu.", @@ -246,6 +257,12 @@ HM_chunk HM_allocateChunk(HM_chunkList list, size_t bytesRequested) { return chunk; } + +HM_chunk HM_allocateChunk(HM_chunkList list, size_t bytesRequested) { + return HM_allocateChunkWithPurpose(list, bytesRequested, BLOCK_FOR_UNKNOWN_PURPOSE); +} + + void HM_initChunkList(HM_chunkList list) { list->firstChunk = NULL; list->lastChunk = NULL; diff --git a/runtime/gc/chunk.h b/runtime/gc/chunk.h index 478041e16..d634706d8 100644 --- a/runtime/gc/chunk.h +++ b/runtime/gc/chunk.h @@ -162,8 +162,8 @@ void HM_initChunkList(HM_chunkList list); void HM_freeChunk(GC_state s, HM_chunk chunk); void HM_freeChunksInList(GC_state s, HM_chunkList list); -void HM_freeChunkWithInfo(GC_state s, HM_chunk chunk, writeFreedBlockInfoFnClosure f); -void HM_freeChunksInListWithInfo(GC_state s, HM_chunkList list, writeFreedBlockInfoFnClosure f); +void HM_freeChunkWithInfo(GC_state s, HM_chunk chunk, writeFreedBlockInfoFnClosure f, enum BlockPurpose purpose); +void HM_freeChunksInListWithInfo(GC_state s, HM_chunkList list, writeFreedBlockInfoFnClosure f, enum BlockPurpose purpose); // void HM_deleteChunks(GC_state s, HM_chunkList deleteList); void HM_appendChunkList(HM_chunkList destinationChunkList, HM_chunkList chunkList); diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 942c8b76a..3c379b869 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -865,7 +865,7 @@ void CC_filterPinned( // HM_freeRemSetWithInfo(s, oldRemSet, &infoc); // this reintializes the private remset - HM_freeChunksInListWithInfo(s, &(oldRemSet->private), &infoc); + HM_freeChunksInListWithInfo(s, &(oldRemSet->private), &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); assert (newRemSet.public.firstChunk == NULL); // this moves all data into remset of hh HM_appendRemSet(oldRemSet, &newRemSet); @@ -1151,8 +1151,8 @@ size_t CC_collectWithRoots( /** SAM_NOTE: TODO: deleteList no longer needed, because * block allocator handles that. 
*/ - HM_freeChunksInListWithInfo(s, origList, &infoc); - HM_freeChunksInListWithInfo(s, deleteList, &infoc); + HM_freeChunksInListWithInfo(s, origList, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, deleteList, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); for(HM_chunk chunk = repList->firstChunk; chunk!=NULL; chunk = chunk->nextChunk) { @@ -1168,7 +1168,7 @@ size_t CC_collectWithRoots( assert(isChunkInList(stackChunk, origList)); HM_unlinkChunk(origList, stackChunk); info.freedType = CC_FREED_STACK_CHUNK; - HM_freeChunkWithInfo(s, stackChunk, &infoc); + HM_freeChunkWithInfo(s, stackChunk, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); info.freedType = CC_FREED_NORMAL_CHUNK; cp->stack = BOGUS_OBJPTR; diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index 839b3323c..7ef53d1d5 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -217,5 +217,5 @@ void CC_appendConcList(CC_concList concList1, CC_concList concList2) { void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info) { struct HM_chunkList _chunkList; CC_popAsChunkList(concList, &(_chunkList)); - HM_freeChunksInListWithInfo(s, &(_chunkList), info); + HM_freeChunksInListWithInfo(s, &(_chunkList), info, BLOCK_FOR_UNKNOWN_PURPOSE); } \ No newline at end of file diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index bb433398d..7579f3ea2 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -664,7 +664,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) ES_foreachSuspect(s, suspects, &fObjptrClosure); info.depth = depth; info.freedType = LGC_FREED_SUSPECT_CHUNK; - HM_freeChunksInListWithInfo(s, suspects, &infoc); + HM_freeChunksInListWithInfo(s, suspects, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); } } @@ -693,7 +693,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) #endif info.depth = HM_HH_getDepth(hhTail); info.freedType = LGC_FREED_REMSET_CHUNK; - HM_freeChunksInListWithInfo(s, &(remset->private), &infoc); + HM_freeChunksInListWithInfo(s, &(remset->private), &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); } #if ASSERT diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index 7bab8924f..2152cd399 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -189,6 +189,6 @@ void HM_appendRemSet(HM_remSet r1, HM_remSet r2) { } void HM_freeRemSetWithInfo(GC_state s, HM_remSet remSet, void* info) { - HM_freeChunksInListWithInfo(s, &(remSet->private), info); + HM_freeChunksInListWithInfo(s, &(remSet->private), info, BLOCK_FOR_UNKNOWN_PURPOSE); CC_freeChunksInConcListWithInfo(s, &(remSet->public), info); } From 79080e938abe77c7f422aca8b64110967d7729ce Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 3 Oct 2022 12:18:27 -0400 Subject: [PATCH 083/114] track BLOCK_FOR_HEAP_CHUNK --- runtime/gc/block-allocator.c | 26 +++++++++++++++++++++-- runtime/gc/entangled-ebr.c | 4 ++-- runtime/gc/garbage-collection.c | 6 +++++- runtime/gc/hierarchical-heap-collection.c | 8 +++++-- runtime/gc/hierarchical-heap.c | 18 ++++++++++++---- runtime/gc/new-object.c | 12 +++++++++-- 6 files changed, 61 insertions(+), 13 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 10553db3c..8d30ebadd 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -311,6 +311,25 @@ static void deallocateInSuperBlock( assert(sb->owner != NULL); assert(sb->owner->numBlocksInUse[purpose] >= ((size_t)1 << sb->sizeClass)); 
sb->owner->numBlocksInUse[purpose] -= (1 << sb->sizeClass); + +#if 0 + size_t nb = 1 << sb->sizeClass; + if (nb <= sb->owner->numBlocksInUse[purpose]) { + sb->owner->numBlocksInUse[purpose] -= nb; + } + else { + // Somehow the tracking is off; perhaps the allocateBlocks/freeBlocks calls + // are mismatched somehow. No worries; we'll just track as best we can by + // assuming that the remainder blocks were marked as unknown purpose. This + // works as long as freeing blocks with purpose is more precise than + // allocating. + size_t amountTracked = sb->owner->numBlocksInUse[purpose]; + size_t amountRem = nb - amountTracked; + sb->owner->numBlocksInUse[purpose] = 0; + assert( sb->owner->numBlocksInUse[BLOCK_FOR_UNKNOWN_PURPOSE] >= amountRem ); + sb->owner->numBlocksInUse[BLOCK_FOR_UNKNOWN_PURPOSE] -= amountRem; + } +#endif } @@ -684,11 +703,14 @@ void logCurrentBlockUsage(GC_state s) { timespec_now(&now); LOG(LM_BLOCK_ALLOCATOR, LL_INFO, "block-allocator(%zu.%.9zu):\n" - " total mmap'ed %zu\n" - " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)", + " total mmap'ed %zu\n" + " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%)\n" + " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", now.tv_sec, now.tv_nsec, count, + inUse[BLOCK_FOR_HEAP_CHUNK], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_HEAP_CHUNK] / (double)count), inUse[BLOCK_FOR_UNKNOWN_PURPOSE], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); } diff --git a/runtime/gc/entangled-ebr.c b/runtime/gc/entangled-ebr.c index d4c539617..8e934762c 100644 --- a/runtime/gc/entangled-ebr.c +++ b/runtime/gc/entangled-ebr.c @@ -6,8 +6,8 @@ #if (defined (MLTON_GC_INTERNAL_FUNCS)) -void freeChunk (GC_state s, void *ptr) { - HM_freeChunk(s, (HM_chunk)ptr); +void freeChunk(GC_state s, void *ptr) { + HM_freeChunkWithInfo(s, (HM_chunk)ptr, NULL, BLOCK_FOR_HEAP_CHUNK); } void HM_EBR_init(GC_state s) { diff --git a/runtime/gc/garbage-collection.c b/runtime/gc/garbage-collection.c index 0903591ef..67d8475b8 100644 --- a/runtime/gc/garbage-collection.c +++ b/runtime/gc/garbage-collection.c @@ -53,7 +53,11 @@ void growStackCurrent(GC_state s) { /* in this case, the new stack needs more space, so allocate a new chunk, * copy the stack, and throw away the old chunk. */ - HM_chunk newChunk = HM_allocateChunk(HM_HH_getChunkList(newhh), stackSize); + HM_chunk newChunk = HM_allocateChunkWithPurpose( + HM_HH_getChunkList(newhh), + stackSize, + BLOCK_FOR_HEAP_CHUNK); + if (NULL == newChunk) { DIE("Ran out of space to grow stack!"); } diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 7579f3ea2..3a5de23ab 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -717,7 +717,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) } else { - HM_freeChunk(s, chunk); + HM_freeChunkWithInfo(s, chunk, &infoc, BLOCK_FOR_HEAP_CHUNK); } chunk = next; } @@ -1931,7 +1931,11 @@ pointer copyObject(pointer p, /* Need to allocate a new chunk. Safe to use the dechecker state of where * the object came from, as all objects in the same heap can be safely * reassigned to any dechecker state of that heap. 
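/* The #if 0 block above sketches a more forgiving variant of the per-purpose
 * bookkeeping: if an allocation and its free ever disagree about the purpose,
 * subtract what the purpose's counter can absorb and attribute the remainder
 * to the unknown bucket rather than underflowing. Restated as a standalone
 * helper with illustrative names: */
#include <stddef.h>

static void accountFreeClamped(size_t *inUseForPurpose,
                               size_t *inUseUnknown,
                               size_t numBlocks)
{
  if (numBlocks <= *inUseForPurpose) {
    *inUseForPurpose -= numBlocks;        /* the common, well-matched case      */
  } else {
    size_t tracked = *inUseForPurpose;    /* take everything this bucket holds  */
    size_t remainder = numBlocks - tracked;
    *inUseForPurpose = 0;
    *inUseUnknown -= remainder;           /* the rest is assumed to be unknown  */
  }
}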
*/ - chunk = HM_allocateChunk(tgtChunkList, objectSize); + chunk = HM_allocateChunkWithPurpose( + tgtChunkList, + objectSize, + BLOCK_FOR_HEAP_CHUNK); + if (NULL == chunk) { DIE("Ran out of space for Hierarchical Heap!"); diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index d4cf0bd72..0a5b631a8 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -588,7 +588,10 @@ bool HM_HH_extend(GC_state s, GC_thread thread, size_t bytesRequested) hh = newhh; } - chunk = HM_allocateChunk(HM_HH_getChunkList(hh), bytesRequested); + chunk = HM_allocateChunkWithPurpose( + HM_HH_getChunkList(hh), + bytesRequested, + BLOCK_FOR_HEAP_CHUNK); if (NULL == chunk) { return FALSE; @@ -690,8 +693,11 @@ void splitHeapForCC(GC_state s, GC_thread thread) { HM_HierarchicalHeap newHH = HM_HH_new(s, HM_HH_getDepth(hh)); thread->hierarchicalHeap = newHH; - HM_chunk chunk = - HM_allocateChunk(HM_HH_getChunkList(newHH), GC_HEAP_LIMIT_SLOP); + HM_chunk chunk = HM_allocateChunkWithPurpose( + HM_HH_getChunkList(newHH), + GC_HEAP_LIMIT_SLOP, + BLOCK_FOR_HEAP_CHUNK); + chunk->levelHead = HM_HH_getUFNode(newHH); #ifdef DETECT_ENTANGLEMENT @@ -874,7 +880,11 @@ objptr copyCurrentStack(GC_state s, GC_thread thread) { assert(isStackReservedAligned(s, reserved)); size_t stackSize = sizeofStackWithMetaData(s, reserved); - HM_chunk newChunk = HM_allocateChunk(HM_HH_getChunkList(hh), stackSize); + HM_chunk newChunk = HM_allocateChunkWithPurpose( + HM_HH_getChunkList(hh), + stackSize, + BLOCK_FOR_HEAP_CHUNK); + if (NULL == newChunk) { DIE("Ran out of space to copy stack!"); } diff --git a/runtime/gc/new-object.c b/runtime/gc/new-object.c index ed9e0d500..9083b7593 100644 --- a/runtime/gc/new-object.c +++ b/runtime/gc/new-object.c @@ -147,8 +147,16 @@ GC_thread newThreadWithHeap( * yet. 
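/* Each heap-object call site above now passes BLOCK_FOR_HEAP_CHUNK to
 * HM_allocateChunkWithPurpose explicitly. If that repetition grows, one
 * option (a suggestion, not part of this patch) is a thin wrapper so the
 * purpose is stated in a single place: */
static inline HM_chunk allocateHeapChunk(HM_chunkList list, size_t bytesRequested)
{
  return HM_allocateChunkWithPurpose(list, bytesRequested, BLOCK_FOR_HEAP_CHUNK);
}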
*/ HM_HierarchicalHeap hh = HM_HH_new(s, depth); - HM_chunk tChunk = HM_allocateChunk(HM_HH_getChunkList(hh), threadSize); - HM_chunk sChunk = HM_allocateChunk(HM_HH_getChunkList(hh), stackSize); + HM_chunk tChunk = HM_allocateChunkWithPurpose( + HM_HH_getChunkList(hh), + threadSize, + BLOCK_FOR_HEAP_CHUNK); + + HM_chunk sChunk = HM_allocateChunkWithPurpose( + HM_HH_getChunkList(hh), + stackSize, + BLOCK_FOR_HEAP_CHUNK); + if (NULL == sChunk || NULL == tChunk) { DIE("Ran out of space for thread+stack allocation!"); } From 55f01559970b975be056a11e23cfcccb939c74bb Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 3 Oct 2022 12:25:10 -0400 Subject: [PATCH 084/114] track BLOCK_FOR_HH_ALLOCATOR and BLOCK_FOR_UF_ALLOCATOR --- runtime/gc/block-allocator.c | 6 ++++++ runtime/gc/chunk.h | 1 + runtime/gc/fixed-size-allocator.c | 9 +++++++-- runtime/gc/fixed-size-allocator.h | 6 +++++- runtime/gc/init.c | 8 ++++---- 5 files changed, 23 insertions(+), 7 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 8d30ebadd..9ac17e122 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -705,12 +705,18 @@ void logCurrentBlockUsage(GC_state s) { "block-allocator(%zu.%.9zu):\n" " total mmap'ed %zu\n" " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%)\n" + " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%)\n" + " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%)\n" " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", now.tv_sec, now.tv_nsec, count, inUse[BLOCK_FOR_HEAP_CHUNK], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HEAP_CHUNK] / (double)count), + inUse[BLOCK_FOR_HH_ALLOCATOR], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_HH_ALLOCATOR] / (double)count), + inUse[BLOCK_FOR_UF_ALLOCATOR], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_UF_ALLOCATOR] / (double)count), inUse[BLOCK_FOR_UNKNOWN_PURPOSE], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); } diff --git a/runtime/gc/chunk.h b/runtime/gc/chunk.h index d634706d8..15818d8be 100644 --- a/runtime/gc/chunk.h +++ b/runtime/gc/chunk.h @@ -156,6 +156,7 @@ HM_chunk HM_getFreeChunk(GC_state s, size_t bytesRequested); * chunk->limit - chunk->frontier <= bytesRequested * Returns NULL if unable to find space for such a chunk. */ HM_chunk HM_allocateChunk(HM_chunkList list, size_t bytesRequested); +HM_chunk HM_allocateChunkWithPurpose(HM_chunkList list, size_t bytesRequested, enum BlockPurpose purpose); void HM_initChunkList(HM_chunkList list); diff --git a/runtime/gc/fixed-size-allocator.c b/runtime/gc/fixed-size-allocator.c index 9eec57607..8a6757d08 100644 --- a/runtime/gc/fixed-size-allocator.c +++ b/runtime/gc/fixed-size-allocator.c @@ -6,7 +6,11 @@ #if (defined (MLTON_GC_INTERNAL_FUNCS)) -void initFixedSizeAllocator(FixedSizeAllocator fsa, size_t fixedSize) { +void initFixedSizeAllocator( + FixedSizeAllocator fsa, + size_t fixedSize, + enum BlockPurpose purpose) +{ size_t minSize = sizeof(struct FixedSizeElement); fsa->fixedSize = align(fixedSize < minSize ? minSize : fixedSize, 8); HM_initChunkList(&(fsa->buffer)); @@ -16,6 +20,7 @@ void initFixedSizeAllocator(FixedSizeAllocator fsa, size_t fixedSize) { fsa->numAllocated = 0; fsa->numLocalFreed = 0; fsa->numSharedFreed = 0; + fsa->purpose = purpose; return; } @@ -86,7 +91,7 @@ void* allocateFixedSize(FixedSizeAllocator fsa) { * to their original buffer, by looking up the chunk header. 
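/* The comment above describes the trick used just below: each chunk handed to
 * the fixed-size allocator reserves a pointer-sized gap at its start holding
 * a back-pointer to the owning allocator, so a freed element can find its
 * home free list from its containing chunk alone. A standalone sketch with
 * placeholder types (Chunk and Allocator stand in for the runtime's HM_chunk
 * and FixedSizeAllocator): */
typedef struct Allocator Allocator;

typedef struct Chunk {
  Allocator *owner;          /* the pointer-sized gap, written at allocation */
  char elements[];           /* fixed-size elements are carved out of this   */
} Chunk;

static void stampOwner(Chunk *c, Allocator *fsa)
{
  c->owner = fsa;            /* done once, right after the chunk is acquired */
}

static Allocator *ownerOfChunk(Chunk *containingChunk)
{
  return containingChunk->owner;   /* consulted when an element is freed     */
}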
*/ - chunk = HM_allocateChunk(buffer, fsa->fixedSize + sizeof(void*)); + chunk = HM_allocateChunkWithPurpose(buffer, fsa->fixedSize + sizeof(void*), fsa->purpose); pointer gap = HM_shiftChunkStart(chunk, sizeof(void*)); *(FixedSizeAllocator *)gap = fsa; diff --git a/runtime/gc/fixed-size-allocator.h b/runtime/gc/fixed-size-allocator.h index 95cad510d..30061c696 100644 --- a/runtime/gc/fixed-size-allocator.h +++ b/runtime/gc/fixed-size-allocator.h @@ -29,6 +29,7 @@ typedef struct FixedSizeAllocator { size_t numAllocated; size_t numLocalFreed; size_t numSharedFreed; + enum BlockPurpose purpose; /** A bit of a hack. I just want quick access to pages to store elements. * I'll reuse the frontier mechanism inherent to chunks to remember which @@ -66,7 +67,10 @@ typedef struct FixedSizeAllocator *FixedSizeAllocator; /** Initialize [fsa] to be able to allocate objects of size [fixedSize]. * You should never re-initialize an allocator. */ -void initFixedSizeAllocator(FixedSizeAllocator fsa, size_t fixedSize); +void initFixedSizeAllocator( + FixedSizeAllocator fsa, + size_t fixedSize, + enum BlockPurpose purpose); /** Allocate an object of the size specified when the allocator was initialized. diff --git a/runtime/gc/init.c b/runtime/gc/init.c index cf4e7335a..2cdf267ed 100644 --- a/runtime/gc/init.c +++ b/runtime/gc/init.c @@ -509,8 +509,8 @@ int GC_init (GC_state s, int argc, char **argv) { s->rootsLength = 0; s->savedThread = BOGUS_OBJPTR; - initFixedSizeAllocator(getHHAllocator(s), sizeof(struct HM_HierarchicalHeap)); - initFixedSizeAllocator(getUFAllocator(s), sizeof(struct HM_UnionFindNode)); + initFixedSizeAllocator(getHHAllocator(s), sizeof(struct HM_HierarchicalHeap), BLOCK_FOR_HH_ALLOCATOR); + initFixedSizeAllocator(getUFAllocator(s), sizeof(struct HM_UnionFindNode), BLOCK_FOR_UF_ALLOCATOR); s->numberDisentanglementChecks = 0; s->signalHandlerThread = BOGUS_OBJPTR; @@ -648,8 +648,8 @@ void GC_duplicate (GC_state d, GC_state s) { d->wsQueueTop = BOGUS_OBJPTR; d->wsQueueBot = BOGUS_OBJPTR; initLocalBlockAllocator(d, s->blockAllocatorGlobal); - initFixedSizeAllocator(getHHAllocator(d), sizeof(struct HM_HierarchicalHeap)); - initFixedSizeAllocator(getUFAllocator(d), sizeof(struct HM_UnionFindNode)); + initFixedSizeAllocator(getHHAllocator(d), sizeof(struct HM_HierarchicalHeap), BLOCK_FOR_HH_ALLOCATOR); + initFixedSizeAllocator(getUFAllocator(d), sizeof(struct HM_UnionFindNode), BLOCK_FOR_UF_ALLOCATOR); d->hhEBR = s->hhEBR; d->hmEBR = s->hmEBR; d->nextChunkAllocSize = s->nextChunkAllocSize; From 0ace54be199d2334349cae289c2617e7f77fe04c Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 3 Oct 2022 13:20:38 -0400 Subject: [PATCH 085/114] more plumbing, and track BLOCK_FOR_REMEMBERED_SET --- runtime/gc/block-allocator.c | 18 +++++++++++++----- runtime/gc/chunk.c | 8 ++++++-- runtime/gc/chunk.h | 3 ++- runtime/gc/concurrent-collection.c | 8 ++++---- runtime/gc/concurrent-list.c | 15 +++++++++------ runtime/gc/concurrent-list.h | 2 +- runtime/gc/concurrent-stack.c | 2 +- runtime/gc/entanglement-suspects.c | 6 +++--- runtime/gc/hierarchical-heap-collection.c | 4 ++-- runtime/gc/remembered-set.c | 4 ++-- 10 files changed, 43 insertions(+), 27 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 9ac17e122..190cdd9c5 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -703,20 +703,28 @@ void logCurrentBlockUsage(GC_state s) { timespec_now(&now); LOG(LM_BLOCK_ALLOCATOR, LL_INFO, "block-allocator(%zu.%.9zu):\n" - " total mmap'ed %zu\n" 
- " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%)\n" - " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%)\n" - " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%)\n" - " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", + " total mmap'ed %zu\n" + " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%)\n" + " BLOCK_FOR_REMEMBERED_SET %zu (%zu%%)\n" + " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%)\n" + " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%)\n" + " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", now.tv_sec, now.tv_nsec, count, + inUse[BLOCK_FOR_HEAP_CHUNK], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HEAP_CHUNK] / (double)count), + + inUse[BLOCK_FOR_REMEMBERED_SET], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_REMEMBERED_SET] / (double)count), + inUse[BLOCK_FOR_HH_ALLOCATOR], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HH_ALLOCATOR] / (double)count), + inUse[BLOCK_FOR_UF_ALLOCATOR], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UF_ALLOCATOR] / (double)count), + inUse[BLOCK_FOR_UNKNOWN_PURPOSE], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); } diff --git a/runtime/gc/chunk.c b/runtime/gc/chunk.c index f484928f4..09c078bc4 100644 --- a/runtime/gc/chunk.c +++ b/runtime/gc/chunk.c @@ -493,10 +493,10 @@ size_t HM_getChunkListUsedSize(HM_chunkList list) { return list->usedSize; } -pointer HM_storeInchunkList(HM_chunkList chunkList, void* p, size_t objSize) { +pointer HM_storeInChunkListWithPurpose(HM_chunkList chunkList, void* p, size_t objSize, enum BlockPurpose purpose) { HM_chunk chunk = HM_getChunkListLastChunk(chunkList); if (NULL == chunk || HM_getChunkSizePastFrontier(chunk) < objSize) { - chunk = HM_allocateChunk(chunkList, objSize); + chunk = HM_allocateChunkWithPurpose(chunkList, objSize, purpose); } assert(NULL != chunk); @@ -511,6 +511,10 @@ pointer HM_storeInchunkList(HM_chunkList chunkList, void* p, size_t objSize) { return frontier; } +pointer HM_storeInChunkList(HM_chunkList chunkList, void* p, size_t objSize) { + return HM_storeInChunkListWithPurpose(chunkList, p, objSize, BLOCK_FOR_UNKNOWN_PURPOSE); +} + HM_HierarchicalHeap HM_getLevelHead(HM_chunk chunk) { assert(chunk != NULL); assert(chunk->levelHead != NULL); diff --git a/runtime/gc/chunk.h b/runtime/gc/chunk.h index 15818d8be..6335b45ea 100644 --- a/runtime/gc/chunk.h +++ b/runtime/gc/chunk.h @@ -266,7 +266,8 @@ pointer HM_shiftChunkStart(HM_chunk chunk, size_t bytes); pointer HM_getChunkStartGap(HM_chunk chunk); /* store the object pointed by p at the end of list and return the address */ -pointer HM_storeInchunkList(HM_chunkList chunkList, void* p, size_t objSize); +pointer HM_storeInChunkList(HM_chunkList chunkList, void* p, size_t objSize); +pointer HM_storeInchunkListWithPurpose(HM_chunkList chunkList, void* p, size_t objSize, enum BlockPurpose purpose); /** diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 3c379b869..8fb97d0f5 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -865,7 +865,7 @@ void CC_filterPinned( // HM_freeRemSetWithInfo(s, oldRemSet, &infoc); // this reintializes the private remset - HM_freeChunksInListWithInfo(s, &(oldRemSet->private), &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, &(oldRemSet->private), &infoc, BLOCK_FOR_REMEMBERED_SET); assert (newRemSet.public.firstChunk == NULL); // this moves all data into remset of hh HM_appendRemSet(oldRemSet, &newRemSet); @@ -1151,8 +1151,8 @@ size_t CC_collectWithRoots( /** SAM_NOTE: TODO: deleteList no longer needed, because * block allocator handles that. 
*/ - HM_freeChunksInListWithInfo(s, origList, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); - HM_freeChunksInListWithInfo(s, deleteList, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, origList, &infoc, BLOCK_FOR_HEAP_CHUNK); + HM_freeChunksInListWithInfo(s, deleteList, &infoc, BLOCK_FOR_HEAP_CHUNK); for(HM_chunk chunk = repList->firstChunk; chunk!=NULL; chunk = chunk->nextChunk) { @@ -1168,7 +1168,7 @@ size_t CC_collectWithRoots( assert(isChunkInList(stackChunk, origList)); HM_unlinkChunk(origList, stackChunk); info.freedType = CC_FREED_STACK_CHUNK; - HM_freeChunkWithInfo(s, stackChunk, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunkWithInfo(s, stackChunk, &infoc, BLOCK_FOR_HEAP_CHUNK); info.freedType = CC_FREED_NORMAL_CHUNK; cp->stack = BOGUS_OBJPTR; diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index 7ef53d1d5..e43ed9b4a 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -4,10 +4,13 @@ void CC_initConcList(CC_concList concList) { pthread_mutex_init(&(concList->mutex), NULL); } + void allocateChunkInConcList( CC_concList concList, size_t objSize, - HM_chunk lastChunk) { + HM_chunk lastChunk, + enum BlockPurpose purpose) +{ GC_state s = pthread_getspecific(gcstate_key); // pthread_mutex_lock(&concList->mutex); @@ -16,7 +19,7 @@ void allocateChunkInConcList( return; } - HM_chunk chunk = HM_getFreeChunk(s, objSize); + HM_chunk chunk = HM_getFreeChunkWithPurpose(s, objSize, purpose); if (NULL == chunk) { @@ -54,7 +57,7 @@ void allocateChunkInConcList( pthread_mutex_unlock(&concList->mutex); } if (!success) { - HM_freeChunk(s, chunk); + HM_freeChunkWithInfo(s, chunk, NULL, purpose); } // if (!__sync_bool_compare_and_swap(&(concList->lastChunk), lastChunk, chunk)) { @@ -65,20 +68,20 @@ void allocateChunkInConcList( } -pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize){ +pointer CC_storeInConcListWithPurpose(CC_concList concList, void* p, size_t objSize, enum BlockPurpose purpose){ assert(concList != NULL); // pthread_mutex_lock(&concList->mutex); while(TRUE) { HM_chunk chunk = concList->lastChunk; if (NULL == chunk) { - allocateChunkInConcList(concList, objSize, chunk); + allocateChunkInConcList(concList, objSize, chunk, purpose); continue; } else { pointer frontier = HM_getChunkFrontier(chunk); size_t sizePast = (size_t) (chunk->limit - frontier); if (sizePast < objSize) { - allocateChunkInConcList(concList, objSize, chunk); + allocateChunkInConcList(concList, objSize, chunk, purpose); continue; } diff --git a/runtime/gc/concurrent-list.h b/runtime/gc/concurrent-list.h index 99c1cd30f..6f73d8bb4 100644 --- a/runtime/gc/concurrent-list.h +++ b/runtime/gc/concurrent-list.h @@ -25,7 +25,7 @@ struct CC_concList { #if (defined (MLTON_GC_INTERNAL_FUNCS)) void CC_initConcList(CC_concList concList); -pointer CC_storeInConcList(CC_concList concList, void* p, size_t objSize); +pointer CC_storeInConcListWithPurpose(CC_concList concList, void* p, size_t objSize, enum BlockPurpose purpose); // void CC_foreachObjInList(CC_concList concList, size_t objSize, HM_foreachObjClosure f); // void CC_foreachRemInConc(GC_state s, CC_concList concList, struct HM_foreachDownptrClosure* f); void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList); diff --git a/runtime/gc/concurrent-stack.c b/runtime/gc/concurrent-stack.c index 325ed4025..d35f061b5 100644 --- a/runtime/gc/concurrent-stack.c +++ b/runtime/gc/concurrent-stack.c @@ -71,7 +71,7 @@ bool CC_stack_data_push(CC_stack_data* stack, void* datum){ 
stack->storage[stack->size++] = datum; #endif - HM_storeInchunkList(&(stack->storage), &(datum), sizeof(datum)); + HM_storeInChunkList(&(stack->storage), &(datum), sizeof(datum)); pthread_mutex_unlock(&stack->mutex); return TRUE; } diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 53de88b80..6ec20c565 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -58,7 +58,7 @@ void clear_suspect( if (pinType(header) == PIN_ANY && unpinDepth < eargs->heapDepth) { /* Not ready to be cleared */ HM_HierarchicalHeap unpinHeap = HM_HH_getHeapAtDepth(s, eargs->thread, unpinDepth); - HM_storeInchunkList(HM_HH_getSuspects(unpinHeap), opp, sizeof(objptr)); + HM_storeInChunkList(HM_HH_getSuspects(unpinHeap), opp, sizeof(objptr)); return; } @@ -69,7 +69,7 @@ void clear_suspect( } else { /*oops something changed in b/w, let's try at the next join*/ - HM_storeInchunkList(eargs->newList, opp, sizeof(objptr)); + HM_storeInChunkList(eargs->newList, opp, sizeof(objptr)); } } @@ -95,7 +95,7 @@ void ES_add(__attribute__((unused)) GC_state s, HM_chunkList es, objptr op) return; } s->cumulativeStatistics->numSuspectsMarked++; - HM_storeInchunkList(es, &op, sizeof(objptr)); + HM_storeInChunkList(es, &op, sizeof(objptr)); } int ES_foreachSuspect( diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 3a5de23ab..fe236092b 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -693,7 +693,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) #endif info.depth = HM_HH_getDepth(hhTail); info.freedType = LGC_FREED_REMSET_CHUNK; - HM_freeChunksInListWithInfo(s, &(remset->private), &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, &(remset->private), &infoc, BLOCK_FOR_REMEMBERED_SET); } #if ASSERT @@ -1240,7 +1240,7 @@ void copySuspect( return; } uint32_t opDepth = args->toDepth; - HM_storeInchunkList(HM_HH_getSuspects(toSpaceHH(s, args, opDepth)), &new_ptr, sizeof(objptr)); + HM_storeInChunkList(HM_HH_getSuspects(toSpaceHH(s, args, opDepth)), &new_ptr, sizeof(objptr)); } bool headerForwarded(GC_header h) diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index 2152cd399..87f28b5c0 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -11,10 +11,10 @@ void HM_initRemSet(HM_remSet remSet) { void HM_remember(HM_remSet remSet, HM_remembered remElem, bool conc) { if (!conc) { - HM_storeInchunkList(&(remSet->private), (void*)remElem, sizeof(struct HM_remembered)); + HM_storeInChunkListWithPurpose(&(remSet->private), (void*)remElem, sizeof(struct HM_remembered), BLOCK_FOR_REMEMBERED_SET); } else { - CC_storeInConcList(&(remSet->public), (void *)remElem, sizeof(struct HM_remembered)); + CC_storeInConcListWithPurpose(&(remSet->public), (void *)remElem, sizeof(struct HM_remembered), BLOCK_FOR_REMEMBERED_SET); } } From 281646aedf6d03ed488b1354a919233e38d7b337 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 3 Oct 2022 15:22:54 -0400 Subject: [PATCH 086/114] track BLOCK_FOR_FORGOTTEN_SET and BLOCK_FOR_GC_WORKLIST --- runtime/gc/block-allocator.c | 8 ++++++++ runtime/gc/block-allocator.h | 1 + runtime/gc/cc-work-list.c | 9 ++++++--- runtime/gc/concurrent-collection.c | 2 +- runtime/gc/concurrent-stack.c | 6 +++--- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 190cdd9c5..2f7f727a6 100644 --- 
a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -706,8 +706,10 @@ void logCurrentBlockUsage(GC_state s) { " total mmap'ed %zu\n" " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%)\n" " BLOCK_FOR_REMEMBERED_SET %zu (%zu%%)\n" + " BLOCK_FOR_FORGOTTEN_SET %zu (%zu%%)\n" " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%)\n" " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%)\n" + " BLOCK_FOR_GC_WORKLIST %zu (%zu%%)\n" " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", now.tv_sec, now.tv_nsec, @@ -719,12 +721,18 @@ void logCurrentBlockUsage(GC_state s) { inUse[BLOCK_FOR_REMEMBERED_SET], (size_t)(100.0 * (double)inUse[BLOCK_FOR_REMEMBERED_SET] / (double)count), + inUse[BLOCK_FOR_FORGOTTEN_SET], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_FORGOTTEN_SET] / (double)count), + inUse[BLOCK_FOR_HH_ALLOCATOR], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HH_ALLOCATOR] / (double)count), inUse[BLOCK_FOR_UF_ALLOCATOR], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UF_ALLOCATOR] / (double)count), + inUse[BLOCK_FOR_GC_WORKLIST], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_GC_WORKLIST] / (double)count), + inUse[BLOCK_FOR_UNKNOWN_PURPOSE], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); } diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index 602135896..685f6c38b 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -22,6 +22,7 @@ enum BlockPurpose { BLOCK_FOR_FORGOTTEN_SET, BLOCK_FOR_HH_ALLOCATOR, BLOCK_FOR_UF_ALLOCATOR, + BLOCK_FOR_GC_WORKLIST, BLOCK_FOR_UNKNOWN_PURPOSE, NUM_BLOCK_PURPOSES /** Hack to know statically how many there are. Make sure * this comes last in the list. */ diff --git a/runtime/gc/cc-work-list.c b/runtime/gc/cc-work-list.c index 324d4fae0..536af4f85 100644 --- a/runtime/gc/cc-work-list.c +++ b/runtime/gc/cc-work-list.c @@ -10,7 +10,10 @@ void CC_workList_init( HM_chunkList c = &(w->storage); HM_initChunkList(c); // arbitrary, just need an initial chunk - w->currentChunk = HM_allocateChunk(c, sizeof(struct CC_workList_elem)); + w->currentChunk = HM_allocateChunkWithPurpose( + c, + sizeof(struct CC_workList_elem), + BLOCK_FOR_GC_WORKLIST); } void CC_workList_free( @@ -18,7 +21,7 @@ void CC_workList_free( CC_workList w) { HM_chunkList c = &(w->storage); - HM_freeChunksInListWithInfo(s, c, NULL, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, c, NULL, BLOCK_FOR_GC_WORKLIST); w->currentChunk = NULL; } @@ -297,7 +300,7 @@ objptr* CC_workList_pop( if (NULL != chunk->nextChunk) { HM_chunk nextChunk = chunk->nextChunk; HM_unlinkChunk(list, nextChunk); - HM_freeChunk(s, nextChunk); + HM_freeChunkWithInfo(s, nextChunk, NULL, BLOCK_FOR_GC_WORKLIST); } assert(NULL == chunk->nextChunk); diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 8fb97d0f5..fff52c0d6 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -1064,7 +1064,7 @@ size_t CC_collectWithRoots( forEachObjptrInCCStackBag(s, removedFromCCBag, tryUnmarkAndUnmarkLoop, &lists); unmarkLoop(s, &lists); - HM_freeChunksInList(s, removedFromCCBag); + HM_freeChunksInListWithInfo(s, removedFromCCBag, NULL, BLOCK_FOR_FORGOTTEN_SET); assert(CC_workList_isEmpty(s, &(lists.worklist))); CC_workList_free(s, &(lists.worklist)); diff --git a/runtime/gc/concurrent-stack.c b/runtime/gc/concurrent-stack.c index d35f061b5..15c4535c5 100644 --- a/runtime/gc/concurrent-stack.c +++ b/runtime/gc/concurrent-stack.c @@ -71,7 +71,7 @@ bool CC_stack_data_push(CC_stack_data* stack, void* datum){ stack->storage[stack->size++] = datum; #endif - 
HM_storeInChunkList(&(stack->storage), &(datum), sizeof(datum)); + HM_storeInChunkListWithPurpose(&(stack->storage), &(datum), sizeof(datum), BLOCK_FOR_FORGOTTEN_SET); pthread_mutex_unlock(&stack->mutex); return TRUE; } @@ -141,7 +141,7 @@ void CC_stack_data_free(CC_stack_data* stack){ #endif void CC_stack_data_free(GC_state s, CC_stack_data* stack) { - HM_freeChunksInList(s, &(stack->storage)); + HM_freeChunksInListWithInfo(s, &(stack->storage), NULL, BLOCK_FOR_FORGOTTEN_SET); } void CC_stack_free(GC_state s, CC_stack* stack) { @@ -154,7 +154,7 @@ void CC_stack_free(GC_state s, CC_stack* stack) { void CC_stack_data_clear(GC_state s, CC_stack_data* stack){ pthread_mutex_lock(&stack->mutex); - HM_freeChunksInList(s, &(stack->storage)); + HM_freeChunksInListWithInfo(s, &(stack->storage), NULL, BLOCK_FOR_FORGOTTEN_SET); pthread_mutex_unlock(&stack->mutex); } From 04238a672ea05cb651fd539f89028e56f59ba8cc Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 4 Oct 2022 13:49:38 -0400 Subject: [PATCH 087/114] generic Sampler that can be polled at regular intervals, and configuration to sample block usage --- runtime/gc.c | 1 + runtime/gc.h | 1 + runtime/gc/block-allocator.c | 27 +++++++++++--- runtime/gc/block-allocator.h | 9 ++--- runtime/gc/garbage-collection.c | 4 +-- runtime/gc/gc_state.h | 1 + runtime/gc/init.c | 9 ++--- runtime/gc/sampler.c | 63 +++++++++++++++++++++++++++++++++ runtime/gc/sampler.h | 34 ++++++++++++++++++ 9 files changed, 130 insertions(+), 19 deletions(-) create mode 100644 runtime/gc/sampler.c create mode 100644 runtime/gc/sampler.h diff --git a/runtime/gc.c b/runtime/gc.c index df53c2eba..c72adf2f5 100644 --- a/runtime/gc.c +++ b/runtime/gc.c @@ -95,6 +95,7 @@ extern C_Pthread_Key_t gcstate_key; #include "gc/concurrent-list.c" #include "gc/remembered-set.c" #include "gc/rusage.c" +#include "gc/sampler.c" #include "gc/sequence-allocate.c" #include "gc/sequence.c" #include "gc/share.c" diff --git a/runtime/gc.h b/runtime/gc.h index 1660440c5..4015b2bb5 100644 --- a/runtime/gc.h +++ b/runtime/gc.h @@ -27,6 +27,7 @@ typedef GC_state GCState_t; #include "gc/debug.h" #include "gc/logger.h" +#include "gc/sampler.h" #include "gc/block-allocator.h" #include "gc/tls-objects.h" diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 2f7f727a6..2d7a00717 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -694,13 +694,15 @@ void queryCurrentBlockUsage(GC_state s, size_t *numBlocks, size_t *blocksInUseAr } -void logCurrentBlockUsage(GC_state s) { +void logCurrentBlockUsage( + GC_state s, + struct timespec *now, + __attribute__((unused)) void *env) +{ size_t count; size_t inUse[NUM_BLOCK_PURPOSES]; queryCurrentBlockUsage(s, &count, (size_t*)inUse); - struct timespec now; - timespec_now(&now); LOG(LM_BLOCK_ALLOCATOR, LL_INFO, "block-allocator(%zu.%.9zu):\n" " total mmap'ed %zu\n" @@ -711,8 +713,8 @@ void logCurrentBlockUsage(GC_state s) { " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%)\n" " BLOCK_FOR_GC_WORKLIST %zu (%zu%%)\n" " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", - now.tv_sec, - now.tv_nsec, + now->tv_sec, + now->tv_nsec, count, inUse[BLOCK_FOR_HEAP_CHUNK], @@ -737,4 +739,19 @@ void logCurrentBlockUsage(GC_state s) { (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); } +Sampler newBlockUsageSampler(GC_state s) { + struct SamplerClosure func; + func.fun = logCurrentBlockUsage; + func.env = NULL; + + struct timespec desiredInterval; + desiredInterval.tv_sec = 0; + desiredInterval.tv_nsec = 1000000; // 1 ms + + 
Sampler result = malloc(sizeof(struct Sampler)); + initSampler(s, result, &func, &desiredInterval); + + return result; +} + #endif diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index 685f6c38b..6234b5931 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -193,6 +193,10 @@ Blocks allocateBlocks(GC_state s, size_t numBlocks); Blocks allocateBlocksWithPurpose(GC_state s, size_t numBlocks, enum BlockPurpose purpose); +/** Free a group of contiguous blocks. */ +void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f); + + /** populate: * *numBlocks := current total number of blocks mmap'ed * blocksInUseArr[p] := current number of blocks allocated for purpose `p` @@ -201,10 +205,7 @@ Blocks allocateBlocksWithPurpose(GC_state s, size_t numBlocks, enum BlockPurpose */ void queryCurrentBlockUsage(GC_state s, size_t *numBlocks, size_t *blocksInUseArr); -void logCurrentBlockUsage(GC_state s); - -/** Free a group of contiguous blocks. */ -void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f); +Sampler newBlockUsageSampler(GC_state s); #endif diff --git a/runtime/gc/garbage-collection.c b/runtime/gc/garbage-collection.c index 67d8475b8..e39b80c5a 100644 --- a/runtime/gc/garbage-collection.c +++ b/runtime/gc/garbage-collection.c @@ -106,9 +106,7 @@ void GC_collect (GC_state s, size_t bytesRequested, bool force) { HM_EBR_leaveQuiescentState(s); HM_EBR_enterQuiescentState(s); - if (s->procNumber == 0) { - logCurrentBlockUsage(s); - } + maybeSample(s, s->blockUsageSampler); // HM_HierarchicalHeap h = getThreadCurrent(s)->hierarchicalHeap; // while (h->nextAncestor != NULL) h = h->nextAncestor; diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 8a824ad77..8d1ffb152 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -32,6 +32,7 @@ struct GC_state { volatile uint32_t atomicState; struct BlockAllocator *blockAllocatorGlobal; struct BlockAllocator *blockAllocatorLocal; + struct Sampler *blockUsageSampler; objptr callFromCHandlerThread; /* Handler for exported C calls (in heap). */ pointer callFromCOpArgsResPtr; /* Pass op, args, and res from exported C call */ struct GC_controls *controls; diff --git a/runtime/gc/init.c b/runtime/gc/init.c index 2cdf267ed..3fd2f50a4 100644 --- a/runtime/gc/init.c +++ b/runtime/gc/init.c @@ -608,13 +608,7 @@ void GC_lateInit(GC_state s) { HM_EBR_init(s); initLocalBlockAllocator(s, initGlobalBlockAllocator(s)); - - struct timespec now; - timespec_now(&now); - LOG(LM_BLOCK_ALLOCATOR, LL_INFO, - "block-allocator(%zu.%.9zu): init", - now.tv_sec, - now.tv_nsec); + s->blockUsageSampler = newBlockUsageSampler(s); s->nextChunkAllocSize = s->controls->allocChunkSize; @@ -648,6 +642,7 @@ void GC_duplicate (GC_state d, GC_state s) { d->wsQueueTop = BOGUS_OBJPTR; d->wsQueueBot = BOGUS_OBJPTR; initLocalBlockAllocator(d, s->blockAllocatorGlobal); + d->blockUsageSampler = s->blockUsageSampler; initFixedSizeAllocator(getHHAllocator(d), sizeof(struct HM_HierarchicalHeap), BLOCK_FOR_HH_ALLOCATOR); initFixedSizeAllocator(getUFAllocator(d), sizeof(struct HM_UnionFindNode), BLOCK_FOR_UF_ALLOCATOR); d->hhEBR = s->hhEBR; diff --git a/runtime/gc/sampler.c b/runtime/gc/sampler.c new file mode 100644 index 000000000..bc7b7ac5f --- /dev/null +++ b/runtime/gc/sampler.c @@ -0,0 +1,63 @@ +/* Copyright (C) 2022 Sam Westrick + * + * MLton is released under a HPND-style license. + * See the file MLton-LICENSE for details. 
+ */ + + +void initSampler( + __attribute__((unused)) GC_state s, + Sampler samp, + SamplerClosure func, + struct timespec *desiredInterval) +{ + samp->func = *func; + samp->desiredInterval = *desiredInterval; + samp->currentEpoch = 0; + timespec_now(&(samp->absoluteStart)); +} + + +static void timespec_mul(struct timespec *dst, size_t multiplier) { + size_t sec = dst->tv_sec; + size_t nsec = dst->tv_nsec; + + size_t nps = 1000L * 1000 * 1000; + + size_t new_nsec = (nsec * multiplier) % nps; + size_t add_sec = (nsec * multiplier) / nps; + + dst->tv_sec = (sec * multiplier) + add_sec; + dst->tv_nsec = new_nsec; +} + + +static inline double timespec_to_seconds(struct timespec *tm) { + return (double)tm->tv_sec + ((double)tm->tv_nsec * 0.000000001); +} + + +void maybeSample(GC_state s, Sampler samp) { + size_t oldEpoch = samp->currentEpoch; + + // compute the time of the last successful sample (relative to start) + struct timespec lastSample; + lastSample = samp->desiredInterval; + timespec_mul(&lastSample, oldEpoch); + + // compare against current time by computing epoch diff + struct timespec now; + timespec_now(&now); + timespec_sub(&now, &(samp->absoluteStart)); + double diff = timespec_to_seconds(&now) - timespec_to_seconds(&lastSample); + long epochDiff = (long)(diff / timespec_to_seconds(&samp->desiredInterval)); + + if (epochDiff < 1) + return; + + size_t newEpoch = oldEpoch + epochDiff; + + if (__sync_bool_compare_and_swap(&samp->currentEpoch, oldEpoch, newEpoch)) { + samp->func.fun(s, &now, samp->func.env); + } +} \ No newline at end of file diff --git a/runtime/gc/sampler.h b/runtime/gc/sampler.h new file mode 100644 index 000000000..c7f9b88c2 --- /dev/null +++ b/runtime/gc/sampler.h @@ -0,0 +1,34 @@ +/* Copyright (C) 2022 Sam Westrick + * + * MLton is released under a HPND-style license. + * See the file MLton-LICENSE for details. 
+ */ + +#ifndef SAMPLER_H_ +#define SAMPLER_H_ + +#if (defined (MLTON_GC_INTERNAL_FUNCS)) + +typedef void (*SamplerFun) (GC_state s, struct timespec *tm, void *env); + +typedef struct SamplerClosure { + SamplerFun fun; + void *env; +} *SamplerClosure; + +typedef struct Sampler { + struct SamplerClosure func; + struct timespec desiredInterval; + struct timespec absoluteStart; + size_t currentEpoch; +} * Sampler; + + +void initSampler(GC_state s, Sampler samp, SamplerClosure func, struct timespec *desiredInterval); + +void maybeSample(GC_state s, Sampler samp); + +#endif /* MLTON_GC_INTERNAL_FUNCS */ + + +#endif /* SAMPLER_H_ */ \ No newline at end of file From 2873f3c5f58c6491b9cb1e49bd77544061b2ac08 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 4 Oct 2022 14:48:38 -0400 Subject: [PATCH 088/114] keep track of allocated/freed separately (and the same for mapped/released) --- runtime/gc/block-allocator.c | 128 +++++++++++++++++++++-------------- runtime/gc/block-allocator.h | 18 +++-- 2 files changed, 89 insertions(+), 57 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 2d7a00717..af6c09a47 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -135,9 +135,11 @@ static void initBlockAllocator(GC_state s, BlockAllocator ball) { ball->completelyEmptyGroup.firstSuperBlock = NULL; ball->firstFreedByOther = NULL; - ball->numBlocks = 0; + ball->numBlocksMapped = 0; + ball->numBlocksReleased = 0; for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { - ball->numBlocksInUse[p] = 0; + ball->numBlocksAllocated[p] = 0; + ball->numBlocksFreed[p] = 0; } for (size_t i = 0; i < numMegaBlockSizeClasses; i++) { @@ -241,7 +243,7 @@ static void mmapNewSuperBlocks( prependSuperBlock(getFullnessGroup(s, ball, 0, COMPLETELY_EMPTY), sb); } - ball->numBlocks += count*(SUPERBLOCK_SIZE(s)); + ball->numBlocksMapped += count*(SUPERBLOCK_SIZE(s)); } @@ -265,7 +267,7 @@ static Blocks allocateInSuperBlock( sb->numBlocksFree -= (1 << sb->sizeClass); assert(sb->owner != NULL); - sb->owner->numBlocksInUse[purpose] += (1 << sb->sizeClass); + sb->owner->numBlocksAllocated[purpose] += (1 << sb->sizeClass); result->container = sb; result->numBlocks = 1 << sb->sizeClass; @@ -280,7 +282,7 @@ static Blocks allocateInSuperBlock( sb->numBlocksFree -= (1 << sb->sizeClass); assert(sb->owner != NULL); - sb->owner->numBlocksInUse[purpose] += (1 << sb->sizeClass); + sb->owner->numBlocksAllocated[purpose] += (1 << sb->sizeClass); Blocks bs = (Blocks)result; bs->container = sb; @@ -309,27 +311,7 @@ static void deallocateInSuperBlock( assert( purpose < NUM_BLOCK_PURPOSES ); assert(sb->owner != NULL); - assert(sb->owner->numBlocksInUse[purpose] >= ((size_t)1 << sb->sizeClass)); - sb->owner->numBlocksInUse[purpose] -= (1 << sb->sizeClass); - -#if 0 - size_t nb = 1 << sb->sizeClass; - if (nb <= sb->owner->numBlocksInUse[purpose]) { - sb->owner->numBlocksInUse[purpose] -= nb; - } - else { - // Somehow the tracking is off; perhaps the allocateBlocks/freeBlocks calls - // are mismatched somehow. No worries; we'll just track as best we can by - // assuming that the remainder blocks were marked as unknown purpose. This - // works as long as freeing blocks with purpose is more precise than - // allocating. 
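As a point of reference for the Sampler interface introduced above, here is a minimal sketch of how another periodic probe could be registered with it. Only initSampler, maybeSample, Sampler, and SamplerClosure come from the patch; myProbe, newMyProbeSampler, and the 100 ms interval are illustrative names chosen for this sketch, and the probe fires only when at least one desiredInterval epoch boundary has passed since the previous sample, no matter how often the calling path runs.

/* Illustrative sketch only: registering an additional periodic probe with
 * the Sampler machinery (assumes the runtime headers above are in scope). */
static void myProbe(GC_state s, struct timespec *now, void *env) {
  (void)s;
  (void)env;
  LOG(LM_BLOCK_ALLOCATOR, LL_INFO,
      "probe fired at %ld.%09ld",
      (long)now->tv_sec,
      (long)now->tv_nsec);
}

static Sampler newMyProbeSampler(GC_state s) {
  struct SamplerClosure closure;
  closure.fun = myProbe;
  closure.env = NULL;

  struct timespec interval;
  interval.tv_sec = 0;
  interval.tv_nsec = 100L * 1000 * 1000; /* 100 ms */

  Sampler samp = malloc(sizeof(struct Sampler));
  initSampler(s, samp, &closure, &interval);
  return samp;
}

/* then, on any frequently executed path: maybeSample(s, samp); */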
- size_t amountTracked = sb->owner->numBlocksInUse[purpose]; - size_t amountRem = nb - amountTracked; - sb->owner->numBlocksInUse[purpose] = 0; - assert( sb->owner->numBlocksInUse[BLOCK_FOR_UNKNOWN_PURPOSE] >= amountRem ); - sb->owner->numBlocksInUse[BLOCK_FOR_UNKNOWN_PURPOSE] -= amountRem; - } -#endif + sb->owner->numBlocksFreed[purpose] += (1 << sb->sizeClass); } @@ -437,8 +419,8 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { (size_t)1 << (s->controls->megablockThreshold - 1)); pthread_mutex_lock(&(global->megaBlockLock)); - global->numBlocks -= nb; - global->numBlocksInUse[purpose] -= nb; + global->numBlocksReleased += nb; + global->numBlocksFreed[purpose] += nb; pthread_mutex_unlock(&(global->megaBlockLock)); return; } @@ -448,8 +430,7 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { pthread_mutex_lock(&(global->megaBlockLock)); mb->nextMegaBlock = global->megaBlockSizeClass[mbClass].firstMegaBlock; global->megaBlockSizeClass[mbClass].firstMegaBlock = mb; - assert( global->numBlocksInUse[mb->purpose] >= mb->numBlocks ); - global->numBlocksInUse[mb->purpose] -= mb->numBlocks; + global->numBlocksFreed[mb->purpose] += mb->numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); return; } @@ -487,7 +468,7 @@ static MegaBlock tryFindMegaBlock( if (mb->numBlocks >= numBlocksNeeded) { *mbp = mb->nextMegaBlock; mb->nextMegaBlock = NULL; - global->numBlocksInUse[purpose] += mb->numBlocks; + global->numBlocksAllocated[purpose] += mb->numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); LOG(LM_CHUNK_POOL, LL_INFO, @@ -518,8 +499,8 @@ static MegaBlock mmapNewMegaBlock(GC_state s, size_t numBlocks, enum BlockPurpos BlockAllocator global = s->blockAllocatorGlobal; pthread_mutex_lock(&(global->megaBlockLock)); - global->numBlocks += numBlocks; - global->numBlocksInUse[purpose] += numBlocks; + global->numBlocksMapped += numBlocks; + global->numBlocksAllocated[purpose] += numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); MegaBlock mb = (MegaBlock)start; @@ -670,26 +651,38 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f) { } -void queryCurrentBlockUsage(GC_state s, size_t *numBlocks, size_t *blocksInUseArr) { - *numBlocks = 0; +void queryCurrentBlockUsage( + GC_state s, + size_t *numBlocksMapped, + size_t *numBlocksReleased, + size_t *numBlocksAllocated, + size_t *numBlocksFreed) +{ + *numBlocksMapped = 0; + *numBlocksReleased = 0; for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { - blocksInUseArr[p] = 0; + numBlocksAllocated[p] = 0; + numBlocksFreed[p] = 0; } // query local allocators for (uint32_t i = 0; i < s->numberOfProcs; i++) { BlockAllocator ball = s->procStates[i].blockAllocatorLocal; - *numBlocks += ball->numBlocks; + *numBlocksMapped += ball->numBlocksMapped; + *numBlocksReleased += ball->numBlocksReleased; for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { - blocksInUseArr[p] += ball->numBlocksInUse[p]; + numBlocksAllocated[p] += ball->numBlocksAllocated[p]; + numBlocksFreed[p] += ball->numBlocksFreed[p]; } } // query global allocator BlockAllocator global = s->blockAllocatorGlobal; - *numBlocks += global->numBlocks; + *numBlocksMapped += global->numBlocksMapped; + *numBlocksReleased += global->numBlocksReleased; for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { - blocksInUseArr[p] += global->numBlocksInUse[p]; + numBlocksAllocated[p] += global->numBlocksAllocated[p]; + numBlocksFreed[p] += global->numBlocksFreed[p]; } } @@ -699,44 +692,75 @@ void 
logCurrentBlockUsage( struct timespec *now, __attribute__((unused)) void *env) { - size_t count; + size_t mapped; + size_t released; + size_t allocated[NUM_BLOCK_PURPOSES]; + size_t freed[NUM_BLOCK_PURPOSES]; + queryCurrentBlockUsage(s, &mapped, &released, (size_t*)allocated, (size_t*)freed); + size_t inUse[NUM_BLOCK_PURPOSES]; - queryCurrentBlockUsage(s, &count, (size_t*)inUse); + for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { + if (freed[p] > allocated[p]) { + inUse[p] = 0; + } + else { + inUse[p] = allocated[p] - freed[p]; + } + } + + size_t count = mapped-released; + if (released > mapped) count = 0; LOG(LM_BLOCK_ALLOCATOR, LL_INFO, - "block-allocator(%zu.%.9zu):\n" - " total mmap'ed %zu\n" - " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%)\n" - " BLOCK_FOR_REMEMBERED_SET %zu (%zu%%)\n" - " BLOCK_FOR_FORGOTTEN_SET %zu (%zu%%)\n" - " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%)\n" - " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%)\n" - " BLOCK_FOR_GC_WORKLIST %zu (%zu%%)\n" - " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%)\n", + "block-allocator(%zu.%.9zu)\n" + " currently mapped %zu (= %zu - %zu)\n" + " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_REMEMBERED_SET %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_FORGOTTEN_SET %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_GC_WORKLIST %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%) (= %zu - %zu)\n", now->tv_sec, now->tv_nsec, count, + mapped, + released, inUse[BLOCK_FOR_HEAP_CHUNK], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HEAP_CHUNK] / (double)count), + allocated[BLOCK_FOR_HEAP_CHUNK], + freed[BLOCK_FOR_HEAP_CHUNK], inUse[BLOCK_FOR_REMEMBERED_SET], (size_t)(100.0 * (double)inUse[BLOCK_FOR_REMEMBERED_SET] / (double)count), + allocated[BLOCK_FOR_REMEMBERED_SET], + freed[BLOCK_FOR_REMEMBERED_SET], inUse[BLOCK_FOR_FORGOTTEN_SET], (size_t)(100.0 * (double)inUse[BLOCK_FOR_FORGOTTEN_SET] / (double)count), + allocated[BLOCK_FOR_FORGOTTEN_SET], + freed[BLOCK_FOR_FORGOTTEN_SET], inUse[BLOCK_FOR_HH_ALLOCATOR], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HH_ALLOCATOR] / (double)count), + allocated[BLOCK_FOR_HH_ALLOCATOR], + freed[BLOCK_FOR_HH_ALLOCATOR], inUse[BLOCK_FOR_UF_ALLOCATOR], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UF_ALLOCATOR] / (double)count), + allocated[BLOCK_FOR_UF_ALLOCATOR], + freed[BLOCK_FOR_UF_ALLOCATOR], inUse[BLOCK_FOR_GC_WORKLIST], (size_t)(100.0 * (double)inUse[BLOCK_FOR_GC_WORKLIST] / (double)count), + allocated[BLOCK_FOR_GC_WORKLIST], + freed[BLOCK_FOR_GC_WORKLIST], inUse[BLOCK_FOR_UNKNOWN_PURPOSE], - (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count)); + (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count), + allocated[BLOCK_FOR_UNKNOWN_PURPOSE], + freed[BLOCK_FOR_UNKNOWN_PURPOSE]); } Sampler newBlockUsageSampler(GC_state s) { diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index 6234b5931..f350b1a40 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -138,8 +138,10 @@ enum FullnessGroup { typedef struct BlockAllocator { - size_t numBlocks; - size_t numBlocksInUse[NUM_BLOCK_PURPOSES]; + size_t numBlocksMapped; + size_t numBlocksReleased; + size_t numBlocksAllocated[NUM_BLOCK_PURPOSES]; + size_t numBlocksFreed[NUM_BLOCK_PURPOSES]; /** There are 3 fullness groups in each size class: * 0 is completely full, i.e. 
no free blocks available @@ -199,11 +201,17 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f); /** populate: * *numBlocks := current total number of blocks mmap'ed - * blocksInUseArr[p] := current number of blocks allocated for purpose `p` + * blocksAllocated[p] := cumulative number of blocks allocated for purpose `p` + * blocksFreed[p] := cumulative number of blocks freed for purpose `p` * - * The `blocksInUseArr` must be an array of length `NUM_BLOCK_PURPOSES` + * The `blocksAllocated` and `blocksFreed` arrays must have length `NUM_BLOCK_PURPOSES` */ -void queryCurrentBlockUsage(GC_state s, size_t *numBlocks, size_t *blocksInUseArr); +void queryCurrentBlockUsage( + GC_state s, + size_t *numBlocksMapped, + size_t *numBlocksReleased, + size_t *blocksAllocated, + size_t *blocksFreed); Sampler newBlockUsageSampler(GC_state s); From 2da673ffb1fad0f8992f4ee8acfd2af14d9e3176 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 4 Oct 2022 15:17:54 -0400 Subject: [PATCH 089/114] add block usage sample interval control --- runtime/gc/block-allocator.c | 5 +--- runtime/gc/controls.h | 1 + runtime/gc/init.c | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index af6c09a47..78764536a 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -768,10 +768,7 @@ Sampler newBlockUsageSampler(GC_state s) { func.fun = logCurrentBlockUsage; func.env = NULL; - struct timespec desiredInterval; - desiredInterval.tv_sec = 0; - desiredInterval.tv_nsec = 1000000; // 1 ms - + struct timespec desiredInterval = s->controls->blockUsageSampleInterval; Sampler result = malloc(sizeof(struct Sampler)); initSampler(s, result, &func, &desiredInterval); diff --git a/runtime/gc/controls.h b/runtime/gc/controls.h index e9ff7f394..75af66944 100644 --- a/runtime/gc/controls.h +++ b/runtime/gc/controls.h @@ -64,6 +64,7 @@ struct GC_controls { size_t allocBlocksMinSize; size_t superblockThreshold; // upper bound on size-class of a superblock size_t megablockThreshold; // upper bound on size-class of a megablock (unmap above this threshold) + struct timespec blockUsageSampleInterval; float emptinessFraction; bool debugKeepFreeBlocks; bool manageEntanglement; diff --git a/runtime/gc/init.c b/runtime/gc/init.c index 3fd2f50a4..e233e4cfa 100644 --- a/runtime/gc/init.c +++ b/runtime/gc/init.c @@ -80,6 +80,49 @@ static size_t stringToBytes(const char *s) { die ("Invalid @MLton/@mpl memory amount: %s.", s); } + +static void stringToTime(const char *s, struct timespec *t) { + double d; + char *endptr; + size_t factor; + + d = strtod (s, &endptr); + if (s == endptr) + goto bad; + + switch (*endptr++) { + case 's': + factor = 1; + break; + case 'm': + factor = 1000; + break; + case 'u': + factor = 1000 * 1000; + break; + case 'n': + factor = 1000 * 1000 * 1000; + break; + default: + goto bad; + } + + d /= (double)factor; + size_t sec = (size_t)d; + size_t nsec = (size_t)((d - (double)sec) * 1000000000.0); + + unless (*endptr == '\0' + and 0.0 <= d) + goto bad; + + t->tv_sec = sec; + t->tv_nsec = nsec; + return; + +bad: + die ("Invalid @MLton/@mpl time spec: %s.", s); +} + /* ---------------------------------------------------------------- */ /* GC_init */ /* ---------------------------------------------------------------- */ @@ -327,6 +370,14 @@ int processAtMLton (GC_state s, int start, int argc, char **argv, die("%s megablock-threshold must be at least 1", atName); } 
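A brief note on the time-spec syntax accepted by stringToTime above: the numeric part is divided by the suffix factor, so 's' means seconds, 'm' milliseconds, 'u' microseconds, and 'n' nanoseconds. The helper exampleTimeSpecs and the command line shown below are illustrative only, not part of the patch.

/* Illustrative only: what stringToTime produces for a few inputs, e.g. as
 * supplied on the command line via
 *   ./program @mpl block-usage-sample-interval 250m --           */
static void exampleTimeSpecs(void) {
  struct timespec t;
  stringToTime("250m", &t);  /* t.tv_sec = 0, t.tv_nsec = 250000000 (250 ms) */
  stringToTime("50u", &t);   /* t.tv_sec = 0, t.tv_nsec = 50000 (50 us)      */
  stringToTime("2s", &t);    /* t.tv_sec = 2, t.tv_nsec = 0                  */
}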
s->controls->megablockThreshold = xx; + } else if (0 == strcmp(arg, "block-usage-sample-interval")) { + i++; + if (i == argc || (0 == strcmp (argv[i], "--"))) { + die ("%s block-usage-sample-interval missing argument.", atName); + } + struct timespec tm; + stringToTime(argv[i++], &tm); + s->controls->blockUsageSampleInterval = tm; } else if (0 == strcmp (arg, "collection-type")) { i++; if (i == argc || (0 == strcmp (argv[i], "--"))) { @@ -481,6 +532,10 @@ int GC_init (GC_state s, int argc, char **argv) { s->controls->megablockThreshold = 18; s->controls->manageEntanglement = TRUE; + // default: sample block usage once a second + s->controls->blockUsageSampleInterval.tv_sec = 1; + s->controls->blockUsageSampleInterval.tv_nsec = 0; + /* Not arbitrary; should be at least the page size and must also respect the * limit check coalescing amount in the compiler. */ s->controls->blockSize = max(GC_pageSize(), 4096); From 302158292556045348c21c4c01f8c9f195c32a01 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Sun, 9 Oct 2022 10:59:18 -0400 Subject: [PATCH 090/114] remove unnecessary locking for global block stats --- runtime/gc/block-allocator.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 78764536a..4a8a186ee 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -408,20 +408,18 @@ static void clearOutOtherFrees(GC_state s) { static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { BlockAllocator global = s->blockAllocatorGlobal; + size_t nb = mb->numBlocks; + enum BlockPurpose purpose = mb->purpose; if (sizeClass >= s->controls->megablockThreshold) { - size_t nb = mb->numBlocks; - enum BlockPurpose purpose = mb->purpose; GC_release((pointer)mb, s->controls->blockSize * mb->numBlocks); LOG(LM_CHUNK_POOL, LL_INFO, "Released large allocation of %zu blocks (unmap threshold: %zu)", nb, (size_t)1 << (s->controls->megablockThreshold - 1)); - pthread_mutex_lock(&(global->megaBlockLock)); - global->numBlocksReleased += nb; - global->numBlocksFreed[purpose] += nb; - pthread_mutex_unlock(&(global->megaBlockLock)); + __sync_fetch_and_add(&(global->numBlocksFreed[purpose]), nb); + __sync_fetch_and_add(&(global->numBlocksReleased), nb); return; } @@ -430,8 +428,9 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { pthread_mutex_lock(&(global->megaBlockLock)); mb->nextMegaBlock = global->megaBlockSizeClass[mbClass].firstMegaBlock; global->megaBlockSizeClass[mbClass].firstMegaBlock = mb; - global->numBlocksFreed[mb->purpose] += mb->numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); + + __sync_fetch_and_add(&(global->numBlocksFreed[purpose]), nb); return; } @@ -468,9 +467,10 @@ static MegaBlock tryFindMegaBlock( if (mb->numBlocks >= numBlocksNeeded) { *mbp = mb->nextMegaBlock; mb->nextMegaBlock = NULL; - global->numBlocksAllocated[purpose] += mb->numBlocks; pthread_mutex_unlock(&(global->megaBlockLock)); + __sync_fetch_and_add(&(global->numBlocksAllocated[purpose]), mb->numBlocks); + LOG(LM_CHUNK_POOL, LL_INFO, "inspected %zu, satisfied large alloc of %zu blocks using megablock of %zu", count, @@ -498,10 +498,8 @@ static MegaBlock mmapNewMegaBlock(GC_state s, size_t numBlocks, enum BlockPurpos } BlockAllocator global = s->blockAllocatorGlobal; - pthread_mutex_lock(&(global->megaBlockLock)); - global->numBlocksMapped += numBlocks; - global->numBlocksAllocated[purpose] += numBlocks; - pthread_mutex_unlock(&(global->megaBlockLock)); + 
__sync_fetch_and_add(&(global->numBlocksMapped), numBlocks); + __sync_fetch_and_add(&(global->numBlocksAllocated[purpose]), numBlocks); MegaBlock mb = (MegaBlock)start; mb->numBlocks = numBlocks; From 741981bc72e9c7a25ae5d1dab48bf43ef39da818 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 11 Oct 2022 23:30:07 -0400 Subject: [PATCH 091/114] add tracking for entanglement suspects and ebr --- runtime/gc/block-allocator.c | 12 ++++++++++++ runtime/gc/block-allocator.h | 2 ++ runtime/gc/cc-work-list.c | 6 +++++- runtime/gc/concurrent-list.c | 4 ++-- runtime/gc/concurrent-list.h | 3 ++- runtime/gc/ebr.c | 7 +++++-- runtime/gc/entanglement-suspects.c | 8 ++++---- runtime/gc/hierarchical-heap-collection.c | 8 ++++++-- runtime/gc/remembered-set.c | 4 ++-- 9 files changed, 40 insertions(+), 14 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 4a8a186ee..8b18f45b0 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -718,6 +718,8 @@ void logCurrentBlockUsage( " BLOCK_FOR_HH_ALLOCATOR %zu (%zu%%) (= %zu - %zu)\n" " BLOCK_FOR_UF_ALLOCATOR %zu (%zu%%) (= %zu - %zu)\n" " BLOCK_FOR_GC_WORKLIST %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_SUSPECTS %zu (%zu%%) (= %zu - %zu)\n" + " BLOCK_FOR_EBR %zu (%zu%%) (= %zu - %zu)\n" " BLOCK_FOR_UNKNOWN_PURPOSE %zu (%zu%%) (= %zu - %zu)\n", now->tv_sec, now->tv_nsec, @@ -755,6 +757,16 @@ void logCurrentBlockUsage( allocated[BLOCK_FOR_GC_WORKLIST], freed[BLOCK_FOR_GC_WORKLIST], + inUse[BLOCK_FOR_SUSPECTS], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_SUSPECTS] / (double)count), + allocated[BLOCK_FOR_SUSPECTS], + freed[BLOCK_FOR_SUSPECTS], + + inUse[BLOCK_FOR_EBR], + (size_t)(100.0 * (double)inUse[BLOCK_FOR_EBR] / (double)count), + allocated[BLOCK_FOR_EBR], + freed[BLOCK_FOR_EBR], + inUse[BLOCK_FOR_UNKNOWN_PURPOSE], (size_t)(100.0 * (double)inUse[BLOCK_FOR_UNKNOWN_PURPOSE] / (double)count), allocated[BLOCK_FOR_UNKNOWN_PURPOSE], diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index f350b1a40..a7a965228 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -23,6 +23,8 @@ enum BlockPurpose { BLOCK_FOR_HH_ALLOCATOR, BLOCK_FOR_UF_ALLOCATOR, BLOCK_FOR_GC_WORKLIST, + BLOCK_FOR_SUSPECTS, + BLOCK_FOR_EBR, BLOCK_FOR_UNKNOWN_PURPOSE, NUM_BLOCK_PURPOSES /** Hack to know statically how many there are. Make sure * this comes last in the list. 
*/ diff --git a/runtime/gc/cc-work-list.c b/runtime/gc/cc-work-list.c index 536af4f85..18705b6be 100644 --- a/runtime/gc/cc-work-list.c +++ b/runtime/gc/cc-work-list.c @@ -139,7 +139,11 @@ void CC_workList_push( if (chunk->nextChunk != NULL) { chunk = chunk->nextChunk; // this will be an empty chunk } else { - chunk = HM_allocateChunk(list, elemSize); + // chunk = HM_allocateChunk(list, elemSize); + chunk = HM_allocateChunkWithPurpose( + list, + elemSize, + BLOCK_FOR_GC_WORKLIST); } w->currentChunk = chunk; } diff --git a/runtime/gc/concurrent-list.c b/runtime/gc/concurrent-list.c index e43ed9b4a..e34c165ad 100644 --- a/runtime/gc/concurrent-list.c +++ b/runtime/gc/concurrent-list.c @@ -217,8 +217,8 @@ void CC_appendConcList(CC_concList concList1, CC_concList concList2) { pthread_mutex_unlock(&concList1->mutex); } -void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info) { +void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info, enum BlockPurpose purpose) { struct HM_chunkList _chunkList; CC_popAsChunkList(concList, &(_chunkList)); - HM_freeChunksInListWithInfo(s, &(_chunkList), info, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, &(_chunkList), info, purpose); } \ No newline at end of file diff --git a/runtime/gc/concurrent-list.h b/runtime/gc/concurrent-list.h index 6f73d8bb4..dbe042abd 100644 --- a/runtime/gc/concurrent-list.h +++ b/runtime/gc/concurrent-list.h @@ -26,12 +26,13 @@ struct CC_concList { void CC_initConcList(CC_concList concList); pointer CC_storeInConcListWithPurpose(CC_concList concList, void* p, size_t objSize, enum BlockPurpose purpose); + // void CC_foreachObjInList(CC_concList concList, size_t objSize, HM_foreachObjClosure f); // void CC_foreachRemInConc(GC_state s, CC_concList concList, struct HM_foreachDownptrClosure* f); void CC_popAsChunkList(CC_concList concList, HM_chunkList chunkList); HM_chunk CC_getLastChunk (CC_concList concList); -void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info); +void CC_freeChunksInConcListWithInfo(GC_state s, CC_concList concList, void *info, enum BlockPurpose purpose); void CC_appendConcList(CC_concList concList1, CC_concList concList2); #endif /* MLTON_GC_INTERNAL_FUNCS */ diff --git a/runtime/gc/ebr.c b/runtime/gc/ebr.c index dfa9072fd..ec7847371 100644 --- a/runtime/gc/ebr.c +++ b/runtime/gc/ebr.c @@ -73,7 +73,7 @@ static void rotateAndReclaim(GC_state s, EBR_shared ebr) } } - HM_freeChunksInList(s, limboBag); + HM_freeChunksInListWithInfo(s, limboBag, NULL, BLOCK_FOR_EBR); HM_initChunkList(limboBag); // clear it out } @@ -153,7 +153,10 @@ void EBR_retire(GC_state s, EBR_shared ebr, void *ptr) // slow path: allocate new chunk - chunk = HM_allocateChunk(limboBag, sizeof(void *)); + chunk = HM_allocateChunkWithPurpose( + limboBag, + sizeof(void *), + BLOCK_FOR_EBR); assert(NULL != chunk && HM_getChunkSizePastFrontier(chunk) >= sizeof(void *)); diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 6ec20c565..6275ed4c3 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -58,7 +58,7 @@ void clear_suspect( if (pinType(header) == PIN_ANY && unpinDepth < eargs->heapDepth) { /* Not ready to be cleared */ HM_HierarchicalHeap unpinHeap = HM_HH_getHeapAtDepth(s, eargs->thread, unpinDepth); - HM_storeInChunkList(HM_HH_getSuspects(unpinHeap), opp, sizeof(objptr)); + HM_storeInChunkListWithPurpose(HM_HH_getSuspects(unpinHeap), opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); 
return; } @@ -69,7 +69,7 @@ void clear_suspect( } else { /*oops something changed in b/w, let's try at the next join*/ - HM_storeInChunkList(eargs->newList, opp, sizeof(objptr)); + HM_storeInChunkListWithPurpose(eargs->newList, opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); } } @@ -95,7 +95,7 @@ void ES_add(__attribute__((unused)) GC_state s, HM_chunkList es, objptr op) return; } s->cumulativeStatistics->numSuspectsMarked++; - HM_storeInChunkList(es, &op, sizeof(objptr)); + HM_storeInChunkListWithPurpose(es, &op, sizeof(objptr), BLOCK_FOR_SUSPECTS); } int ES_foreachSuspect( @@ -154,5 +154,5 @@ void ES_clear(GC_state s, HM_HierarchicalHeap hh) int numSuspects = ES_foreachSuspect(s, &oldList, &fObjptrClosure); s->cumulativeStatistics->numSuspectsCleared+=numSuspects; - HM_freeChunksInList(s, &(oldList)); + HM_freeChunksInListWithInfo(s, &(oldList), NULL, BLOCK_FOR_SUSPECTS); } diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index fe236092b..3113c8ab5 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -664,7 +664,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) ES_foreachSuspect(s, suspects, &fObjptrClosure); info.depth = depth; info.freedType = LGC_FREED_SUSPECT_CHUNK; - HM_freeChunksInListWithInfo(s, suspects, &infoc, BLOCK_FOR_UNKNOWN_PURPOSE); + HM_freeChunksInListWithInfo(s, suspects, &infoc, BLOCK_FOR_SUSPECTS); } } @@ -1240,7 +1240,11 @@ void copySuspect( return; } uint32_t opDepth = args->toDepth; - HM_storeInChunkList(HM_HH_getSuspects(toSpaceHH(s, args, opDepth)), &new_ptr, sizeof(objptr)); + HM_storeInChunkListWithPurpose( + HM_HH_getSuspects(toSpaceHH(s, args, opDepth)), + &new_ptr, + sizeof(objptr), + BLOCK_FOR_SUSPECTS); } bool headerForwarded(GC_header h) diff --git a/runtime/gc/remembered-set.c b/runtime/gc/remembered-set.c index 87f28b5c0..f6ef64414 100644 --- a/runtime/gc/remembered-set.c +++ b/runtime/gc/remembered-set.c @@ -189,6 +189,6 @@ void HM_appendRemSet(HM_remSet r1, HM_remSet r2) { } void HM_freeRemSetWithInfo(GC_state s, HM_remSet remSet, void* info) { - HM_freeChunksInListWithInfo(s, &(remSet->private), info, BLOCK_FOR_UNKNOWN_PURPOSE); - CC_freeChunksInConcListWithInfo(s, &(remSet->public), info); + HM_freeChunksInListWithInfo(s, &(remSet->private), info, BLOCK_FOR_REMEMBERED_SET); + CC_freeChunksInConcListWithInfo(s, &(remSet->public), info, BLOCK_FOR_REMEMBERED_SET); } From 6ea1e7e00e5e9b93bd41e783b1c6e27d1390f55d Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Sun, 16 Oct 2022 12:52:54 -0400 Subject: [PATCH 092/114] separate runtime call for clear suspects, and some suspects logging --- basis-library/mlton/thread.sig | 2 ++ basis-library/mlton/thread.sml | 3 +++ basis-library/primitive/prim-mlton.sml | 2 ++ basis-library/schedulers/shh/Scheduler.sml | 5 +++- runtime/gc/entanglement-suspects.c | 30 ++++++++++++++++++++-- runtime/gc/hierarchical-heap.c | 25 +++++++++++++++--- runtime/gc/hierarchical-heap.h | 5 ++++ runtime/gc/thread.c | 12 +++++++++ runtime/gc/thread.h | 2 ++ 9 files changed, 79 insertions(+), 7 deletions(-) diff --git a/basis-library/mlton/thread.sig b/basis-library/mlton/thread.sig index 90b9caaaa..32b09060d 100644 --- a/basis-library/mlton/thread.sig +++ b/basis-library/mlton/thread.sig @@ -69,6 +69,8 @@ signature MLTON_THREAD = (* Move all chunks at the current depth up one level. 
*) val promoteChunks : thread -> unit + val clearSuspectsAtDepth: thread * int -> unit + (* "put a new thread in the hierarchy *) val moveNewThreadToDepth : thread * int -> unit diff --git a/basis-library/mlton/thread.sml b/basis-library/mlton/thread.sml index 4b3b98659..0b1c9941f 100644 --- a/basis-library/mlton/thread.sml +++ b/basis-library/mlton/thread.sml @@ -90,6 +90,9 @@ struct Prim.moveNewThreadToDepth (t, Word32.fromInt d) fun checkFinishedCCReadyToJoin () = Prim.checkFinishedCCReadyToJoin (gcState ()) + + fun clearSuspectsAtDepth (t, d) = + Prim.clearSuspectsAtDepth (gcState (), t, Word32.fromInt d) end structure Disentanglement = diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index 6bdd58cc4..8b2b982f1 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -370,6 +370,8 @@ structure Thread = val setMinLocalCollectionDepth = _import "GC_HH_setMinLocalCollectionDepth" runtime private: thread * Word32.word -> unit; val mergeThreads = _import "GC_HH_mergeThreads" runtime private: thread * thread -> unit; val promoteChunks = _import "GC_HH_promoteChunks" runtime private: thread -> unit; + val clearSuspectsAtDepth = _import "GC_HH_clearSuspectsAtDepth" runtime private: + GCState.t * thread * Word32.word -> unit; val decheckFork = _import "GC_HH_decheckFork" runtime private: GCState.t * Word64.word ref * Word64.word ref -> unit; diff --git a/basis-library/schedulers/shh/Scheduler.sml b/basis-library/schedulers/shh/Scheduler.sml index 2347fdc4f..74fb320aa 100644 --- a/basis-library/schedulers/shh/Scheduler.sml +++ b/basis-library/schedulers/shh/Scheduler.sml @@ -336,7 +336,8 @@ struct val gr = if popDiscard () then - ( HH.promoteChunks thread + ( HH.clearSuspectsAtDepth (thread, depth) + ; HH.promoteChunks thread ; HH.setDepth (thread, depth) ; DE.decheckJoin (tidLeft, tidRight) (* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *) @@ -358,6 +359,7 @@ struct val tidRight = DE.decheckGetTid t in HH.mergeThreads (thread, t); + HH.clearSuspectsAtDepth (thread, depth); HH.promoteChunks thread; HH.setDepth (thread, depth); DE.decheckJoin (tidLeft, tidRight); @@ -414,6 +416,7 @@ struct ; setQueueDepth (myWorkerId ()) depth ) + val _ = HH.clearSuspectsAtDepth (thread, depth) val _ = HH.promoteChunks thread val _ = HH.setDepth (thread, depth) (* val _ = dbgmsg' (fn _ => "join CC at depth " ^ Int.toString depth) *) diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 6275ed4c3..c43d45b10 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -138,21 +138,47 @@ void ES_move(HM_chunkList list1, HM_chunkList list2) { HM_initChunkList(list2); } +static size_t SUSPECTS_THRESHOLD = 10000; + + void ES_clear(GC_state s, HM_HierarchicalHeap hh) { + struct timespec startTime; + struct timespec stopTime; + HM_chunkList es = HM_HH_getSuspects(hh); uint32_t heapDepth = HM_HH_getDepth(hh); struct HM_chunkList oldList = *(es); HM_initChunkList(HM_HH_getSuspects(hh)); + + size_t numSuspects = HM_getChunkListUsedSize(&oldList) / sizeof(objptr); + if (numSuspects >= SUSPECTS_THRESHOLD) { + timespec_now(&startTime); + } + struct ES_clearArgs eargs = { .newList = HM_HH_getSuspects(hh), .heapDepth = heapDepth, .thread = getThreadCurrent(s) }; + struct GC_foreachObjptrClosure fObjptrClosure = {.fun = clear_suspect, .env = &(eargs)}; - int numSuspects = ES_foreachSuspect(s, &oldList, &fObjptrClosure); - 
s->cumulativeStatistics->numSuspectsCleared+=numSuspects; + int ns = ES_foreachSuspect(s, &oldList, &fObjptrClosure); + assert(numSuspects == ns); + s->cumulativeStatistics->numSuspectsCleared += numSuspects; HM_freeChunksInListWithInfo(s, &(oldList), NULL, BLOCK_FOR_SUSPECTS); + + if (numSuspects >= SUSPECTS_THRESHOLD) { + timespec_now(&stopTime); + timespec_sub(&stopTime, &startTime); + LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, + "time to clear %zu suspects at depth %u: %d.%09d", + numSuspects, + HM_HH_getDepth(hh), + stopTime.tv_sec, + stopTime.tv_nsec + ); + } } diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 0a5b631a8..9d4a028c1 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -340,6 +340,26 @@ void HM_HH_merge( assertInvariants(parentThread); } + +void HM_HH_clearSuspectsAtDepth( + GC_state s, + GC_thread thread, + uint32_t targetDepth) +{ + // walk to find heap; only clear suspects at the target depth + for (HM_HierarchicalHeap cursor = thread->hierarchicalHeap; + NULL != cursor; + cursor = cursor->nextAncestor) + { + uint32_t d = HM_HH_getDepth(cursor); + if (d <= targetDepth) { + if (d == targetDepth) ES_clear(s, cursor); + return; + } + } +} + + void HM_HH_promoteChunks( GC_state s, GC_thread thread) @@ -350,7 +370,6 @@ void HM_HH_promoteChunks( { /* no need to do anything; this function only guarantees that the * current depth has been completely evacuated. */ - ES_clear(s, thread->hierarchicalHeap); return; } @@ -458,9 +477,6 @@ void HM_HH_promoteChunks( } assert(hh == thread->hierarchicalHeap); - ES_clear(s, thread->hierarchicalHeap); - assert(hh == thread->hierarchicalHeap); - #if ASSERT assert(hh == thread->hierarchicalHeap); uint32_t newDepth = HM_HH_getDepth(hh); @@ -470,6 +486,7 @@ void HM_HH_promoteChunks( #endif } + bool HM_HH_isLevelHead(HM_HierarchicalHeap hh) { return (NULL != hh) diff --git a/runtime/gc/hierarchical-heap.h b/runtime/gc/hierarchical-heap.h index f7a4fa343..7add64a41 100644 --- a/runtime/gc/hierarchical-heap.h +++ b/runtime/gc/hierarchical-heap.h @@ -160,6 +160,11 @@ void HM_HH_resetList(pointer threadp); void mergeCompletedCCs(GC_state s, HM_HierarchicalHeap hh); +void HM_HH_clearSuspectsAtDepth( + GC_state s, + GC_thread thread, + uint32_t targetDepth); + /** Very fancy (constant-space) loop that frees each dependant union-find * node of hh. 
Specifically, calls this on each dependant ufnode: diff --git a/runtime/gc/thread.c b/runtime/gc/thread.c index 495843cde..1455dd5c7 100644 --- a/runtime/gc/thread.c +++ b/runtime/gc/thread.c @@ -164,6 +164,18 @@ void GC_HH_promoteChunks(pointer threadp) { HM_HH_promoteChunks(s, thread); } +void GC_HH_clearSuspectsAtDepth(GC_state s, pointer threadp, uint32_t depth) { + getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); + getThreadCurrent(s)->exnStack = s->exnStack; + HM_HH_updateValues(getThreadCurrent(s), s->frontier); + assert(threadAndHeapOkay(s)); + + GC_thread thread = threadObjptrToStruct(s, pointerToObjptr(threadp, NULL)); + assert(thread != NULL); + assert(thread->hierarchicalHeap != NULL); + HM_HH_clearSuspectsAtDepth(s, thread, depth); +} + void GC_HH_moveNewThreadToDepth(pointer threadp, uint32_t depth) { GC_state s = pthread_getspecific(gcstate_key); GC_thread thread = threadObjptrToStruct(s, pointerToObjptr(threadp, NULL)); diff --git a/runtime/gc/thread.h b/runtime/gc/thread.h index 0c7c03c23..7633daee9 100644 --- a/runtime/gc/thread.h +++ b/runtime/gc/thread.h @@ -129,6 +129,8 @@ PRIVATE void GC_HH_setMinLocalCollectionDepth(pointer thread, Word32 depth); */ PRIVATE void GC_HH_moveNewThreadToDepth(pointer thread, Word32 depth); +PRIVATE void GC_HH_clearSuspectsAtDepth(GC_state s, pointer threadp, uint32_t depth); + PRIVATE Bool GC_HH_checkFinishedCCReadyToJoin(GC_state s); #endif /* MLTON_GC_INTERNAL_BASIS */ From 12073fde4429aa497ccadb12bb58bc9f8ee4ec40 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 17 Oct 2022 15:42:18 -0400 Subject: [PATCH 093/114] performance improvement: dont assume quiescent for HM_EBR (chunk management);signifiicantly less overhead on entangled read barriers --- runtime/gc/assign.c | 8 ++++---- runtime/gc/garbage-collection.c | 2 +- runtime/gc/hierarchical-heap-collection.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index ccf7325af..4a80545d3 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -21,14 +21,14 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src) return src; } - HM_EBR_leaveQuiescentState(s); + // HM_EBR_leaveQuiescentState(s); if (!decheck(s, src)) { assert (isMutable(s, dstp)); new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); assert (isPinned(new_src)); } - HM_EBR_enterQuiescentState(s); + // HM_EBR_enterQuiescentState(s); assert (!hasFwdPtr(objptrToPointer(new_src, NULL))); return new_src; } @@ -48,7 +48,7 @@ objptr Assignable_readBarrier( { return ptr; } - HM_EBR_leaveQuiescentState(s); + // HM_EBR_leaveQuiescentState(s); if (!decheck(s, ptr)) { assert (ES_contains(NULL, obj)); @@ -64,7 +64,7 @@ objptr Assignable_readBarrier( // } ptr = manage_entangled(s, ptr, getThreadCurrent(s)->decheckState); } - HM_EBR_enterQuiescentState(s); + // HM_EBR_enterQuiescentState(s); assert (!hasFwdPtr(objptrToPointer(ptr, NULL))); return ptr; diff --git a/runtime/gc/garbage-collection.c b/runtime/gc/garbage-collection.c index e39b80c5a..ff92abe05 100644 --- a/runtime/gc/garbage-collection.c +++ b/runtime/gc/garbage-collection.c @@ -104,7 +104,7 @@ void GC_collect (GC_state s, size_t bytesRequested, bool force) { // ebr for chunks HM_EBR_leaveQuiescentState(s); - HM_EBR_enterQuiescentState(s); + // HM_EBR_enterQuiescentState(s); maybeSample(s, s->blockUsageSampler); diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 3113c8ab5..90ec724f7 100644 --- 
a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -730,7 +730,7 @@ void HM_HHC_collectLocal(uint32_t desiredScope) } HM_EBR_leaveQuiescentState(s); - HM_EBR_enterQuiescentState(s); + // HM_EBR_enterQuiescentState(s); /* after everything has been scavenged, we have to move the pinned chunks */ depth = thread->currentDepth + 1; From 7b2ce2b3278adcfc517a9b40494d147fbd2db26f Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 18 Oct 2022 13:51:41 -0400 Subject: [PATCH 094/114] initial brainstorm of interface for scheduler to handle clearing suspects --- runtime/gc/entanglement-suspects.c | 171 ++++++++++++++++++++++++++++- runtime/gc/entanglement-suspects.h | 25 +++++ 2 files changed, 193 insertions(+), 3 deletions(-) diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index c43d45b10..2416b82b8 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -59,17 +59,20 @@ void clear_suspect( /* Not ready to be cleared */ HM_HierarchicalHeap unpinHeap = HM_HH_getHeapAtDepth(s, eargs->thread, unpinDepth); HM_storeInChunkListWithPurpose(HM_HH_getSuspects(unpinHeap), opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); + eargs->numMoved++; return; } GC_header newHeader = header & ~(SUSPECT_MASK); if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { /* clearing successful */ + eargs->numCleared++; return; } else { /*oops something changed in b/w, let's try at the next join*/ HM_storeInChunkListWithPurpose(eargs->newList, opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); + eargs->numFailed++; } } @@ -159,7 +162,10 @@ void ES_clear(GC_state s, HM_HierarchicalHeap hh) struct ES_clearArgs eargs = { .newList = HM_HH_getSuspects(hh), .heapDepth = heapDepth, - .thread = getThreadCurrent(s) + .thread = getThreadCurrent(s), + .numMoved = 0, + .numCleared = 0, + .numFailed = 0 }; struct GC_foreachObjptrClosure fObjptrClosure = @@ -170,15 +176,174 @@ void ES_clear(GC_state s, HM_HierarchicalHeap hh) HM_freeChunksInListWithInfo(s, &(oldList), NULL, BLOCK_FOR_SUSPECTS); + if (eargs.numFailed > 0) { + LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, + "WARNING: %zu failed suspect clears", + eargs.numFailed); + } + if (numSuspects >= SUSPECTS_THRESHOLD) { timespec_now(&stopTime); timespec_sub(&stopTime, &startTime); LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, - "time to clear %zu suspects at depth %u: %d.%09d", + "time to process %zu suspects (%zu cleared, %zu moved) at depth %u: %ld.%09ld", numSuspects, + eargs.numCleared, + eargs.numMoved, HM_HH_getDepth(hh), - stopTime.tv_sec, + (long)stopTime.tv_sec, stopTime.tv_nsec ); } } + + +size_t ES_numSuspects( + __attribute__((unused)) GC_state s, + HM_HierarchicalHeap hh) +{ + return HM_getChunkListUsedSize(HM_HH_getSuspects(hh)) / sizeof(objptr); +} + + +ES_clearSet ES_takeClearSet( + __attribute__((unused)) GC_state s, + HM_HierarchicalHeap hh) +{ + ES_clearSet result = malloc(sizeof(struct ES_clearSet)); + HM_chunkList es = HM_HH_getSuspects(hh); + struct HM_chunkList oldList = *es; + HM_initChunkList(es); + + size_t numChunks = 0; + for (HM_chunk cursor = HM_getChunkListFirstChunk(&oldList); + cursor != NULL; + cursor = cursor->nextChunk) + { + numChunks++; + } + + HM_chunk *chunkArray = malloc(numChunks * sizeof(HM_chunk)); + result->chunkArray = chunkArray; + result->lenChunkArray = numChunks; + result->depth = HM_HH_getDepth(hh); + + size_t i = 0; + for (HM_chunk cursor = HM_getChunkListFirstChunk(&oldList); + cursor != NULL; + cursor = cursor->nextChunk) + { + 
chunkArray[i] = cursor; + i++; + } + + return result; +} + + +size_t ES_numChunksInClearSet( + __attribute__((unused)) GC_state s, + ES_clearSet es) +{ + return es->lenChunkArray; +} + + +void clear_suspect_par_safe( + __attribute__((unused)) GC_state s, + objptr *opp, + objptr op, + struct HM_chunkList *output, + size_t lenOutput) +{ + pointer p = objptrToPointer(op, NULL); + GC_header header = getHeader(p); + uint32_t unpinDepth = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + + // Note: lenOutput == depth of heap whose suspects we are clearing + if (pinType(header) == PIN_ANY && unpinDepth < lenOutput) { + /* Not ready to be cleared; move it instead */ + HM_storeInChunkListWithPurpose(&(output[unpinDepth]), opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); + return; + } + + GC_header newHeader = header & ~(SUSPECT_MASK); + if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { + /* clearing successful */ + return; + } + else { + // oops, something changed in between + // Is this possible? + // - Seems like it could be, if there is a CGC happening + // simultaneously at the same level (and it's marking/unmarking objects)? + // - If this is the only possibility, then we should be able to just + // do the CAS with newHeader in a loop? + DIE("TODO: is this possible?"); + } +} + + +ES_finishedClearSetGrain ES_processClearSetGrain( + GC_state s, + ES_clearSet es, + size_t start, + size_t stop) +{ + ES_finishedClearSetGrain result = malloc(sizeof(struct ES_finishedClearSetGrain)); + struct HM_chunkList *output = malloc(es->depth * sizeof(struct HM_chunkList)); + result->output = output; + result->lenOutput = es->depth; + + // initialize output + for (uint32_t i = 0; i < es->depth; i++) { + HM_initChunkList(&(output[i])); + } + + // process each input chunk + for (size_t i = start; i < stop; i++) { + HM_chunk chunk = es->chunkArray[i]; + pointer p = HM_getChunkStart(chunk); + pointer frontier = HM_getChunkFrontier(chunk); + while (p < frontier) + { + objptr* opp = (objptr*)p; + objptr op = *opp; + if (isObjptr(op)) { + clear_suspect_par_safe(s, opp, op, output, es->depth); + } + p += sizeof(objptr); + } + } + + return result; +} + + +void ES_commitFinishedClearSetGrain( + GC_state s, + GC_thread thread, + ES_finishedClearSetGrain es) +{ + for (size_t i = 0; i < es->lenOutput; i++) { + HM_chunkList list = &(es->output[i]); + if (HM_getChunkListSize(list) == 0) + continue; + + HM_HierarchicalHeap dest = HM_HH_getHeapAtDepth(s, thread, i); + HM_appendChunkList(HM_HH_getSuspects(dest), list); + HM_initChunkList(list); + } + + free(es->output); + free(es); +} + + +void ES_deleteClearSet(GC_state s, ES_clearSet es) { + for (size_t i = 0; i < es->lenChunkArray; i++) { + HM_freeChunkWithInfo(s, es->chunkArray[i], NULL, BLOCK_FOR_SUSPECTS); + } + free(es->chunkArray); + free(es); +} \ No newline at end of file diff --git a/runtime/gc/entanglement-suspects.h b/runtime/gc/entanglement-suspects.h index fb82535da..013dfc511 100644 --- a/runtime/gc/entanglement-suspects.h +++ b/runtime/gc/entanglement-suspects.h @@ -10,8 +10,24 @@ typedef struct ES_clearArgs { HM_chunkList newList; uint32_t heapDepth; GC_thread thread; + size_t numMoved; + size_t numFailed; + size_t numCleared; } * ES_clearArgs; + +typedef struct ES_clearSet { + HM_chunk *chunkArray; // array of chunks that need to be processed + size_t lenChunkArray; // len(chunkArray) + uint32_t depth; +} * ES_clearSet; + +typedef struct ES_finishedClearSetGrain { + struct HM_chunkList *output; // output[d]: unsuccessful clears that were moved 
to depth d + size_t lenOutput; // len(output array) +} * ES_finishedClearSetGrain; + + bool ES_mark(__attribute__((unused)) GC_state s, objptr op); void ES_unmark(GC_state s, objptr op); @@ -23,6 +39,15 @@ HM_chunkList ES_append(GC_state s, HM_chunkList es1, HM_chunkList es2); void ES_clear(GC_state s, HM_HierarchicalHeap hh); +// These functions allow us to clear a suspect set in parallel, +// by integrating with the scheduler. The idea is... +size_t ES_numSuspects(GC_state s, HM_HierarchicalHeap hh); +ES_clearSet ES_takeClearSet(GC_state s, HM_HierarchicalHeap hh); +size_t ES_numChunksInClearSet(GC_state s, ES_clearSet es); +ES_finishedClearSetGrain ES_processClearSetGrain(GC_state s, ES_clearSet es, size_t start, size_t stop); +void ES_commitFinishedClearSetGrain(GC_state s, GC_thread thread, ES_finishedClearSetGrain es); +void ES_deleteClearSet(GC_state s, ES_clearSet es); + void ES_move(HM_chunkList list1, HM_chunkList list2); int ES_foreachSuspect(GC_state s, HM_chunkList storage, struct GC_foreachObjptrClosure * fObjptrClosure); From 15a0578e8e8d53d3854bb42e6de5aa2dc2f0cba7 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 18 Oct 2022 15:10:44 -0400 Subject: [PATCH 095/114] clear suspects in parallel if needed --- basis-library/mlton/thread.sig | 8 ++ basis-library/mlton/thread.sml | 21 ++++ basis-library/primitive/prim-mlton.sml | 12 ++ basis-library/schedulers/shh/Scheduler.sml | 135 ++++++++++++++++++++- runtime/gc/entanglement-suspects.c | 25 ++++ runtime/gc/entanglement-suspects.h | 2 + runtime/gc/thread.c | 57 +++++++++ runtime/gc/thread.h | 7 ++ 8 files changed, 261 insertions(+), 6 deletions(-) diff --git a/basis-library/mlton/thread.sig b/basis-library/mlton/thread.sig index 32b09060d..2a3fae6da 100644 --- a/basis-library/mlton/thread.sig +++ b/basis-library/mlton/thread.sig @@ -42,6 +42,8 @@ signature MLTON_THREAD = structure HierarchicalHeap : sig type thread = Basic.t + type clear_set + type finished_clear_set_grain (* The level (depth) of a thread's heap in the hierarchy. 
*) val getDepth : thread -> int @@ -70,6 +72,12 @@ signature MLTON_THREAD = val promoteChunks : thread -> unit val clearSuspectsAtDepth: thread * int -> unit + val numSuspectsAtDepth: thread * int -> int + val takeClearSetAtDepth: thread * int -> clear_set + val numChunksInClearSet: clear_set -> int + val processClearSetGrain: clear_set * int * int -> finished_clear_set_grain + val commitFinishedClearSetGrain: thread * finished_clear_set_grain -> unit + val deleteClearSet: clear_set -> unit (* "put a new thread in the hierarchy *) val moveNewThreadToDepth : thread * int -> unit diff --git a/basis-library/mlton/thread.sml b/basis-library/mlton/thread.sml index 0b1c9941f..8ef2d1c95 100644 --- a/basis-library/mlton/thread.sml +++ b/basis-library/mlton/thread.sml @@ -73,6 +73,9 @@ struct type thread = Basic.t type t = MLtonPointer.t + type clear_set = MLtonPointer.t + type finished_clear_set_grain = MLtonPointer.t + fun forceLeftHeap (myId, t) = Prim.forceLeftHeap(Word32.fromInt myId, t) fun forceNewChunk () = Prim.forceNewChunk (gcState ()) fun registerCont (kl, kr, k, t) = Prim.registerCont(kl, kr, k, t) @@ -93,6 +96,24 @@ struct fun clearSuspectsAtDepth (t, d) = Prim.clearSuspectsAtDepth (gcState (), t, Word32.fromInt d) + + fun numSuspectsAtDepth (t, d) = + Word64.toInt (Prim.numSuspectsAtDepth (gcState (), t, Word32.fromInt d)) + + fun takeClearSetAtDepth (t, d) = + Prim.takeClearSetAtDepth (gcState (), t, Word32.fromInt d) + + fun numChunksInClearSet c = + Word64.toInt (Prim.numChunksInClearSet (gcState (), c)) + + fun processClearSetGrain (c, start, stop) = + Prim.processClearSetGrain (gcState (), c, Word64.fromInt start, Word64.fromInt stop) + + fun commitFinishedClearSetGrain (t, fcsg) = + Prim.commitFinishedClearSetGrain (gcState (), t, fcsg) + + fun deleteClearSet c = + Prim.deleteClearSet (gcState (), c) end structure Disentanglement = diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index 8b2b982f1..2273b4b41 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -372,6 +372,18 @@ structure Thread = val promoteChunks = _import "GC_HH_promoteChunks" runtime private: thread -> unit; val clearSuspectsAtDepth = _import "GC_HH_clearSuspectsAtDepth" runtime private: GCState.t * thread * Word32.word -> unit; + val numSuspectsAtDepth = _import "GC_HH_numSuspectsAtDepth" runtime private: + GCState.t * thread * Word32.word -> Word64.word; + val takeClearSetAtDepth = _import "GC_HH_takeClearSetAtDepth" runtime private: + GCState.t * thread * Word32.word -> Pointer.t; + val numChunksInClearSet = _import "GC_HH_numChunksInClearSet" runtime private: + GCState.t * Pointer.t -> Word64.word; + val processClearSetGrain = _import "GC_HH_processClearSetGrain" runtime private: + GCState.t * Pointer.t * Word64.word * Word64.word -> Pointer.t; + val commitFinishedClearSetGrain = _import "GC_HH_commitFinishedClearSetGrain" runtime private: + GCState.t * thread * Pointer.t -> unit; + val deleteClearSet = _import "GC_HH_deleteClearSet" runtime private: + GCState.t * Pointer.t -> unit; val decheckFork = _import "GC_HH_decheckFork" runtime private: GCState.t * Word64.word ref * Word64.word ref -> unit; diff --git a/basis-library/schedulers/shh/Scheduler.sml b/basis-library/schedulers/shh/Scheduler.sml index 74fb320aa..fed4fa5d1 100644 --- a/basis-library/schedulers/shh/Scheduler.sml +++ b/basis-library/schedulers/shh/Scheduler.sml @@ -336,10 +336,10 @@ struct val gr = if popDiscard () then - ( HH.clearSuspectsAtDepth 
(thread, depth) - ; HH.promoteChunks thread + ( HH.promoteChunks thread ; HH.setDepth (thread, depth) ; DE.decheckJoin (tidLeft, tidRight) + ; maybeParClearSuspectsAtDepth (thread, depth) (* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *) (* ; HH.forceNewChunk () *) ; let @@ -359,11 +359,11 @@ struct val tidRight = DE.decheckGetTid t in HH.mergeThreads (thread, t); - HH.clearSuspectsAtDepth (thread, depth); HH.promoteChunks thread; HH.setDepth (thread, depth); DE.decheckJoin (tidLeft, tidRight); setQueueDepth (myWorkerId ()) depth; + maybeParClearSuspectsAtDepth (thread, depth); (* dbgmsg' (fn _ => "join slow at depth " ^ Int.toString depth); *) case HM.refDerefNoBarrier rightSideResult of NONE => die (fn _ => "scheduler bug: join failed: missing result") @@ -376,8 +376,83 @@ struct (extractResult fr, extractResult gr) end + + and simpleParFork thread depth (f: unit -> unit, g: unit -> unit) : unit = + let + val rightSideThread = ref (NONE: Thread.t option) + val rightSideResult = ref (NONE: unit result option) + val incounter = ref 2 + + val (tidLeft, tidRight) = DE.decheckFork () + + fun g' () = + let + val () = DE.copySyncDepthsFromThread (thread, depth+1) + val () = DE.decheckSetTid tidRight + val gr = result g + val t = Thread.current () + in + rightSideThread := SOME t; + rightSideResult := SOME gr; + if decrementHitsZero incounter then + ( setQueueDepth (myWorkerId ()) (depth+1) + ; threadSwitch thread + ) + else + returnToSched () + end + val _ = push (NormalTask g') + val _ = HH.setDepth (thread, depth + 1) + (* NOTE: off-by-one on purpose. Runtime depths start at 1. *) + val _ = recordForkDepth depth + + val _ = DE.decheckSetTid tidLeft + val fr = result f + val tidLeft = DE.decheckGetTid thread + + val gr = + if popDiscard () then + ( HH.promoteChunks thread + ; HH.setDepth (thread, depth) + ; DE.decheckJoin (tidLeft, tidRight) + ; maybeParClearSuspectsAtDepth (thread, depth) + (* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *) + (* ; HH.forceNewChunk () *) + ; let + val gr = result g + in + (* (gr, DE.decheckGetTid thread) *) + gr + end + ) + else + ( clear () (* this should be safe after popDiscard fails? 
*) + ; if decrementHitsZero incounter then () else returnToSched () + ; case HM.refDerefNoBarrier rightSideThread of + NONE => die (fn _ => "scheduler bug: join failed") + | SOME t => + let + val tidRight = DE.decheckGetTid t + in + HH.mergeThreads (thread, t); + HH.promoteChunks thread; + HH.setDepth (thread, depth); + DE.decheckJoin (tidLeft, tidRight); + setQueueDepth (myWorkerId ()) depth; + maybeParClearSuspectsAtDepth (thread, depth); + (* dbgmsg' (fn _ => "join slow at depth " ^ Int.toString depth); *) + case HM.refDerefNoBarrier rightSideResult of + NONE => die (fn _ => "scheduler bug: join failed: missing result") + | SOME gr => gr + end + ) + in + (extractResult fr, extractResult gr); + () + end + - fun forkGC thread depth (f : unit -> 'a, g : unit -> 'b) = + and forkGC thread depth (f : unit -> 'a, g : unit -> 'b) = let val heapId = ref (HH.getRoot thread) val gcTaskTuple = (thread, heapId) @@ -416,9 +491,9 @@ struct ; setQueueDepth (myWorkerId ()) depth ) - val _ = HH.clearSuspectsAtDepth (thread, depth) val _ = HH.promoteChunks thread val _ = HH.setDepth (thread, depth) + val _ = maybeParClearSuspectsAtDepth (thread, depth) (* val _ = dbgmsg' (fn _ => "join CC at depth " ^ Int.toString depth) *) in result @@ -440,7 +515,55 @@ struct (f (), g ()) end - fun fork (f, g) = fork' {ccOkayAtThisDepth=true} (f, g) + and fork (f, g) = fork' {ccOkayAtThisDepth=true} (f, g) + + and simpleFork (f, g) = + let + val thread = Thread.current () + val depth = HH.getDepth thread + in + (* if ccOkayAtThisDepth andalso depth = 1 then *) + if depth < Queue.capacity andalso depthOkayForDECheck depth then + simpleParFork thread depth (f, g) + else + (* don't let us hit an error, just sequentialize instead *) + (f (); g ()) + end + + and maybeParClearSuspectsAtDepth (t, d) = + if HH.numSuspectsAtDepth (t, d) <= 10000 then + HH.clearSuspectsAtDepth (t, d) + else + let + val cs = HH.takeClearSetAtDepth (t, d) + val count = HH.numChunksInClearSet cs + val grainSize = 20 + val numGrains = 1 + (count-1) div grainSize + val results = ArrayExtra.alloc numGrains + fun start i = i*grainSize + fun stop i = Int.min (grainSize + start i, count) + + fun processLoop i j = + if j-i = 1 then + Array.update (results, i, HH.processClearSetGrain (cs, start i, stop i)) + else + let + val mid = i + (j-i) div 2 + in + simpleFork (fn _ => processLoop i mid, fn _ => processLoop mid j) + end + + fun commitLoop i = + if i >= numGrains then () else + ( HH.commitFinishedClearSetGrain (t, Array.sub (results, i)) + ; commitLoop (i+1) + ) + in + processLoop 0 numGrains; + commitLoop 0; + HH.deleteClearSet cs; + maybeParClearSuspectsAtDepth (t, d) (* need to go again, just in case *) + end end (* ======================================================================== diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 2416b82b8..e8d14e3fe 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -210,6 +210,11 @@ ES_clearSet ES_takeClearSet( __attribute__((unused)) GC_state s, HM_HierarchicalHeap hh) { + struct timespec startTime; + timespec_now(&startTime); + + size_t numSuspects = ES_numSuspects(s, hh); + ES_clearSet result = malloc(sizeof(struct ES_clearSet)); HM_chunkList es = HM_HH_getSuspects(hh); struct HM_chunkList oldList = *es; @@ -227,6 +232,8 @@ ES_clearSet ES_takeClearSet( result->chunkArray = chunkArray; result->lenChunkArray = numChunks; result->depth = HM_HH_getDepth(hh); + result->numSuspects = numSuspects; + result->startTime = startTime; 
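(For reference: the grain decomposition driven by maybeParClearSuspectsAtDepth above corresponds to the following sequential C sketch. The helper name is hypothetical and the scheduler actually runs the grains in parallel via simpleFork; only the ES_* calls and the 20-chunk grain size come from the patch, and the runtime's internal headers are assumed.)

/* Sequential sketch of the grain decomposition used above (hypothetical helper,
 * not part of the patch). Grain i covers chunk indices
 * [i*grainSize, min((i+1)*grainSize, count)). */
static void clearSuspectsInGrainsSketch(
  GC_state s, GC_thread thread, HM_HierarchicalHeap hh)
{
  ES_clearSet cs = ES_takeClearSet(s, hh);
  size_t count = ES_numChunksInClearSet(s, cs);
  size_t grainSize = 20;   /* same constant as maybeParClearSuspectsAtDepth */
  size_t numGrains = (count == 0) ? 0 : 1 + (count - 1) / grainSize;

  ES_finishedClearSetGrain *results =
    malloc(numGrains * sizeof(ES_finishedClearSetGrain));

  for (size_t i = 0; i < numGrains; i++) {
    size_t start = i * grainSize;
    size_t stop = (start + grainSize < count) ? (start + grainSize) : count;
    /* in the scheduler, these calls happen in parallel */
    results[i] = ES_processClearSetGrain(s, cs, start, stop);
  }

  for (size_t i = 0; i < numGrains; i++) {
    /* suspects that were not ready to clear get moved to ancestor heaps here */
    ES_commitFinishedClearSetGrain(s, thread, results[i]);
  }

  free(results);
  ES_deleteClearSet(s, cs);
  /* The SML code then calls maybeParClearSuspectsAtDepth again, in case new
   * suspects appeared while this batch was being processed. */
}
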
size_t i = 0; for (HM_chunk cursor = HM_getChunkListFirstChunk(&oldList); @@ -341,9 +348,27 @@ void ES_commitFinishedClearSetGrain( void ES_deleteClearSet(GC_state s, ES_clearSet es) { + s->cumulativeStatistics->numSuspectsCleared += es->numSuspects; + for (size_t i = 0; i < es->lenChunkArray; i++) { HM_freeChunkWithInfo(s, es->chunkArray[i], NULL, BLOCK_FOR_SUSPECTS); } + + size_t numSuspects = es->numSuspects; + uint32_t depth = es->depth; + struct timespec startTime = es->startTime; free(es->chunkArray); free(es); + + struct timespec stopTime; + timespec_now(&stopTime); + timespec_sub(&stopTime, &startTime); + LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, + "time to process %zu suspects at depth %u: %ld.%09ld", + numSuspects, + depth, + (long)stopTime.tv_sec, + stopTime.tv_nsec + ); + } \ No newline at end of file diff --git a/runtime/gc/entanglement-suspects.h b/runtime/gc/entanglement-suspects.h index 013dfc511..9afabb960 100644 --- a/runtime/gc/entanglement-suspects.h +++ b/runtime/gc/entanglement-suspects.h @@ -20,6 +20,8 @@ typedef struct ES_clearSet { HM_chunk *chunkArray; // array of chunks that need to be processed size_t lenChunkArray; // len(chunkArray) uint32_t depth; + size_t numSuspects; + struct timespec startTime; } * ES_clearSet; typedef struct ES_finishedClearSetGrain { diff --git a/runtime/gc/thread.c b/runtime/gc/thread.c index 1455dd5c7..4cbc6b209 100644 --- a/runtime/gc/thread.c +++ b/runtime/gc/thread.c @@ -176,6 +176,63 @@ void GC_HH_clearSuspectsAtDepth(GC_state s, pointer threadp, uint32_t depth) { HM_HH_clearSuspectsAtDepth(s, thread, depth); } +Word64 GC_HH_numSuspectsAtDepth(GC_state s, pointer threadp, uint32_t targetDepth) { + getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); + getThreadCurrent(s)->exnStack = s->exnStack; + HM_HH_updateValues(getThreadCurrent(s), s->frontier); + assert(threadAndHeapOkay(s)); + GC_thread thread = threadObjptrToStruct(s, pointerToObjptr(threadp, NULL)); + assert(thread != NULL); + assert(thread->hierarchicalHeap != NULL); + + for (HM_HierarchicalHeap cursor = thread->hierarchicalHeap; + NULL != cursor; + cursor = cursor->nextAncestor) + { + uint32_t d = HM_HH_getDepth(cursor); + if (d <= targetDepth) { + if (d == targetDepth) return (Word64)ES_numSuspects(s, cursor); + return 0; + } + } + + return 0; +} + +Pointer /*ES_clearSet*/ +GC_HH_takeClearSetAtDepth(GC_state s, pointer threadp, uint32_t targetDepth) { + getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); + getThreadCurrent(s)->exnStack = s->exnStack; + HM_HH_updateValues(getThreadCurrent(s), s->frontier); + assert(threadAndHeapOkay(s)); + GC_thread thread = threadObjptrToStruct(s, pointerToObjptr(threadp, NULL)); + assert(thread != NULL); + assert(thread->hierarchicalHeap != NULL); + return (pointer)ES_takeClearSet(s, HM_HH_getHeapAtDepth(s, thread, targetDepth)); +} + +Word64 GC_HH_numChunksInClearSet(GC_state s, pointer clearSet) { + return (Word64)ES_numChunksInClearSet(s, (ES_clearSet)clearSet); +} + +Pointer /*ES_finishedClearSetGrain*/ +GC_HH_processClearSetGrain(GC_state s, pointer clearSet, Word64 start, Word64 stop) { + return (pointer)ES_processClearSetGrain(s, (ES_clearSet)clearSet, (size_t)start, (size_t)stop); +} + +void GC_HH_commitFinishedClearSetGrain(GC_state s, pointer threadp, pointer finClearSetGrain) { + getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); + getThreadCurrent(s)->exnStack = s->exnStack; + HM_HH_updateValues(getThreadCurrent(s), s->frontier); + assert(threadAndHeapOkay(s)); + GC_thread thread = 
threadObjptrToStruct(s, pointerToObjptr(threadp, NULL)); + ES_commitFinishedClearSetGrain(s, thread, (ES_finishedClearSetGrain)finClearSetGrain); +} + +void GC_HH_deleteClearSet(GC_state s, pointer clearSet) { + ES_deleteClearSet(s, (ES_clearSet)clearSet); +} + void GC_HH_moveNewThreadToDepth(pointer threadp, uint32_t depth) { GC_state s = pthread_getspecific(gcstate_key); GC_thread thread = threadObjptrToStruct(s, pointerToObjptr(threadp, NULL)); diff --git a/runtime/gc/thread.h b/runtime/gc/thread.h index 7633daee9..68047b5ad 100644 --- a/runtime/gc/thread.h +++ b/runtime/gc/thread.h @@ -131,6 +131,13 @@ PRIVATE void GC_HH_moveNewThreadToDepth(pointer thread, Word32 depth); PRIVATE void GC_HH_clearSuspectsAtDepth(GC_state s, pointer threadp, uint32_t depth); +PRIVATE Word64 GC_HH_numSuspectsAtDepth(GC_state s, pointer threadp, uint32_t depth); +PRIVATE Pointer /*ES_clearSet*/ GC_HH_takeClearSetAtDepth(GC_state s, pointer threadp, uint32_t depth); +PRIVATE Word64 GC_HH_numChunksInClearSet(GC_state s, pointer clearSet); +PRIVATE Pointer /*ES_finishedClearSetGrain*/ GC_HH_processClearSetGrain(GC_state s, pointer clearSet, Word64 start, Word64 stop); +PRIVATE void GC_HH_commitFinishedClearSetGrain(GC_state s, pointer threadp, pointer finClearSetGrain); +PRIVATE void GC_HH_deleteClearSet(GC_state s, pointer clearSet); + PRIVATE Bool GC_HH_checkFinishedCCReadyToJoin(GC_state s); #endif /* MLTON_GC_INTERNAL_BASIS */ From fe3156ccdbb3fe461133a4c0a6a9969cf4d9724b Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Wed, 19 Oct 2022 11:06:51 -0400 Subject: [PATCH 096/114] Fix `#if ASSERT` typo Be warned! `#ifdef ASSERT` is true in all builds. This was causing the debug version of `traverseAndCheck` to run in all builds, with significant performance degradation in entangled benchmarks. I cleaned up the header and definition a little here, too. --- runtime/gc/decheck.c | 10 ++++------ runtime/gc/decheck.h | 6 +----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 1f2fa462e..048125f63 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -319,7 +319,7 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) { #ifdef DETECT_ENTANGLEMENT -#ifdef ASSERT +#if ASSERT void traverseAndCheck( GC_state s, __attribute__((unused)) objptr *opp, @@ -340,14 +340,12 @@ void traverseAndCheck( } } #else -void traverseAndCheck( - GC_state s, +void inline traverseAndCheck( + __attribute__((unused)) GC_state s, __attribute__((unused)) objptr *opp, - objptr op, + __attribute__((unused)) objptr op, __attribute__((unused)) void *rawArgs) { - (void)s; - (void)op; return; } #endif diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index b43a91ef3..406b9c207 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -53,11 +53,7 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1); int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2); bool disentangleObject(GC_state s, objptr op, uint32_t opDepth); objptr manage_entangled(GC_state s, objptr ptr, decheck_tid_t reader); -void traverseAndCheck( - GC_state s, - __attribute__((unused)) objptr *opp, - objptr op, - __attribute__((unused)) void *rawArgs); +void traverseAndCheck(GC_state s, objptr *opp ,objptr op, void *rawArgs); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ #endif /* _DECHECK_H_ */ From be9ea8f50a1093f146cbf80dfb23127f4358e4da Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Wed, 19 Oct 2022 11:06:51 -0400 Subject: [PATCH 097/114] Fix `#if ASSERT` typo Be warned! 
`#ifdef ASSERT` is true in all builds. This was causing the debug version of `traverseAndCheck` to run in all builds, with significant performance degradation in entangled benchmarks. I cleaned up the header and definition a little here, too. --- runtime/gc/decheck.c | 10 ++++------ runtime/gc/decheck.h | 6 +----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 1f2fa462e..048125f63 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -319,7 +319,7 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1) { #ifdef DETECT_ENTANGLEMENT -#ifdef ASSERT +#if ASSERT void traverseAndCheck( GC_state s, __attribute__((unused)) objptr *opp, @@ -340,14 +340,12 @@ void traverseAndCheck( } } #else -void traverseAndCheck( - GC_state s, +void inline traverseAndCheck( + __attribute__((unused)) GC_state s, __attribute__((unused)) objptr *opp, - objptr op, + __attribute__((unused)) objptr op, __attribute__((unused)) void *rawArgs) { - (void)s; - (void)op; return; } #endif diff --git a/runtime/gc/decheck.h b/runtime/gc/decheck.h index b43a91ef3..406b9c207 100644 --- a/runtime/gc/decheck.h +++ b/runtime/gc/decheck.h @@ -53,11 +53,7 @@ bool decheckIsOrdered(GC_thread thread, decheck_tid_t t1); int lcaHeapDepth(decheck_tid_t t1, decheck_tid_t t2); bool disentangleObject(GC_state s, objptr op, uint32_t opDepth); objptr manage_entangled(GC_state s, objptr ptr, decheck_tid_t reader); -void traverseAndCheck( - GC_state s, - __attribute__((unused)) objptr *opp, - objptr op, - __attribute__((unused)) void *rawArgs); +void traverseAndCheck(GC_state s, objptr *opp ,objptr op, void *rawArgs); #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */ #endif /* _DECHECK_H_ */ From e314d4e5879f529979bd4738bf44ce27e99dc6a4 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Wed, 19 Oct 2022 16:08:08 -0400 Subject: [PATCH 098/114] fix some compile-time warnings and remove LL_FORCE'd log messages --- runtime/gc/decheck.c | 10 +++++----- runtime/gc/entanglement-suspects.c | 8 ++++++-- runtime/gc/hierarchical-heap-collection.c | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/runtime/gc/decheck.c b/runtime/gc/decheck.c index 048125f63..14556e204 100644 --- a/runtime/gc/decheck.c +++ b/runtime/gc/decheck.c @@ -340,11 +340,11 @@ void traverseAndCheck( } } #else -void inline traverseAndCheck( - __attribute__((unused)) GC_state s, - __attribute__((unused)) objptr *opp, - __attribute__((unused)) objptr op, - __attribute__((unused)) void *rawArgs) +inline void traverseAndCheck( + __attribute__((unused)) GC_state s, + __attribute__((unused)) objptr *opp, + __attribute__((unused)) objptr op, + __attribute__((unused)) void *rawArgs) { return; } diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index e8d14e3fe..51b2b4147 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -170,8 +170,12 @@ void ES_clear(GC_state s, HM_HierarchicalHeap hh) struct GC_foreachObjptrClosure fObjptrClosure = {.fun = clear_suspect, .env = &(eargs)}; +#if ASSERT int ns = ES_foreachSuspect(s, &oldList, &fObjptrClosure); - assert(numSuspects == ns); + assert(numSuspects == (size_t)ns); +#else + ES_foreachSuspect(s, &oldList, &fObjptrClosure); +#endif s->cumulativeStatistics->numSuspectsCleared += numSuspects; HM_freeChunksInListWithInfo(s, &(oldList), NULL, BLOCK_FOR_SUSPECTS); @@ -363,7 +367,7 @@ void ES_deleteClearSet(GC_state s, ES_clearSet es) { struct timespec stopTime; timespec_now(&stopTime); 
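(The `#if` versus `#ifdef` distinction behind the typo fix above, shown standalone: in this runtime ASSERT is always defined, but may be defined to 0, so only `#if` respects the build setting. The snippet below is an illustration, not runtime code.)

#define ASSERT 0   /* e.g. a non-debug build still defines the macro, as 0 */

#ifdef ASSERT
/* taken in every build: #ifdef only asks whether the macro is defined */
#endif

#if ASSERT
/* taken only when ASSERT is nonzero, i.e. only in assert-enabled builds */
#endif
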
timespec_sub(&stopTime, &startTime); - LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, + LOG(LM_HIERARCHICAL_HEAP, LL_INFO, "time to process %zu suspects at depth %u: %ld.%09ld", numSuspects, depth, diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 90ec724f7..1dbc64433 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -1703,9 +1703,10 @@ void tryUnpinOrKeepPinned(GC_state s, HM_remembered remElem, void *rawArgs) /// COULD BE HEKLPFUL FOR DEGBUGGING TO FORWARD THE OBJECTS ANYWAY void forwardFromObjsOfRemembered(GC_state s, HM_remembered remElem, void *rawArgs) { +#if ASSERT objptr op = remElem->object; - assert(isPinned(op)); +#endif // struct GC_foreachObjptrClosure closure = // {.fun = forwardHHObjptr, .env = rawArgs}; From e8a61ac253030c5eb046ae59a43620118de8afd4 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 20 Oct 2022 11:46:45 -0400 Subject: [PATCH 099/114] handle failed clear by retry --- runtime/gc/entanglement-suspects.c | 51 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/runtime/gc/entanglement-suspects.c b/runtime/gc/entanglement-suspects.c index 51b2b4147..30220f9b8 100644 --- a/runtime/gc/entanglement-suspects.c +++ b/runtime/gc/entanglement-suspects.c @@ -181,8 +181,8 @@ void ES_clear(GC_state s, HM_HierarchicalHeap hh) HM_freeChunksInListWithInfo(s, &(oldList), NULL, BLOCK_FOR_SUSPECTS); if (eargs.numFailed > 0) { - LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, - "WARNING: %zu failed suspect clears", + LOG(LM_HIERARCHICAL_HEAP, LL_INFO, + "WARNING: %zu failed suspect clear(s)", eargs.numFailed); } @@ -268,29 +268,32 @@ void clear_suspect_par_safe( size_t lenOutput) { pointer p = objptrToPointer(op, NULL); - GC_header header = getHeader(p); - uint32_t unpinDepth = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; - - // Note: lenOutput == depth of heap whose suspects we are clearing - if (pinType(header) == PIN_ANY && unpinDepth < lenOutput) { - /* Not ready to be cleared; move it instead */ - HM_storeInChunkListWithPurpose(&(output[unpinDepth]), opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); - return; - } + while (TRUE) { + GC_header header = getHeader(p); + uint32_t unpinDepth = (header & UNPIN_DEPTH_MASK) >> UNPIN_DEPTH_SHIFT; + + // Note: lenOutput == depth of heap whose suspects we are clearing + if (pinType(header) == PIN_ANY && unpinDepth < lenOutput) { + /* Not ready to be cleared; move it instead */ + HM_storeInChunkListWithPurpose(&(output[unpinDepth]), opp, sizeof(objptr), BLOCK_FOR_SUSPECTS); + return; + } - GC_header newHeader = header & ~(SUSPECT_MASK); - if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { - /* clearing successful */ - return; - } - else { - // oops, something changed in between - // Is this possible? - // - Seems like it could be, if there is a CGC happening - // simultaneously at the same level (and it's marking/unmarking objects)? - // - If this is the only possibility, then we should be able to just - // do the CAS with newHeader in a loop? - DIE("TODO: is this possible?"); + GC_header newHeader = header & ~(SUSPECT_MASK); + if (__sync_bool_compare_and_swap(getHeaderp(p), header, newHeader)) { + /* clearing successful */ + return; + } + else { + // oops, something changed in between + // Is this possible? + // - Seems like it could be, if there is a CGC happening + // simultaneously at the same level (and it's marking/unmarking objects)? 
+ // - If this is the only possibility, then we should be able to just + // do the CAS with newHeader in a loop? + LOG(LM_HIERARCHICAL_HEAP, LL_INFO, + "WARNING: failed suspect clear; trying again"); + } } } From e6d4a383abd615f0b4ddcdf05bc39d5e0f67c20a Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Mon, 24 Oct 2022 19:10:38 -0400 Subject: [PATCH 100/114] scheduler work and idle timer --- .../schedulers/shh/CumulativePerProcTimer.sml | 73 ++++++++++++++++ basis-library/schedulers/shh/DummyTimer.sml | 19 ++++ basis-library/schedulers/shh/FORK_JOIN.sig | 6 +- basis-library/schedulers/shh/Scheduler.sml | 87 +++++++++---------- basis-library/schedulers/shh/sources.mlb | 2 + 5 files changed, 138 insertions(+), 49 deletions(-) create mode 100644 basis-library/schedulers/shh/CumulativePerProcTimer.sml create mode 100644 basis-library/schedulers/shh/DummyTimer.sml diff --git a/basis-library/schedulers/shh/CumulativePerProcTimer.sml b/basis-library/schedulers/shh/CumulativePerProcTimer.sml new file mode 100644 index 000000000..4f0202981 --- /dev/null +++ b/basis-library/schedulers/shh/CumulativePerProcTimer.sml @@ -0,0 +1,73 @@ +functor CumulativePerProcTimer(val timerName: string): +sig + val start: unit -> unit + val tick: unit -> unit (* Essentially the same as (stop();start()) *) + val stop: unit -> unit + + val isStarted: unit -> bool + + val cumulative: unit -> Time.time +end = +struct + + val numP = MLton.Parallel.numberOfProcessors + fun myId () = MLton.Parallel.processorNumber () + + fun die strfn = + ( print (Int.toString (myId ()) ^ ": " ^ strfn ()) + ; OS.Process.exit OS.Process.failure + ) + + val totals = Array.array (numP, Time.zeroTime) + val starts = Array.array (numP, Time.zeroTime) + val isRunning = Array.array (numP, false) + + fun isStarted () = + Array.sub (isRunning, myId()) + + fun start () = + let + val p = myId() + in + if Array.sub (isRunning, p) then + die (fn _ => "timer \"" ^ timerName ^ "\": start after start") + else + ( Array.update (isRunning, p, true) + ; Array.update (starts, p, Time.now ()) + ) + end + + fun tick () = + let + val p = myId() + val tnow = Time.now () + val delta = Time.- (tnow, Array.sub (starts, p)) + in + if not (Array.sub (isRunning, p)) then + die (fn _ => "timer \"" ^ timerName ^ "\": tick while stopped") + else + ( Array.update (totals, p, Time.+ (Array.sub (totals, p), delta)) + ; Array.update (starts, p, tnow) + ) + end + + fun stop () = + let + val p = myId() + val tnow = Time.now () + val delta = Time.- (tnow, Array.sub (starts, p)) + in + if not (Array.sub (isRunning, p)) then + die (fn _ => "timer \"" ^ timerName ^ "\": stop while stopped") + else + ( Array.update (isRunning, p, false) + ; Array.update (totals, p, Time.+ (Array.sub (totals, p), delta)) + ) + end + + fun cumulative () = + ( if isStarted () then tick () else () + ; Array.foldl Time.+ Time.zeroTime totals + ) + +end \ No newline at end of file diff --git a/basis-library/schedulers/shh/DummyTimer.sml b/basis-library/schedulers/shh/DummyTimer.sml new file mode 100644 index 000000000..ff2f17b7c --- /dev/null +++ b/basis-library/schedulers/shh/DummyTimer.sml @@ -0,0 +1,19 @@ +functor DummyTimer(val timerName: string): +sig + val start: unit -> unit + val tick: unit -> unit (* Essentially the same as (stop();start()) *) + val stop: unit -> unit + + val isStarted: unit -> bool + + val cumulative: unit -> Time.time +end = +struct + + fun isStarted () = false + fun start () = () + fun tick () = () + fun stop () = () + fun cumulative () = Time.zeroTime + +end \ No newline at 
end of file diff --git a/basis-library/schedulers/shh/FORK_JOIN.sig b/basis-library/schedulers/shh/FORK_JOIN.sig index ddc867801..c733a0fa7 100644 --- a/basis-library/schedulers/shh/FORK_JOIN.sig +++ b/basis-library/schedulers/shh/FORK_JOIN.sig @@ -8,9 +8,7 @@ sig (* synonym for par *) val fork: (unit -> 'a) * (unit -> 'b) -> 'a * 'b - (* other scheduler hooks *) - val communicate: unit -> unit - val getIdleTime: int -> Time.time - + val idleTimeSoFar: unit -> Time.time + val workTimeSoFar: unit -> Time.time val maxForkDepthSoFar: unit -> int end diff --git a/basis-library/schedulers/shh/Scheduler.sml b/basis-library/schedulers/shh/Scheduler.sml index fed4fa5d1..1ecbde1a7 100644 --- a/basis-library/schedulers/shh/Scheduler.sml +++ b/basis-library/schedulers/shh/Scheduler.sml @@ -100,33 +100,11 @@ struct fun dbgmsg' _ = () (* ======================================================================== - * IDLENESS TRACKING + * TIMERS *) - val idleTotals = Array.array (P, Time.zeroTime) - fun getIdleTime p = arraySub (idleTotals, p) - fun updateIdleTime (p, deltaTime) = - arrayUpdate (idleTotals, p, Time.+ (getIdleTime p, deltaTime)) - -(* - val timerGrain = 256 - fun startTimer myId = (myId, 0, Time.now ()) - fun tickTimer (p, count, t) = - if count < timerGrain then (p, count+1, t) else - let - val t' = Time.now () - val diff = Time.- (t', t) - val _ = updateIdleTime (p, diff) - in - (p, 0, t') - end - fun stopTimer (p, _, t) = - (tickTimer (p, timerGrain, t); ()) -*) - - fun startTimer _ = () - fun tickTimer _ = () - fun stopTimer _ = () + structure IdleTimer = CumulativePerProcTimer(val timerName = "idle") + structure WorkTimer = CumulativePerProcTimer(val timerName = "work") (** ======================================================================== * MAXIMUM FORK DEPTHS @@ -218,8 +196,6 @@ struct Queue.tryPopTop queue end - fun communicate () = () - fun push x = let val myId = myWorkerId () @@ -273,9 +249,6 @@ struct Finished x => x | Raised e => raise e - val communicate = communicate - val getIdleTime = getIdleTime - (* Must be called from a "user" thread, which has an associated HH *) fun parfork thread depth (f : unit -> 'a, g : unit -> 'b) = let @@ -599,22 +572,25 @@ struct in if other < myId then other else other+1 end - fun request idleTimer = + fun stealLoop () = let - fun loop tries it = + fun loop tries = if tries = P * 100 then - (OS.Process.sleep (Time.fromNanoseconds (LargeInt.fromInt (P * 100))); - loop 0 (tickTimer idleTimer)) + ( IdleTimer.tick () + ; OS.Process.sleep (Time.fromNanoseconds (LargeInt.fromInt (P * 100))) + ; loop 0 ) else let val friend = randomOtherId () in case trySteal friend of - NONE => loop (tries+1) (tickTimer idleTimer) - | SOME (task, depth) => (task, depth, tickTimer idleTimer) + NONE => loop (tries+1) + | SOME (task, depth) => (task, depth) end + + val result = loop 0 in - loop 0 idleTimer + result end (* ------------------------------------------------------------------- *) @@ -625,33 +601,44 @@ struct | SOME (thread, hh) => ( (*dbgmsg' (fn _ => "back in sched; found GC task") ;*) setGCTask myId NONE + ; IdleTimer.stop () + ; WorkTimer.start () ; HH.collectThreadRoot (thread, !hh) ; if popDiscard () then - ( (*dbgmsg' (fn _ => "resume task thread") - ;*) threadSwitch thread + ( threadSwitch thread + ; WorkTimer.stop () + ; IdleTimer.start () ; afterReturnToSched () ) else - () + ( WorkTimer.stop () + ; IdleTimer.start () + ) ) fun acquireWork () : unit = let - val idleTimer = startTimer myId - val (task, depth, idleTimer') = request idleTimer - 
val _ = stopTimer idleTimer' + val (task, depth) = stealLoop () in case task of GCTask (thread, hh) => - ( HH.collectThreadRoot (thread, !hh) + ( IdleTimer.stop () + ; WorkTimer.start () + ; HH.collectThreadRoot (thread, !hh) + ; WorkTimer.stop () + ; IdleTimer.start () ; acquireWork () ) | Continuation (thread, depth) => ( (*dbgmsg' (fn _ => "stole continuation (" ^ Int.toString depth ^ ")") ; dbgmsg' (fn _ => "resume task thread") ;*) Queue.setDepth myQueue depth + ; IdleTimer.stop () + ; WorkTimer.start () ; threadSwitch thread + ; WorkTimer.stop () + ; IdleTimer.start () ; afterReturnToSched () ; Queue.setDepth myQueue 1 ; acquireWork () @@ -667,7 +654,11 @@ struct HH.setDepth (taskThread, depth+1); setTaskBox myId t; (* dbgmsg' (fn _ => "switch to new task thread"); *) + IdleTimer.stop (); + WorkTimer.start (); threadSwitch taskThread; + WorkTimer.stop (); + IdleTimer.start (); afterReturnToSched (); Queue.setDepth myQueue 1; acquireWork () @@ -686,6 +677,7 @@ struct let val (_, acquireWork) = setupSchedLoop () in + IdleTimer.start (); acquireWork (); die (fn _ => "scheduler bug: scheduler exited acquire-work loop") end @@ -722,7 +714,10 @@ struct let val (afterReturnToSched, acquireWork) = setupSchedLoop () in + WorkTimer.start (); threadSwitch originalThread; + WorkTimer.stop (); + IdleTimer.start (); afterReturnToSched (); setQueueDepth (myWorkerId ()) 1; acquireWork (); @@ -761,5 +756,7 @@ struct ArrayExtra.Raw.unsafeToArray a end + val idleTimeSoFar = Scheduler.IdleTimer.cumulative + val workTimeSoFar = Scheduler.WorkTimer.cumulative val maxForkDepthSoFar = Scheduler.maxForkDepthSoFar -end +end \ No newline at end of file diff --git a/basis-library/schedulers/shh/sources.mlb b/basis-library/schedulers/shh/sources.mlb index 100700dcd..8fb08583e 100644 --- a/basis-library/schedulers/shh/sources.mlb +++ b/basis-library/schedulers/shh/sources.mlb @@ -22,6 +22,8 @@ local FORK_JOIN.sig SimpleRandom.sml queue/DequeABP.sml + DummyTimer.sml + CumulativePerProcTimer.sml Scheduler.sml in structure ForkJoin From c86bf538cdbb938dcec48a2ac4794c812cda1878 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 25 Oct 2022 11:09:19 -0400 Subject: [PATCH 101/114] gc stats update --- basis-library/mpl/gc.sig | 21 +++----- basis-library/mpl/gc.sml | 69 +++++++------------------- basis-library/primitive/prim-mlton.sml | 9 ++-- runtime/gc/concurrent-collection.c | 21 +++----- runtime/gc/gc_state.c | 25 +++------- runtime/gc/gc_state.h | 9 ++-- runtime/gc/statistics.c | 12 ++--- runtime/gc/statistics.h | 9 ++-- 8 files changed, 50 insertions(+), 125 deletions(-) diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index 01418a4cd..c00975008 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -53,21 +53,12 @@ sig val promoTime: unit -> Time.time val promoTimeOfProc: int -> Time.time - val rootBytesReclaimed: unit -> IntInf.int - val rootBytesReclaimedOfProc: int -> IntInf.int + val numCCs: unit -> IntInf.t + val numCCsOfProc: int -> IntInf.t - val internalBytesReclaimed: unit -> IntInf.int - val internalBytesReclaimedOfProc: int -> IntInf.int + val ccBytesReclaimed: unit -> IntInf.int + val ccBytesReclaimedOfProc: int -> IntInf.int - val numRootCCs: unit -> IntInf.int - val numRootCCsOfProc: int -> IntInf.int - - val numInternalCCs: unit -> IntInf.int - val numInternalCCsOfProc: int -> IntInf.int - - val rootCCTime: unit -> Time.time - val rootCCTimeOfProc: int -> Time.time - - val internalCCTime: unit -> Time.time - val internalCCTimeOfProc: int -> Time.time + val 
ccTime: unit -> Time.time + val ccTimeOfProc: int -> Time.time end diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index 51d7b61ed..94c11579f 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -24,18 +24,12 @@ struct GC.getCumulativeStatisticsBytesAllocatedOfProc (gcState (), Word32.fromInt p) fun getCumulativeStatisticsLocalBytesReclaimedOfProc p = GC.getCumulativeStatisticsLocalBytesReclaimedOfProc (gcState (), Word32.fromInt p) - fun getNumRootCCsOfProc p = - GC.getNumRootCCsOfProc (gcState (), Word32.fromInt p) - fun getNumInternalCCsOfProc p = - GC.getNumInternalCCsOfProc (gcState (), Word32.fromInt p) - fun getRootCCMillisecondsOfProc p = - GC.getRootCCMillisecondsOfProc (gcState (), Word32.fromInt p) - fun getInternalCCMillisecondsOfProc p = - GC.getInternalCCMillisecondsOfProc (gcState (), Word32.fromInt p) - fun getRootCCBytesReclaimedOfProc p = - GC.getRootCCBytesReclaimedOfProc (gcState (), Word32.fromInt p) - fun getInternalCCBytesReclaimedOfProc p = - GC.getInternalCCBytesReclaimedOfProc (gcState (), Word32.fromInt p) + fun getNumCCsOfProc p = + GC.getNumCCsOfProc (gcState (), Word32.fromInt p) + fun getCCMillisecondsOfProc p = + GC.getCCMillisecondsOfProc (gcState (), Word32.fromInt p) + fun getCCBytesReclaimedOfProc p = + GC.getCCBytesReclaimedOfProc (gcState (), Word32.fromInt p) fun numberDisentanglementChecks () = C_UIntmax.toLargeInt (GC.numberDisentanglementChecks (gcState ())) @@ -95,34 +89,19 @@ struct ; millisecondsToTime (getPromoMillisecondsOfProc p) ) - fun numRootCCsOfProc p = + fun numCCsOfProc p = ( checkProcNum p - ; C_UIntmax.toLargeInt (getNumRootCCsOfProc p) + ; C_UIntmax.toLargeInt (getNumCCsOfProc p) ) - fun numInternalCCsOfProc p = + fun ccTimeOfProc p = ( checkProcNum p - ; C_UIntmax.toLargeInt (getNumInternalCCsOfProc p) + ; millisecondsToTime (getCCMillisecondsOfProc p) ) - fun rootCCTimeOfProc p = + fun ccBytesReclaimedOfProc p = ( checkProcNum p - ; millisecondsToTime (getRootCCMillisecondsOfProc p) - ) - - fun internalCCTimeOfProc p = - ( checkProcNum p - ; millisecondsToTime (getInternalCCMillisecondsOfProc p) - ) - - fun rootBytesReclaimedOfProc p = - ( checkProcNum p - ; C_UIntmax.toLargeInt (getRootCCBytesReclaimedOfProc p) - ) - - fun internalBytesReclaimedOfProc p = - ( checkProcNum p - ; C_UIntmax.toLargeInt (getInternalCCBytesReclaimedOfProc p) + ; C_UIntmax.toLargeInt (getCCBytesReclaimedOfProc p) ) fun sumAllProcs (f: 'a * 'a -> 'a) (perProc: int -> 'a) = @@ -151,28 +130,16 @@ struct fun promoTime () = millisecondsToTime (sumAllProcs C_UIntmax.+ getPromoMillisecondsOfProc) - fun numRootCCs () = - C_UIntmax.toLargeInt - (sumAllProcs C_UIntmax.+ getNumRootCCsOfProc) - - fun numInternalCCs () = + fun numCCs () = C_UIntmax.toLargeInt - (sumAllProcs C_UIntmax.+ getNumInternalCCsOfProc) + (sumAllProcs C_UIntmax.+ getNumCCsOfProc) - fun rootCCTime () = + fun ccTime () = millisecondsToTime - (sumAllProcs C_UIntmax.+ getRootCCMillisecondsOfProc) - - fun internalCCTime () = - millisecondsToTime - (sumAllProcs C_UIntmax.+ getInternalCCMillisecondsOfProc) - - fun rootBytesReclaimed () = - C_UIntmax.toLargeInt - (sumAllProcs C_UIntmax.max getRootCCBytesReclaimedOfProc) + (sumAllProcs C_UIntmax.+ getCCMillisecondsOfProc) - fun internalBytesReclaimed () = + fun ccBytesReclaimed () = C_UIntmax.toLargeInt - (sumAllProcs C_UIntmax.+ getInternalCCBytesReclaimedOfProc) + (sumAllProcs C_UIntmax.max getCCBytesReclaimedOfProc) end diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml 
index 2273b4b41..80197a4d1 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -159,12 +159,9 @@ structure GC = val getCumulativeStatisticsLocalBytesReclaimedOfProc = _import "GC_getCumulativeStatisticsLocalBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; - val getNumRootCCsOfProc = _import "GC_getNumRootCCsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; - val getNumInternalCCsOfProc = _import "GC_getNumInternalCCsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; - val getRootCCMillisecondsOfProc = _import "GC_getRootCCMillisecondsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; - val getInternalCCMillisecondsOfProc = _import "GC_getInternalCCMillisecondsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; - val getRootCCBytesReclaimedOfProc = _import "GC_getRootCCBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; - val getInternalCCBytesReclaimedOfProc = _import "GC_getInternalCCBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; + val getNumCCsOfProc = _import "GC_getNumCCsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; + val getCCMillisecondsOfProc = _import "GC_getCCMillisecondsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; + val getCCBytesReclaimedOfProc = _import "GC_getCCBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; val numberDisentanglementChecks = _import "GC_numDisentanglementChecks" runtime private: GCState.t -> C_UIntmax.t; diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index fff52c0d6..ef1ca466d 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -935,9 +935,6 @@ size_t CC_collectWithRoots( // chunks in which all objects are garbage. Before exiting, chunks in // origList are added to the free list. 
- bool isConcurrent = (HM_HH_getDepth(targetHH) == 1); - // assert(isConcurrent); - uint32_t initialDepth = HM_HH_getDepth(targetHH); struct HM_chunkList _repList; @@ -1166,6 +1163,8 @@ size_t CC_collectWithRoots( assert(!(stackChunk->mightContainMultipleObjects)); assert(HM_HH_getChunkList(HM_getLevelHead(stackChunk)) == origList); assert(isChunkInList(stackChunk, origList)); + assert(bytesSaved >= HM_getChunkUsedSize(stackChunk)); + bytesSaved -= HM_getChunkUsedSize(stackChunk); HM_unlinkChunk(origList, stackChunk); info.freedType = CC_FREED_STACK_CHUNK; HM_freeChunkWithInfo(s, stackChunk, &infoc, BLOCK_FOR_HEAP_CHUNK); @@ -1189,19 +1188,13 @@ size_t CC_collectWithRoots( timespec_now(&stopTime); timespec_sub(&stopTime, &startTime); - - if (isConcurrent) { - timespec_add(&(s->cumulativeStatistics->timeRootCC), &stopTime); - s->cumulativeStatistics->numRootCCs++; - s->cumulativeStatistics->bytesReclaimedByRootCC = max(s->cumulativeStatistics->bytesReclaimedByRootCC, bytesSaved); - } else { - timespec_add(&(s->cumulativeStatistics->timeInternalCC), &stopTime); - s->cumulativeStatistics->numInternalCCs++; - s->cumulativeStatistics->bytesReclaimedByInternalCC += bytesScanned-bytesSaved; - } + timespec_add(&(s->cumulativeStatistics->timeCC), &stopTime); + s->cumulativeStatistics->numCCs++; + assert(bytesScanned >= bytesSaved); + uintmax_t bytesReclaimed = bytesScanned-bytesSaved; + s->cumulativeStatistics->bytesReclaimedByCC += bytesReclaimed; return lists.bytesSaved; - } #endif diff --git a/runtime/gc/gc_state.c b/runtime/gc/gc_state.c index 95af97473..f732433ae 100644 --- a/runtime/gc/gc_state.c +++ b/runtime/gc/gc_state.c @@ -165,30 +165,17 @@ uintmax_t GC_getCumulativeStatisticsNumLocalGCsOfProc(GC_state s, uint32_t proc) return s->procStates[proc].cumulativeStatistics->numHHLocalGCs; } -uintmax_t GC_getNumRootCCsOfProc(GC_state s, uint32_t proc) { - return s->procStates[proc].cumulativeStatistics->numRootCCs; +uintmax_t GC_getNumCCsOfProc(GC_state s, uint32_t proc) { + return s->procStates[proc].cumulativeStatistics->numCCs; } -uintmax_t GC_getNumInternalCCsOfProc(GC_state s, uint32_t proc) { - return s->procStates[proc].cumulativeStatistics->numInternalCCs; -} - -uintmax_t GC_getRootCCMillisecondsOfProc(GC_state s, uint32_t proc) { - struct timespec *t = &(s->procStates[proc].cumulativeStatistics->timeRootCC); +uintmax_t GC_getCCMillisecondsOfProc(GC_state s, uint32_t proc) { + struct timespec *t = &(s->procStates[proc].cumulativeStatistics->timeCC); return (uintmax_t)t->tv_sec * 1000 + (uintmax_t)t->tv_nsec / 1000000; } -uintmax_t GC_getInternalCCMillisecondsOfProc(GC_state s, uint32_t proc) { - struct timespec *t = &(s->procStates[proc].cumulativeStatistics->timeInternalCC); - return (uintmax_t)t->tv_sec * 1000 + (uintmax_t)t->tv_nsec / 1000000; -} - -uintmax_t GC_getRootCCBytesReclaimedOfProc(GC_state s, uint32_t proc) { - return s->procStates[proc].cumulativeStatistics->bytesReclaimedByRootCC; -} - -uintmax_t GC_getInternalCCBytesReclaimedOfProc(GC_state s, uint32_t proc) { - return s->procStates[proc].cumulativeStatistics->bytesReclaimedByInternalCC; +uintmax_t GC_getCCBytesReclaimedOfProc(GC_state s, uint32_t proc) { + return s->procStates[proc].cumulativeStatistics->bytesReclaimedByCC; } uintmax_t GC_getLocalGCMillisecondsOfProc(GC_state s, uint32_t proc) { diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 8d1ffb152..df31890ce 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -133,12 +133,9 @@ PRIVATE uintmax_t GC_getPromoMillisecondsOfProc(GC_state 
s, uint32_t proc); PRIVATE uintmax_t GC_getCumulativeStatisticsNumLocalGCsOfProc(GC_state s, uint32_t proc); -PRIVATE uintmax_t GC_getNumRootCCsOfProc(GC_state s, uint32_t proc); -PRIVATE uintmax_t GC_getNumInternalCCsOfProc(GC_state s, uint32_t proc); -PRIVATE uintmax_t GC_getRootCCMillisecondsOfProc(GC_state s, uint32_t proc); -PRIVATE uintmax_t GC_getInternalCCMillisecondsOfProc(GC_state s, uint32_t proc); -PRIVATE uintmax_t GC_getRootCCBytesReclaimedOfProc(GC_state s, uint32_t proc); -PRIVATE uintmax_t GC_getInternalCCBytesReclaimedOfProc(GC_state s, uint32_t proc); +PRIVATE uintmax_t GC_getNumCCsOfProc(GC_state s, uint32_t proc); +PRIVATE uintmax_t GC_getCCMillisecondsOfProc(GC_state s, uint32_t proc); +PRIVATE uintmax_t GC_getCCBytesReclaimedOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_numDisentanglementChecks(GC_state s); PRIVATE float GC_approxRaceFactor(GC_state s); PRIVATE uintmax_t GC_numChecksSkipped(GC_state s); diff --git a/runtime/gc/statistics.c b/runtime/gc/statistics.c index 4317611e6..58e70527f 100644 --- a/runtime/gc/statistics.c +++ b/runtime/gc/statistics.c @@ -47,8 +47,7 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->bytesScannedMinor = 0; cumulativeStatistics->bytesHHLocaled = 0; cumulativeStatistics->bytesReclaimedByLocal = 0; - cumulativeStatistics->bytesReclaimedByRootCC = 0; - cumulativeStatistics->bytesReclaimedByInternalCC = 0; + cumulativeStatistics->bytesReclaimedByCC = 0; cumulativeStatistics->maxBytesLive = 0; cumulativeStatistics->maxBytesLiveSinceReset = 0; cumulativeStatistics->maxHeapSize = 0; @@ -67,8 +66,7 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->numMarkCompactGCs = 0; cumulativeStatistics->numMinorGCs = 0; cumulativeStatistics->numHHLocalGCs = 0; - cumulativeStatistics->numRootCCs = 0; - cumulativeStatistics->numInternalCCs = 0; + cumulativeStatistics->numCCs = 0; cumulativeStatistics->numDisentanglementChecks = 0; cumulativeStatistics->numChecksSkipped = 0; cumulativeStatistics->numSuspectsMarked = 0; @@ -80,10 +78,8 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->timeLocalGC.tv_nsec = 0; cumulativeStatistics->timeLocalPromo.tv_sec = 0; cumulativeStatistics->timeLocalPromo.tv_nsec = 0; - cumulativeStatistics->timeRootCC.tv_sec = 0; - cumulativeStatistics->timeRootCC.tv_nsec = 0; - cumulativeStatistics->timeInternalCC.tv_sec = 0; - cumulativeStatistics->timeInternalCC.tv_nsec = 0; + cumulativeStatistics->timeCC.tv_sec = 0; + cumulativeStatistics->timeCC.tv_nsec = 0; rusageZero (&cumulativeStatistics->ru_gc); rusageZero (&cumulativeStatistics->ru_gcCopying); diff --git a/runtime/gc/statistics.h b/runtime/gc/statistics.h index d1abc974a..1d49fa2db 100644 --- a/runtime/gc/statistics.h +++ b/runtime/gc/statistics.h @@ -36,8 +36,7 @@ struct GC_cumulativeStatistics { uintmax_t bytesScannedMinor; uintmax_t bytesHHLocaled; uintmax_t bytesReclaimedByLocal; - uintmax_t bytesReclaimedByRootCC; - uintmax_t bytesReclaimedByInternalCC; + uintmax_t bytesReclaimedByCC; size_t maxBytesLive; size_t maxBytesLiveSinceReset; @@ -63,8 +62,7 @@ struct GC_cumulativeStatistics { uintmax_t numMarkCompactGCs; uintmax_t numMinorGCs; uintmax_t numHHLocalGCs; - uintmax_t numRootCCs; - uintmax_t numInternalCCs; + uintmax_t numCCs; uintmax_t numDisentanglementChecks; uintmax_t numChecksSkipped; uintmax_t numSuspectsMarked; @@ -75,8 +73,7 @@ struct GC_cumulativeStatistics { struct timespec timeLocalGC; struct timespec timeLocalPromo; - struct 
timespec timeRootCC; - struct timespec timeInternalCC; + struct timespec timeCC; struct rusage ru_gc; /* total resource usage in gc. */ struct rusage ru_gcCopying; /* resource usage in major copying gcs. */ From e74a4a71e0ef46d90fd0c014dd60123b047fcfff Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 25 Oct 2022 11:41:20 -0400 Subject: [PATCH 102/114] add deprecated errors for old gc stats functions --- basis-library/mpl/gc.sig | 14 ++++++++++++++ basis-library/mpl/gc.sml | 23 +++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index c00975008..db6980ec7 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -61,4 +61,18 @@ sig val ccTime: unit -> Time.time val ccTimeOfProc: int -> Time.time + + (* DEPRECATED *) + val rootBytesReclaimed: unit -> IntInf.int + val rootBytesReclaimedOfProc: int -> IntInf.int + val internalBytesReclaimed: unit -> IntInf.int + val internalBytesReclaimedOfProc: int -> IntInf.int + val numRootCCs: unit -> IntInf.int + val numRootCCsOfProc: int -> IntInf.int + val numInternalCCs: unit -> IntInf.int + val numInternalCCsOfProc: int -> IntInf.int + val rootCCTime: unit -> Time.time + val rootCCTimeOfProc: int -> Time.time + val internalCCTime: unit -> Time.time + val internalCCTimeOfProc: int -> Time.time end diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index 94c11579f..5924e90e9 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -142,4 +142,27 @@ struct C_UIntmax.toLargeInt (sumAllProcs C_UIntmax.max getCCBytesReclaimedOfProc) + + (* ====================================================================== + * DEPRECATED + *) + + exception Deprecated of string + + fun d name (_: 'a) : 'b = + raise Deprecated ("MPL.GC." ^ name) + + val rootBytesReclaimed = d "rootBytesReclaimed" + val rootBytesReclaimedOfProc = d "rootBytesReclaimedOfProc" + val internalBytesReclaimed = d "internalBytesReclaimed" + val internalBytesReclaimedOfProc = d "internalBytesReclaimedOfProc" + val numRootCCs = d "numRootCCs" + val numRootCCsOfProc = d "numRootCCsOfProc" + val numInternalCCs = d "numInternalCCs" + val numInternalCCsOfProc = d "numInternalCCsOfProc" + val rootCCTime = d "rootCCTime" + val rootCCTimeOfProc = d "rootCCTimeOfProc" + val internalCCTime = d "internalCCTime" + val internalCCTimeOfProc = d "internalCCTimeOfProc" + end From 515b2a02aee7d51340a3170d02577205e645d19c Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 27 Oct 2022 12:58:11 -0400 Subject: [PATCH 103/114] add number-entanglements statistic (number of times entanglement is detected) --- basis-library/mpl/gc.sig | 5 +++-- basis-library/mpl/gc.sml | 3 +++ basis-library/primitive/prim-mlton.sml | 1 + runtime/gc/assign.c | 5 +++++ runtime/gc/gc_state.c | 8 ++++++++ runtime/gc/gc_state.h | 1 + runtime/gc/statistics.c | 1 + runtime/gc/statistics.h | 3 ++- 8 files changed, 24 insertions(+), 3 deletions(-) diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index db6980ec7..9d04632d9 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -17,8 +17,9 @@ sig *) val numberDisentanglementChecks: unit -> IntInf.int - (* How many times entanglement has been detected at a read barrier. 
- *) + (* How many times the entanglement is detected *) + val numberEntanglements: unit -> IntInf.int + val approxRaceFactor: unit -> Real32.real val numberSuspectsMarked: unit -> IntInf.int diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index 5924e90e9..51cc56a81 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -34,6 +34,9 @@ struct fun numberDisentanglementChecks () = C_UIntmax.toLargeInt (GC.numberDisentanglementChecks (gcState ())) + fun numberEntanglements () = + C_UIntmax.toLargeInt (GC.numberEntanglements (gcState ())) + fun approxRaceFactor () = (GC.approxRaceFactor (gcState ())) diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index 80197a4d1..5528c4751 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -164,6 +164,7 @@ structure GC = val getCCBytesReclaimedOfProc = _import "GC_getCCBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; val numberDisentanglementChecks = _import "GC_numDisentanglementChecks" runtime private: GCState.t -> C_UIntmax.t; + val numberEntanglements = _import "GC_numEntanglements" runtime private: GCState.t -> C_UIntmax.t; val approxRaceFactor = _import "GC_approxRaceFactor" runtime private: GCState.t -> Real32.real; diff --git a/runtime/gc/assign.c b/runtime/gc/assign.c index 4a80545d3..1d5560a59 100644 --- a/runtime/gc/assign.c +++ b/runtime/gc/assign.c @@ -25,6 +25,7 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src) if (!decheck(s, src)) { assert (isMutable(s, dstp)); + s->cumulativeStatistics->numEntanglements++; new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState); assert (isPinned(new_src)); } @@ -62,6 +63,7 @@ objptr Assignable_readBarrier( // assert(!hasFwdPtr(ptr)); // assert(pinType(getHeader(ptr)) == PIN_ANY); // } + s->cumulativeStatistics->numEntanglements++; ptr = manage_entangled(s, ptr, getThreadCurrent(s)->decheckState); } // HM_EBR_enterQuiescentState(s); @@ -216,6 +218,9 @@ void Assignable_writeBarrier( } else if(dstHH->depth != 0) { // traverseAndCheck(s, &dst, dst, NULL); + + // SAM_NOTE: TODO: do we count this one?? 
+ s->cumulativeStatistics->numEntanglements++; manage_entangled (s, src, HM_getChunkOf(dstp)->decheckState); } diff --git a/runtime/gc/gc_state.c b/runtime/gc/gc_state.c index f732433ae..28b2051ae 100644 --- a/runtime/gc/gc_state.c +++ b/runtime/gc/gc_state.c @@ -196,6 +196,14 @@ uintmax_t GC_numDisentanglementChecks(GC_state s) { return count; } +uintmax_t GC_numEntanglements(GC_state s) { + uintmax_t count = 0; + for (uint32_t p = 0; p < s->numberOfProcs; p++) { + count += s->procStates[p].cumulativeStatistics->numEntanglements; + } + return count; +} + uintmax_t GC_numChecksSkipped(GC_state s) { uintmax_t count = 0; diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index df31890ce..69a9decb8 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -137,6 +137,7 @@ PRIVATE uintmax_t GC_getNumCCsOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_getCCMillisecondsOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_getCCBytesReclaimedOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_numDisentanglementChecks(GC_state s); +PRIVATE uintmax_t GC_numEntanglements(GC_state s); PRIVATE float GC_approxRaceFactor(GC_state s); PRIVATE uintmax_t GC_numChecksSkipped(GC_state s); PRIVATE uintmax_t GC_numSuspectsMarked(GC_state s); diff --git a/runtime/gc/statistics.c b/runtime/gc/statistics.c index 58e70527f..fb26c2d19 100644 --- a/runtime/gc/statistics.c +++ b/runtime/gc/statistics.c @@ -68,6 +68,7 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->numHHLocalGCs = 0; cumulativeStatistics->numCCs = 0; cumulativeStatistics->numDisentanglementChecks = 0; + cumulativeStatistics->numEntanglements = 0; cumulativeStatistics->numChecksSkipped = 0; cumulativeStatistics->numSuspectsMarked = 0; cumulativeStatistics->numSuspectsCleared = 0; diff --git a/runtime/gc/statistics.h b/runtime/gc/statistics.h index 1d49fa2db..34e43df1a 100644 --- a/runtime/gc/statistics.h +++ b/runtime/gc/statistics.h @@ -63,7 +63,8 @@ struct GC_cumulativeStatistics { uintmax_t numMinorGCs; uintmax_t numHHLocalGCs; uintmax_t numCCs; - uintmax_t numDisentanglementChecks; + uintmax_t numDisentanglementChecks; // count full read barriers + uintmax_t numEntanglements; // count instances entanglement is detected uintmax_t numChecksSkipped; uintmax_t numSuspectsMarked; uintmax_t numSuspectsCleared; From 385eb8e3b1830c8295ec904ee3d6db8b61d0a2e5 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 27 Oct 2022 17:26:59 -0400 Subject: [PATCH 104/114] rough entanglement watermark upper bound --- basis-library/mlton/thread.sig | 2 ++ basis-library/mlton/thread.sml | 3 ++ basis-library/mpl/gc.sig | 1 + basis-library/mpl/gc.sml | 3 ++ basis-library/primitive/prim-mlton.sml | 5 ++++ basis-library/schedulers/shh/Scheduler.sml | 5 ++++ runtime/gc/gc_state.c | 35 ++++++++++++++++++++++ runtime/gc/gc_state.h | 2 ++ runtime/gc/pin.c | 7 ++++- runtime/gc/statistics.c | 2 ++ runtime/gc/statistics.h | 2 ++ 11 files changed, 66 insertions(+), 1 deletion(-) diff --git a/basis-library/mlton/thread.sig b/basis-library/mlton/thread.sig index 2a3fae6da..9b25da5b9 100644 --- a/basis-library/mlton/thread.sig +++ b/basis-library/mlton/thread.sig @@ -79,6 +79,8 @@ signature MLTON_THREAD = val commitFinishedClearSetGrain: thread * finished_clear_set_grain -> unit val deleteClearSet: clear_set -> unit + val updateBytesPinnedEntangledWatermark: unit -> unit + (* "put a new thread in the hierarchy *) val moveNewThreadToDepth : thread * int -> unit diff --git a/basis-library/mlton/thread.sml 
b/basis-library/mlton/thread.sml index 8ef2d1c95..1e7bf8c5c 100644 --- a/basis-library/mlton/thread.sml +++ b/basis-library/mlton/thread.sml @@ -114,6 +114,9 @@ struct fun deleteClearSet c = Prim.deleteClearSet (gcState (), c) + + fun updateBytesPinnedEntangledWatermark () = + Prim.updateBytesPinnedEntangledWatermark (gcState ()) end structure Disentanglement = diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index 9d04632d9..d1c73f7ba 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -25,6 +25,7 @@ sig val numberSuspectsMarked: unit -> IntInf.int val numberSuspectsCleared: unit -> IntInf.int val bytesPinnedEntangled: unit -> IntInf.int + val bytesPinnedEntangledWatermark: unit -> IntInf.int val getControlMaxCCDepth: unit -> int diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index 51cc56a81..e902c771d 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -51,6 +51,9 @@ struct fun bytesPinnedEntangled () = C_UIntmax.toLargeInt (GC.bytesPinnedEntangled (gcState ())) + + fun bytesPinnedEntangledWatermark () = + C_UIntmax.toLargeInt (GC.bytesPinnedEntangledWatermark (gcState ())) end exception NotYetImplemented of string diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index 5528c4751..3e95b569a 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -173,6 +173,7 @@ structure GC = val numberSuspectsCleared = _import "GC_numSuspectsCleared" runtime private: GCState.t -> C_UIntmax.t; val bytesPinnedEntangled = _import "GC_bytesPinnedEntangled" runtime private: GCState.t -> C_UIntmax.t; + val bytesPinnedEntangledWatermark = _import "GC_bytesPinnedEntangledWatermark" runtime private: GCState.t -> C_UIntmax.t; end structure HM = @@ -383,6 +384,10 @@ structure Thread = val deleteClearSet = _import "GC_HH_deleteClearSet" runtime private: GCState.t * Pointer.t -> unit; + val updateBytesPinnedEntangledWatermark = + _import "GC_updateBytesPinnedEntangledWatermark" runtime private: + GCState.t -> unit; + val decheckFork = _import "GC_HH_decheckFork" runtime private: GCState.t * Word64.word ref * Word64.word ref -> unit; diff --git a/basis-library/schedulers/shh/Scheduler.sml b/basis-library/schedulers/shh/Scheduler.sml index 1ecbde1a7..65070b41d 100644 --- a/basis-library/schedulers/shh/Scheduler.sml +++ b/basis-library/schedulers/shh/Scheduler.sml @@ -313,6 +313,7 @@ struct ; HH.setDepth (thread, depth) ; DE.decheckJoin (tidLeft, tidRight) ; maybeParClearSuspectsAtDepth (thread, depth) + ; if depth <> 1 then () else HH.updateBytesPinnedEntangledWatermark () (* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *) (* ; HH.forceNewChunk () *) ; let @@ -337,6 +338,7 @@ struct DE.decheckJoin (tidLeft, tidRight); setQueueDepth (myWorkerId ()) depth; maybeParClearSuspectsAtDepth (thread, depth); + if depth <> 1 then () else HH.updateBytesPinnedEntangledWatermark (); (* dbgmsg' (fn _ => "join slow at depth " ^ Int.toString depth); *) case HM.refDerefNoBarrier rightSideResult of NONE => die (fn _ => "scheduler bug: join failed: missing result") @@ -389,6 +391,7 @@ struct ; HH.setDepth (thread, depth) ; DE.decheckJoin (tidLeft, tidRight) ; maybeParClearSuspectsAtDepth (thread, depth) + ; if depth <> 1 then () else HH.updateBytesPinnedEntangledWatermark () (* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *) (* ; HH.forceNewChunk () *) ; let @@ -413,6 +416,7 @@ struct DE.decheckJoin (tidLeft, tidRight); setQueueDepth 
(myWorkerId ()) depth; maybeParClearSuspectsAtDepth (thread, depth); + if depth <> 1 then () else HH.updateBytesPinnedEntangledWatermark (); (* dbgmsg' (fn _ => "join slow at depth " ^ Int.toString depth); *) case HM.refDerefNoBarrier rightSideResult of NONE => die (fn _ => "scheduler bug: join failed: missing result") @@ -467,6 +471,7 @@ struct val _ = HH.promoteChunks thread val _ = HH.setDepth (thread, depth) val _ = maybeParClearSuspectsAtDepth (thread, depth) + val _ = if depth <> 1 then () else HH.updateBytesPinnedEntangledWatermark () (* val _ = dbgmsg' (fn _ => "join CC at depth " ^ Int.toString depth) *) in result diff --git a/runtime/gc/gc_state.c b/runtime/gc/gc_state.c index 28b2051ae..ca036aa4d 100644 --- a/runtime/gc/gc_state.c +++ b/runtime/gc/gc_state.c @@ -244,6 +244,41 @@ uintmax_t GC_bytesPinnedEntangled(GC_state s) return count; } +uintmax_t GC_bytesPinnedEntangledWatermark(GC_state s) +{ + uintmax_t mark = 0; + for (uint32_t p = 0; p < s->numberOfProcs; p++) + { + mark = max(mark, + s->procStates[p].cumulativeStatistics->bytesPinnedEntangledWatermark); + } + return mark; +} + +// must only be called immediately after join at root depth +void GC_updateBytesPinnedEntangledWatermark(GC_state s) +{ + uintmax_t total = 0; + for (uint32_t p = 0; p < s->numberOfProcs; p++) + { + uintmax_t *currp = + &(s->procStates[p].cumulativeStatistics->currentPhaseBytesPinnedEntangled); + uintmax_t curr = __atomic_load_n(currp, __ATOMIC_SEQ_CST); + __atomic_store_n(currp, 0, __ATOMIC_SEQ_CST); + total += curr; + } + + // if (total > 0) { + // LOG(LM_HIERARCHICAL_HEAP, LL_FORCE, "hello %zu", total); + // } + + s->cumulativeStatistics->bytesPinnedEntangledWatermark = + max( + s->cumulativeStatistics->bytesPinnedEntangledWatermark, + total + ); +} + float GC_approxRaceFactor(GC_state s) { float count = 0; diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 69a9decb8..519817c76 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -143,6 +143,8 @@ PRIVATE uintmax_t GC_numChecksSkipped(GC_state s); PRIVATE uintmax_t GC_numSuspectsMarked(GC_state s); PRIVATE uintmax_t GC_numSuspectsCleared(GC_state s); PRIVATE uintmax_t GC_bytesPinnedEntangled(GC_state s); +PRIVATE uintmax_t GC_bytesPinnedEntangledWatermark(GC_state s); +PRIVATE void GC_updateBytesPinnedEntangledWatermark(GC_state s); PRIVATE uint32_t GC_getControlMaxCCDepth(GC_state s); diff --git a/runtime/gc/pin.c b/runtime/gc/pin.c index afcd48e02..fca2c138f 100644 --- a/runtime/gc/pin.c +++ b/runtime/gc/pin.c @@ -118,7 +118,12 @@ objptr pinObjectInfo( bool didPinChange = (nt != pinType(header)); *pinChange = didPinChange; if (nt == PIN_ANY && didPinChange) { - s->cumulativeStatistics->bytesPinnedEntangled += objectSize(s, p); + size_t sz = objectSize(s, p); + s->cumulativeStatistics->bytesPinnedEntangled += sz; + __sync_fetch_and_add( + &(s->cumulativeStatistics->currentPhaseBytesPinnedEntangled), + (uintmax_t)sz + ); } assert (!hasFwdPtr(p)); assert(pinType(newHeader) == nt); diff --git a/runtime/gc/statistics.c b/runtime/gc/statistics.c index fb26c2d19..861a7cb7e 100644 --- a/runtime/gc/statistics.c +++ b/runtime/gc/statistics.c @@ -73,6 +73,8 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->numSuspectsMarked = 0; cumulativeStatistics->numSuspectsCleared = 0; cumulativeStatistics->bytesPinnedEntangled = 0; + cumulativeStatistics->currentPhaseBytesPinnedEntangled = 0; + cumulativeStatistics->bytesPinnedEntangledWatermark = 0; cumulativeStatistics->approxRaceFactor = 0; 
cumulativeStatistics->timeLocalGC.tv_sec = 0; diff --git a/runtime/gc/statistics.h b/runtime/gc/statistics.h index 34e43df1a..1609ca8a5 100644 --- a/runtime/gc/statistics.h +++ b/runtime/gc/statistics.h @@ -69,6 +69,8 @@ struct GC_cumulativeStatistics { uintmax_t numSuspectsMarked; uintmax_t numSuspectsCleared; uintmax_t bytesPinnedEntangled; + uintmax_t currentPhaseBytesPinnedEntangled; + uintmax_t bytesPinnedEntangledWatermark; float approxRaceFactor; struct timespec timeLocalGC; From 83b4b68d7e3c939119ea9221a1f98323631a2009 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 27 Oct 2022 23:22:36 -0400 Subject: [PATCH 105/114] add bytesInScope stats for lgc,cgc --- basis-library/mpl/gc.sig | 4 ++++ basis-library/mpl/gc.sml | 6 ++++++ basis-library/primitive/prim-mlton.sml | 8 ++++++++ runtime/gc/concurrent-collection.c | 1 + runtime/gc/gc_state.c | 16 ++++++++++++++++ runtime/gc/gc_state.h | 2 ++ runtime/gc/hierarchical-heap-collection.c | 2 ++ runtime/gc/statistics.c | 2 ++ runtime/gc/statistics.h | 2 ++ 9 files changed, 43 insertions(+) diff --git a/basis-library/mpl/gc.sig b/basis-library/mpl/gc.sig index d1c73f7ba..1b4c5cabb 100644 --- a/basis-library/mpl/gc.sig +++ b/basis-library/mpl/gc.sig @@ -46,6 +46,8 @@ sig val localBytesReclaimed: unit -> IntInf.int val localBytesReclaimedOfProc: int -> IntInf.int + val bytesInScopeForLocal: unit -> IntInf.int + val numLocalGCs: unit -> IntInf.int val numLocalGCsOfProc: int -> IntInf.int @@ -61,6 +63,8 @@ sig val ccBytesReclaimed: unit -> IntInf.int val ccBytesReclaimedOfProc: int -> IntInf.int + val bytesInScopeForCC: unit -> IntInf.int + val ccTime: unit -> Time.time val ccTimeOfProc: int -> Time.time diff --git a/basis-library/mpl/gc.sml b/basis-library/mpl/gc.sml index e902c771d..5f2417e04 100644 --- a/basis-library/mpl/gc.sml +++ b/basis-library/mpl/gc.sml @@ -31,6 +31,12 @@ struct fun getCCBytesReclaimedOfProc p = GC.getCCBytesReclaimedOfProc (gcState (), Word32.fromInt p) + fun bytesInScopeForLocal () = + C_UIntmax.toLargeInt (GC.bytesInScopeForLocal (gcState ())) + + fun bytesInScopeForCC () = + C_UIntmax.toLargeInt (GC.bytesInScopeForCC (gcState ())) + fun numberDisentanglementChecks () = C_UIntmax.toLargeInt (GC.numberDisentanglementChecks (gcState ())) diff --git a/basis-library/primitive/prim-mlton.sml b/basis-library/primitive/prim-mlton.sml index 3e95b569a..54376834c 100644 --- a/basis-library/primitive/prim-mlton.sml +++ b/basis-library/primitive/prim-mlton.sml @@ -159,6 +159,14 @@ structure GC = val getCumulativeStatisticsLocalBytesReclaimedOfProc = _import "GC_getCumulativeStatisticsLocalBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; + val bytesInScopeForLocal = + _import "GC_bytesInScopeForLocal" runtime private: + GCState.t -> C_UIntmax.t; + + val bytesInScopeForCC = + _import "GC_bytesInScopeForCC" runtime private: + GCState.t -> C_UIntmax.t; + val getNumCCsOfProc = _import "GC_getNumCCsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; val getCCMillisecondsOfProc = _import "GC_getCCMillisecondsOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; val getCCBytesReclaimedOfProc = _import "GC_getCCBytesReclaimedOfProc" runtime private: GCState.t * Word32.word -> C_UIntmax.t; diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index ef1ca466d..7eebc3997 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -1192,6 +1192,7 @@ size_t CC_collectWithRoots( s->cumulativeStatistics->numCCs++; 
assert(bytesScanned >= bytesSaved); uintmax_t bytesReclaimed = bytesScanned-bytesSaved; + s->cumulativeStatistics->bytesInScopeForCC += bytesScanned; s->cumulativeStatistics->bytesReclaimedByCC += bytesReclaimed; return lists.bytesSaved; diff --git a/runtime/gc/gc_state.c b/runtime/gc/gc_state.c index ca036aa4d..c821e9741 100644 --- a/runtime/gc/gc_state.c +++ b/runtime/gc/gc_state.c @@ -98,6 +98,22 @@ uintmax_t GC_getCumulativeStatisticsLocalBytesReclaimedOfProc(GC_state s, uint32 return s->procStates[proc].cumulativeStatistics->bytesReclaimedByLocal; } +uintmax_t GC_bytesInScopeForLocal(GC_state s) { + uintmax_t retVal = 0; + for (size_t i = 0; i < s->numberOfProcs; i++) { + retVal += s->procStates[i].cumulativeStatistics->bytesInScopeForLocal; + } + return retVal; +} + +uintmax_t GC_bytesInScopeForCC(GC_state s) { + uintmax_t retVal = 0; + for (size_t i = 0; i < s->numberOfProcs; i++) { + retVal += s->procStates[i].cumulativeStatistics->bytesInScopeForCC; + } + return retVal; +} + uintmax_t GC_getCumulativeStatisticsBytesAllocated (GC_state s) { /* return sum across all processors */ size_t retVal = 0; diff --git a/runtime/gc/gc_state.h b/runtime/gc/gc_state.h index 519817c76..ff8246d7f 100644 --- a/runtime/gc/gc_state.h +++ b/runtime/gc/gc_state.h @@ -136,6 +136,8 @@ PRIVATE uintmax_t GC_getCumulativeStatisticsNumLocalGCsOfProc(GC_state s, uint32 PRIVATE uintmax_t GC_getNumCCsOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_getCCMillisecondsOfProc(GC_state s, uint32_t proc); PRIVATE uintmax_t GC_getCCBytesReclaimedOfProc(GC_state s, uint32_t proc); +PRIVATE uintmax_t GC_bytesInScopeForLocal(GC_state s); +PRIVATE uintmax_t GC_bytesInScopeForCC(GC_state s); PRIVATE uintmax_t GC_numDisentanglementChecks(GC_state s); PRIVATE uintmax_t GC_numEntanglements(GC_state s); PRIVATE float GC_approxRaceFactor(GC_state s); diff --git a/runtime/gc/hierarchical-heap-collection.c b/runtime/gc/hierarchical-heap-collection.c index 1dbc64433..444f8744f 100644 --- a/runtime/gc/hierarchical-heap-collection.c +++ b/runtime/gc/hierarchical-heap-collection.c @@ -957,6 +957,8 @@ void HM_HHC_collectLocal(uint32_t desiredScope) } } + s->cumulativeStatistics->bytesInScopeForLocal += totalSizeBefore; + if (totalSizeAfter > totalSizeBefore) { // whoops? 
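
The new in-scope counters pair with the `localBytesReclaimed`/`ccBytesReclaimed` statistics already exposed in `basis-library/mpl/gc.sig`. As a rough sketch of how a program might consume them (not part of this patch: the structure path `MPL.GC` and the helper names below are illustrative assumptions), one could report the fraction of in-scope bytes each collector reclaimed:

```
(* Sketch only: fraction of in-scope bytes reclaimed by local GC and by CC.
 * Assumes the mpl/gc.sml structure is exposed to programs as MPL.GC;
 * ratio, pct, and reportReclamation are illustrative helpers, not library code. *)
fun ratio (reclaimed : IntInf.int, inScope : IntInf.int) : real =
  if inScope = 0 then 0.0
  else Real.fromLargeInt (IntInf.toLarge reclaimed)
       / Real.fromLargeInt (IntInf.toLarge inScope)

fun reportReclamation () =
  let
    val localR = ratio (MPL.GC.localBytesReclaimed (), MPL.GC.bytesInScopeForLocal ())
    val ccR = ratio (MPL.GC.ccBytesReclaimed (), MPL.GC.bytesInScopeForCC ())
    fun pct r = Real.fmt (StringCvt.FIX (SOME 1)) (100.0 * r) ^ "%"
  in
    print ("local GC reclaimed " ^ pct localR ^ " of in-scope bytes\n");
    print ("CC reclaimed " ^ pct ccR ^ " of in-scope bytes\n")
  end
```

Measuring reclamation against bytes in scope (rather than against total allocation) isolates how effective each collection pass was on the data it actually traced, which appears to be the motivation for tracking these counters separately from the reclaimed-bytes totals.
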
diff --git a/runtime/gc/statistics.c b/runtime/gc/statistics.c index 861a7cb7e..fc3fc2880 100644 --- a/runtime/gc/statistics.c +++ b/runtime/gc/statistics.c @@ -48,6 +48,8 @@ struct GC_cumulativeStatistics *newCumulativeStatistics(void) { cumulativeStatistics->bytesHHLocaled = 0; cumulativeStatistics->bytesReclaimedByLocal = 0; cumulativeStatistics->bytesReclaimedByCC = 0; + cumulativeStatistics->bytesInScopeForLocal = 0; + cumulativeStatistics->bytesInScopeForCC = 0; cumulativeStatistics->maxBytesLive = 0; cumulativeStatistics->maxBytesLiveSinceReset = 0; cumulativeStatistics->maxHeapSize = 0; diff --git a/runtime/gc/statistics.h b/runtime/gc/statistics.h index 1609ca8a5..391d7f647 100644 --- a/runtime/gc/statistics.h +++ b/runtime/gc/statistics.h @@ -37,6 +37,8 @@ struct GC_cumulativeStatistics { uintmax_t bytesHHLocaled; uintmax_t bytesReclaimedByLocal; uintmax_t bytesReclaimedByCC; + uintmax_t bytesInScopeForLocal; + uintmax_t bytesInScopeForCC; size_t maxBytesLive; size_t maxBytesLiveSinceReset; From 4d343e4900fa658511f0c3c7f9bb71a400bd7635 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 2 May 2023 12:17:50 -0400 Subject: [PATCH 106/114] Bugfix: exception handling and CGC scheduling This fixes a subtle bug where, when scheduling a CGC, an unhandled exception can skip the cleanup code that happens after checking if the CGC-task can be popped and executed locally. In the scheduler, in `forkGC`, there is a call to `fork ... (f, g)` between the `push` and `popDiscard` for the CGC-task. Previously, if this call raised an exception, then the `popDiscard` (and other associated cleanup) was skipped. The solution is straightforward, similar to how exceptions are normally caught and re-raised across task boundaries: 1. catch the exception if needed (via `result (fn () => ...)`) 2. perform the cleanup 3. 
re-raise the exception if needed (via `extractResult`) --- basis-library/schedulers/shh/Scheduler.sml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/basis-library/schedulers/shh/Scheduler.sml b/basis-library/schedulers/shh/Scheduler.sml index 65070b41d..57517c441 100644 --- a/basis-library/schedulers/shh/Scheduler.sml +++ b/basis-library/schedulers/shh/Scheduler.sml @@ -452,7 +452,8 @@ struct val _ = HH.setDepth (thread, depth + 1) val _ = HH.forceLeftHeap(myWorkerId(), thread) (* val _ = dbgmsg' (fn _ => "fork CC at depth " ^ Int.toString depth) *) - val result = fork' {ccOkayAtThisDepth=false} (f, g) + val res = + result (fn () => fork' {ccOkayAtThisDepth=false} (f, g)) val _ = if popDiscard() then @@ -474,7 +475,7 @@ struct val _ = if depth <> 1 then () else HH.updateBytesPinnedEntangledWatermark () (* val _ = dbgmsg' (fn _ => "join CC at depth " ^ Int.toString depth) *) in - result + extractResult res end end From 5acec528c6176707ecf22218fb95b3a172211e5f Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 2 May 2023 12:25:00 -0400 Subject: [PATCH 107/114] more CGC and block allocator stats logging --- runtime/gc/block-allocator.c | 38 ++++++++++++++++++++++++++++-- runtime/gc/block-allocator.h | 2 ++ runtime/gc/concurrent-collection.c | 32 ++++++++++++++++++------- runtime/gc/concurrent-collection.h | 10 ++++++-- 4 files changed, 70 insertions(+), 12 deletions(-) diff --git a/runtime/gc/block-allocator.c b/runtime/gc/block-allocator.c index 8b18f45b0..abc97d9a4 100644 --- a/runtime/gc/block-allocator.c +++ b/runtime/gc/block-allocator.c @@ -399,7 +399,7 @@ static void clearOutOtherFrees(GC_state s) { } if (numFreed > 400) { - LOG(LM_CHUNK_POOL, LL_INFO, + LOG(LM_CHUNK_POOL, LL_DEBUG, "number of freed blocks: %zu", numFreed); } @@ -411,6 +411,10 @@ static void freeMegaBlock(GC_state s, MegaBlock mb, size_t sizeClass) { size_t nb = mb->numBlocks; enum BlockPurpose purpose = mb->purpose; + LOG(LM_CHUNK_POOL, LL_INFO, + "Freeing megablock of %zu blocks", + nb); + if (sizeClass >= s->controls->megablockThreshold) { GC_release((pointer)mb, s->controls->blockSize * mb->numBlocks); LOG(LM_CHUNK_POOL, LL_INFO, @@ -505,6 +509,11 @@ static MegaBlock mmapNewMegaBlock(GC_state s, size_t numBlocks, enum BlockPurpos mb->numBlocks = numBlocks; mb->nextMegaBlock = NULL; mb->purpose = purpose; + + LOG(LM_CHUNK_POOL, LL_INFO, + "mmap'ed new megablock of size %zu", + numBlocks); + return mb; } @@ -652,12 +661,16 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f) { void queryCurrentBlockUsage( GC_state s, size_t *numBlocksMapped, + size_t *numGlobalBlocksMapped, size_t *numBlocksReleased, + size_t *numGlobalBlocksReleased, size_t *numBlocksAllocated, size_t *numBlocksFreed) { *numBlocksMapped = 0; + *numGlobalBlocksMapped = 0; *numBlocksReleased = 0; + *numGlobalBlocksReleased = 0; for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { numBlocksAllocated[p] = 0; numBlocksFreed[p] = 0; @@ -682,6 +695,9 @@ void queryCurrentBlockUsage( numBlocksAllocated[p] += global->numBlocksAllocated[p]; numBlocksFreed[p] += global->numBlocksFreed[p]; } + + *numGlobalBlocksMapped += global->numBlocksMapped; + *numGlobalBlocksReleased += global->numBlocksReleased; } @@ -691,10 +707,20 @@ void logCurrentBlockUsage( __attribute__((unused)) void *env) { size_t mapped; + size_t globalMapped; size_t released; + size_t globalReleased; size_t allocated[NUM_BLOCK_PURPOSES]; size_t freed[NUM_BLOCK_PURPOSES]; - queryCurrentBlockUsage(s, &mapped, &released, (size_t*)allocated, 
(size_t*)freed); + queryCurrentBlockUsage( + s, + &mapped, + &globalMapped, + &released, + &globalReleased, + (size_t*)allocated, + (size_t*)freed + ); size_t inUse[NUM_BLOCK_PURPOSES]; for (enum BlockPurpose p = 0; p < NUM_BLOCK_PURPOSES; p++) { @@ -709,9 +735,13 @@ void logCurrentBlockUsage( size_t count = mapped-released; if (released > mapped) count = 0; + size_t globalCount = globalMapped - globalReleased; + if (globalReleased > globalMapped) globalCount = 0; + LOG(LM_BLOCK_ALLOCATOR, LL_INFO, "block-allocator(%zu.%.9zu)\n" " currently mapped %zu (= %zu - %zu)\n" + " currently mapped (global) %zu (= %zu - %zu)\n" " BLOCK_FOR_HEAP_CHUNK %zu (%zu%%) (= %zu - %zu)\n" " BLOCK_FOR_REMEMBERED_SET %zu (%zu%%) (= %zu - %zu)\n" " BLOCK_FOR_FORGOTTEN_SET %zu (%zu%%) (= %zu - %zu)\n" @@ -726,6 +756,10 @@ void logCurrentBlockUsage( count, mapped, released, + + globalCount, + globalMapped, + globalReleased, inUse[BLOCK_FOR_HEAP_CHUNK], (size_t)(100.0 * (double)inUse[BLOCK_FOR_HEAP_CHUNK] / (double)count), diff --git a/runtime/gc/block-allocator.h b/runtime/gc/block-allocator.h index a7a965228..47b55a2dc 100644 --- a/runtime/gc/block-allocator.h +++ b/runtime/gc/block-allocator.h @@ -211,7 +211,9 @@ void freeBlocks(GC_state s, Blocks bs, writeFreedBlockInfoFnClosure f); void queryCurrentBlockUsage( GC_state s, size_t *numBlocksMapped, + size_t *numGlobalBlocksMapped, size_t *numBlocksReleased, + size_t *numGlobalBlocksReleased, size_t *blocksAllocated, size_t *blocksFreed); diff --git a/runtime/gc/concurrent-collection.c b/runtime/gc/concurrent-collection.c index 7eebc3997..f7133f87e 100644 --- a/runtime/gc/concurrent-collection.c +++ b/runtime/gc/concurrent-collection.c @@ -325,6 +325,7 @@ void tryMarkAndAddToWorkList( if (!CC_isPointerMarked(p)) { markObj(p); args->bytesSaved += sizeofObject(s, p); + args->numObjectsMarked++; assert(CC_isPointerMarked(p)); CC_workList_push(s, &(args->worklist), op); } @@ -540,6 +541,7 @@ void forceForward(GC_state s, objptr *opp, void* rawArgs) { markObj(p); assert(CC_isPointerMarked(p)); args->bytesSaved += sizeofObject(s, p); + args->numObjectsMarked++; } CC_workList_push(s, &(args->worklist), op); @@ -652,19 +654,22 @@ void CC_collectAtRoot(pointer threadp, pointer hhp) { #endif size_t beforeSize = HM_getChunkListSize(HM_HH_getChunkList(heap)); - size_t live = CC_collectWithRoots(s, heap, thread); + size_t live = 0; + size_t numObjectsMarked = 0; + CC_collectWithRoots(s, heap, thread, &live, &numObjectsMarked); size_t afterSize = HM_getChunkListSize(HM_HH_getChunkList(heap)); size_t diff = beforeSize > afterSize ? beforeSize - afterSize : 0; LOG(LM_CC_COLLECTION, LL_INFO, - "finished at depth %u. before: %zu after: %zu (-%.01lf%%) live: %zu (%.01lf%% fragmented)", + "finished at depth %u. 
before: %zu after: %zu (-%.01lf%%) live: %zu (%.01lf%% fragmented) objects: %zu", heap->depth, beforeSize, afterSize, 100.0 * ((double)diff / (double)beforeSize), live, - 100.0 * (1.0 - (double)live / (double)afterSize)); + 100.0 * (1.0 - (double)live / (double)afterSize), + numObjectsMarked); // HM_HH_getConcurrentPack(heap)->ccstate = CC_UNREG; __atomic_store_n(&(HM_HH_getConcurrentPack(heap)->ccstate), CC_DONE, __ATOMIC_SEQ_CST); @@ -698,7 +703,7 @@ void CC_collectAtPublicLevel(GC_state s, GC_thread thread, uint32_t depth) { // collect only if the heap is above a threshold size if (HM_getChunkListSize(&(heap->chunkList)) >= 2 * HM_BLOCK_SIZE) { assert(getThreadCurrent(s) == thread); - CC_collectWithRoots(s, heap, thread); + CC_collectWithRoots(s, heap, thread, NULL, NULL); } // Mark that collection is complete @@ -908,10 +913,12 @@ void CC_filterDownPointers(GC_state s, HM_chunkList x, HM_HierarchicalHeap hh){ #endif -size_t CC_collectWithRoots( +void CC_collectWithRoots( GC_state s, HM_HierarchicalHeap targetHH, - __attribute__((unused)) GC_thread thread) + __attribute__((unused)) GC_thread thread, + size_t *outputBytesSaved, + size_t *outputNumObjectsMarked) { getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed(s); getThreadCurrent(s)->exnStack = s->exnStack; @@ -949,7 +956,8 @@ size_t CC_collectWithRoots( .repList = repList, .toHead = (void*)repList, .fromHead = (void*) &(origList), - .bytesSaved = 0 + .bytesSaved = 0, + .numObjectsMarked = 0 }; CC_workList_init(s, &(lists.worklist)); @@ -1195,7 +1203,15 @@ size_t CC_collectWithRoots( s->cumulativeStatistics->bytesInScopeForCC += bytesScanned; s->cumulativeStatistics->bytesReclaimedByCC += bytesReclaimed; - return lists.bytesSaved; + if (outputBytesSaved != NULL) { + *outputBytesSaved = lists.bytesSaved; + } + + if (outputNumObjectsMarked != NULL) { + *outputNumObjectsMarked = lists.numObjectsMarked; + } + + return; } #endif diff --git a/runtime/gc/concurrent-collection.h b/runtime/gc/concurrent-collection.h index a52dd757b..db0bfc561 100644 --- a/runtime/gc/concurrent-collection.h +++ b/runtime/gc/concurrent-collection.h @@ -28,6 +28,7 @@ typedef struct ConcurrentCollectArgs { void* toHead; void* fromHead; size_t bytesSaved; + size_t numObjectsMarked; } ConcurrentCollectArgs; @@ -89,8 +90,13 @@ PRIVATE void GC_updateObjectHeader(GC_state s, pointer p, GC_header newHeader); // in the chunk is live then the whole chunk is. However, tracing is at the granularity of objects. // Objects in chunks that are preserved may point to chunks that are not. But such objects aren't // reachable. 
-size_t CC_collectWithRoots(GC_state s, struct HM_HierarchicalHeap * targetHH, GC_thread thread); - +void CC_collectWithRoots( + GC_state s, + struct HM_HierarchicalHeap * targetHH, + GC_thread thread, + size_t *bytesSaved, + size_t *numObjectsMarked); + void CC_collectAtPublicLevel(GC_state s, GC_thread thread, uint32_t depth); void CC_addToStack(GC_state s, ConcurrentPackage cp, pointer p); void CC_initStack(GC_state s, ConcurrentPackage cp); From 5b375a7c42a21740a0e8b95db0641a88dcec08ef Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 2 May 2023 15:31:13 -0400 Subject: [PATCH 108/114] notes about gc policy --- runtime/gc/hierarchical-heap.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/runtime/gc/hierarchical-heap.c b/runtime/gc/hierarchical-heap.c index 9d4a028c1..987cb26f1 100644 --- a/runtime/gc/hierarchical-heap.c +++ b/runtime/gc/hierarchical-heap.c @@ -794,8 +794,17 @@ void mergeCompletedCCs(GC_state s, HM_HierarchicalHeap hh) { HM_HierarchicalHeap completed = hh->subHeapCompletedCC; while (completed != NULL) { HM_HierarchicalHeap next = completed->subHeapCompletedCC; + + /* consider using max instead of addition */ HM_HH_getConcurrentPack(hh)->bytesSurvivedLastCollection += HM_HH_getConcurrentPack(completed)->bytesSurvivedLastCollection; + + /* + HM_HH_getConcurrentPack(hh)->bytesSurvivedLastCollection = + max(HM_HH_getConcurrentPack(hh)->bytesSurvivedLastCollection, + HM_HH_getConcurrentPack(completed)->bytesSurvivedLastCollection); + */ + CC_freeStack(s, HM_HH_getConcurrentPack(completed)); linkInto(s, hh, completed); HM_appendChunkList(HM_HH_getChunkList(hh), HM_HH_getChunkList(completed)); @@ -848,6 +857,8 @@ bool checkPolicyforRoot( } size_t bytesSurvived = HM_HH_getConcurrentPack(hh)->bytesSurvivedLastCollection; + + /* consider removing this: */ for (HM_HierarchicalHeap cursor = hh->subHeapCompletedCC; NULL != cursor; cursor = cursor->subHeapCompletedCC) From b8d03538c18520b5813b3d2f7ab4c49961f0a558 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Fri, 5 May 2023 16:28:23 -0400 Subject: [PATCH 109/114] bugfix: update structure One to be thread-safe basis-library/util/one.sml defines a structure called `One` which is used in the basis library to optimize memory usage of a few functions, including: * Int.{fmt,toString} * Word.{fmt,toString} * Real.{split,toManExp} This patch fixes a buggy race condition in the implementation of `One`. With this patch, the above functions should be safe for parallelism and concurrency. The idea behind `One` is straightforward: a static buffer or mutable cell is allocated to be shared across calls. When a call is made, if the shared buffer is not in use, then the shared buffer can be claimed and used for the duration of that call, and then released. The mechanism for claiming the buffer (inherited from MLton) was previously not thread-safe, because it was not atomic at the hardware level. For MLton, it didn't need to be. But for MPL this is no longer correct, hence the bug. This patch switches to using an atomic compare-and-swap (CAS) to claim the buffer. --- basis-library/util/one.sml | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/basis-library/util/one.sml b/basis-library/util/one.sml index a4b11da1b..f04b82dba 100644 --- a/basis-library/util/one.sml +++ b/basis-library/util/one.sml @@ -1,5 +1,6 @@ (* Copyright (C) 2006-2006 Henry Cejtin, Matthew Fluet, Suresh * Jagannathan, and Stephen Weeks. + * Copyright (C) 2023 Sam Westrick. 
* * MLton is released under a HPND-style license. * See the file MLton-LICENSE for details. @@ -13,28 +14,44 @@ structure One: val use: 'a t * ('a -> 'b) -> 'b end = struct + + (* SAM_NOTE: using Word8 instead of bool here to work around the + * compilation bug with primitive compareAndSwap... (The compilation + * passes splitTypes1 and splitTypes2 cause a crash when compareAndSwap + * is used on certain data types, including bool.) + * + * Here I use 0w0 for false, and 0w1 for true + * + * When we fix compilation for compareAndSwap, we can switch back + * to using bool. + *) + datatype 'a t = T of {more: unit -> 'a, static: 'a, - staticIsInUse: bool ref} + staticIsInUse: Primitive.Word8.word ref} fun make f = T {more = f, static = f (), - staticIsInUse = ref false} + staticIsInUse = ref 0w0} + + val cas = Primitive.MLton.Parallel.compareAndSwap fun use (T {more, static, staticIsInUse}, f) = let val () = Primitive.MLton.Thread.atomicBegin () - val b = ! staticIsInUse + val claimed = + (!staticIsInUse) = 0w0 + andalso + 0w0 = cas (staticIsInUse, 0w0, 0w1) val d = - if b then + if not claimed then (Primitive.MLton.Thread.atomicEnd (); more ()) else - (staticIsInUse := true; - Primitive.MLton.Thread.atomicEnd (); + (Primitive.MLton.Thread.atomicEnd (); static) in DynamicWind.wind (fn () => f d, - fn () => if b then () else staticIsInUse := false) + fn () => if claimed then staticIsInUse := 0w0 else ()) end end From 5a983376cfdc2d2546c3f1a27783fd18333a018c Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 25 May 2023 15:04:17 -0400 Subject: [PATCH 110/114] fix compilation error for compare-and-swap on `Real32.real array` --- include/c-chunk.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/c-chunk.h b/include/c-chunk.h index c0a74b34f..27641c1b8 100644 --- a/include/c-chunk.h +++ b/include/c-chunk.h @@ -65,6 +65,13 @@ Real64 ArrayR64_cas(Real64* a, Word64 i, Real64 x, Real64 y) { return *((Real64*)&result); } +static inline +Real32 ArrayR32_cas(Real32* a, Word64 i, Real32 x, Real32 y) { + Word32 result = + __sync_val_compare_and_swap(((Word32*)a) + i, *((Word32*)&x), *((Word32*)&y)); + return *((Real32*)&result); +} + #define RefW8_cas(r, x, y) __sync_val_compare_and_swap((Word8*)(r), (x), (y)) #define RefW16_cas(r, x, y) __sync_val_compare_and_swap((Word16*)(r), (x), (y)) #define RefW32_cas(r, x, y) __sync_val_compare_and_swap((Word32*)(r), (x), (y)) @@ -87,7 +94,7 @@ Objptr RefP_cas(Objptr* r, Objptr x, Objptr y) { #define ArrayW32_cas(a, i, x, y) __sync_val_compare_and_swap(((Word32*)(a)) + (i), (x), (y)) #define ArrayW64_cas(a, i, x, y) __sync_val_compare_and_swap(((Word64*)(a)) + (i), (x), (y)) -#define ArrayR32_cas(a, i, x, y) __sync_val_compare_and_swap(((Real32*)(a)) + (i), (x), (y)) +// #define ArrayR32_cas(a, i, x, y) __sync_val_compare_and_swap(((Real32*)(a)) + (i), (x), (y)) // #define ArrayR64_cas(a, i, x, y) __sync_val_compare_and_swap(((Real64*)(a)) + (i), (x), (y)) // #define ArrayP_cas(a, i, x, y) __sync_val_compare_and_swap(((Objptr*)(a)) + (i), (x), (y)) From ba59f033cc0f3e7224e2fe8b70b934ee5266e1da Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 22 Jun 2023 10:03:04 -0400 Subject: [PATCH 111/114] disable splitTypes1 and splitTypes2 by default --- mlton/ssa/simplify.fun | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mlton/ssa/simplify.fun b/mlton/ssa/simplify.fun index 4646565b3..7ba3d4710 100644 --- a/mlton/ssa/simplify.fun +++ b/mlton/ssa/simplify.fun @@ -55,7 +55,10 @@ val ssaPassesDefault = {name 
= "localFlatten1", doit = LocalFlatten.transform, execute = true} :: {name = "constantPropagation", doit = ConstantPropagation.transform, execute = true} :: {name = "duplicateGlobals1", doit = DuplicateGlobals.transform, execute = false} :: - {name = "splitTypes1", doit = SplitTypes.transform, execute = true} :: + (* SAM_NOTE: disabling splitTypes1 because it does not yet support primitive + * polymorphic CAS. We should update the pass and then re-enable. + *) + {name = "splitTypes1", doit = SplitTypes.transform, execute = false} :: (* useless should run * - after constant propagation because constant propagation makes * slots of tuples that are constant useless @@ -67,7 +70,10 @@ val ssaPassesDefault = {name = "loopUnroll1", doit = LoopUnroll.transform, execute = false} :: {name = "removeUnused2", doit = RemoveUnused.transform, execute = true} :: {name = "duplicateGlobals2", doit = DuplicateGlobals.transform, execute = true} :: - {name = "splitTypes2", doit = SplitTypes.transform, execute = true} :: + (* SAM_NOTE: disabling splitTypes2 because it does not yet support primitive + * polymorphic CAS. We should update the pass and then re-enable. + *) + {name = "splitTypes2", doit = SplitTypes.transform, execute = false} :: {name = "simplifyTypes", doit = SimplifyTypes.transform, execute = true} :: (* polyEqual should run * - after types are simplified so that many equals are turned into eqs From 13103d5a47ac350781046a4175e8b892e35d5899 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 22 Jun 2023 11:37:34 -0400 Subject: [PATCH 112/114] readme: highlight benchmark suite and available libraries --- README.md | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8d6d955aa..3090a7676 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ compiler for Standard ML which implements support for nested (fork-join) parallelism. MPL generates executables with excellent multicore performance, utilizing a novel approach to memory management based on the theory of disentanglement -[[1](#rmab16),[2](#gwraf18),[3](#wyfa20),[4](#awa21),[5](#waa22)]. +[[1](#rmab16),[2](#gwraf18),[3](#wyfa20),[4](#awa21),[5](#waa22),[6](#awa23)]. MPL is research software and is being actively developed. @@ -38,6 +38,29 @@ $ docker run -it -v $(pwd -P):/root/mycode shwestrick/mpl /bin/bash ...# ./main @mpl procs 4 -- ``` +## Benchmark Suite + +The [Parallel ML benchmark suite](https://github.com/MPLLang/parallel-ml-bench) +provides examples of dozens of sophisticated parallel algorithms and +applications in MPL, as well as cross-language comparisons with C++, Go, Java, +and multicore OCaml. + +## Libraries + +We recommend using the [smlpkg](https://github.com/diku-dk/smlpkg) package +manager. MPL supports the full SML language, so all existing libraries for +SML can be used. + +In addition, here are a few libraries that make use of MPL for parallelism: + * [`github.com/MPLLang/mpllib`](https://github.com/MPLLang/mpllib): implements + a variety of data structures (sequences, sets, dictionaries, graphs, matrices, meshes, + images, etc.) and parallel algorithms (map, reduce, scan, filter, sorting, + search, tokenization, graph processing, computational geometry, etc.). Also + includes basic utilies (e.g. parsing command-line arguments) and + benchmarking infrastructure. + * [`github.com/shwestrick/sml-audio`](https://github.com/shwestrick/sml-audio): + a library for audio processing with I/O support for `.wav` files. 
+ ## Build and Install (from source) @@ -300,3 +323,8 @@ POPL 2021. [Entanglement Detection with Near-Zero Cost](http://www.cs.cmu.edu/~swestric/22/icfp-detect.pdf). Sam Westrick, Jatin Arora, and Umut A. Acar. ICFP 2022. + +[5] +[Efficient Parallel Functional Programming with Effects](https://www.cs.cmu.edu/~swestric/23/epfpe.pdf). +Jatin Arora, Sam Westrick, and Umut A. Acar. +PLDI 2023. From 48f33c4bc734cf7ea2e5997259798217549ca0be Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Thu, 22 Jun 2023 11:38:28 -0400 Subject: [PATCH 113/114] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3090a7676..4f8b10bec 100644 --- a/README.md +++ b/README.md @@ -324,7 +324,7 @@ POPL 2021. Sam Westrick, Jatin Arora, and Umut A. Acar. ICFP 2022. -[5] +[6] [Efficient Parallel Functional Programming with Effects](https://www.cs.cmu.edu/~swestric/23/epfpe.pdf). Jatin Arora, Sam Westrick, and Umut A. Acar. PLDI 2023. From 66ac61d6fdefb22a0d82fb110b6c7325cbc08443 Mon Sep 17 00:00:00 2001 From: Sam Westrick Date: Tue, 15 Aug 2023 11:49:45 -0400 Subject: [PATCH 114/114] readme tweaks: add info about mpl-switch, remove old disentanglement restriction, etc. --- README.md | 151 ++++++++++++++++++++++-------------------------------- 1 file changed, 62 insertions(+), 89 deletions(-) diff --git a/README.md b/README.md index 4f8b10bec..185761704 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ $ docker run -it shwestrick/mpl /bin/bash ...# examples/bin/primes @mpl procs 4 -- ``` -If you want to try out MPL by writing and compiling your own code, we recommend +To write and compile your own code, we recommend mounting a local directory inside the container. For example, here's how you can use MPL to compile and run your own `main.mlb` in the current directory. (To mount some other directory, replace `$(pwd -P)` with a different path.) @@ -41,14 +41,15 @@ $ docker run -it -v $(pwd -P):/root/mycode shwestrick/mpl /bin/bash ## Benchmark Suite The [Parallel ML benchmark suite](https://github.com/MPLLang/parallel-ml-bench) -provides examples of dozens of sophisticated parallel algorithms and -applications in MPL, as well as cross-language comparisons with C++, Go, Java, +provides many examples of sophisticated parallel algorithms and +applications in MPL, as well as cross-language performance comparisons with +C++, Go, Java, and multicore OCaml. ## Libraries We recommend using the [smlpkg](https://github.com/diku-dk/smlpkg) package -manager. MPL supports the full SML language, so all existing libraries for +manager. MPL supports the full SML language, so existing libraries for SML can be used. In addition, here are a few libraries that make use of MPL for parallelism: @@ -62,45 +63,11 @@ In addition, here are a few libraries that make use of MPL for parallelism: a library for audio processing with I/O support for `.wav` files. -## Build and Install (from source) - -### Requirements - -MPL has only been tested on Linux with x86-64. The following software is -required. - * [GCC](http://gcc.gnu.org) - * [GMP](http://gmplib.org) (GNU Multiple Precision arithmetic library) - * [GNU Make](http://savannah.gnu.org/projects/make), [GNU Bash](http://www.gnu.org/software/bash/) - * binutils (`ar`, `ranlib`, `strip`, ...) - * miscellaneous Unix utilities (`diff`, `find`, `grep`, `gzip`, `patch`, `sed`, `tar`, `xargs`, ...) - * Standard ML compiler and tools: - - Recommended: [MLton](http://mlton.org) (`mlton`, `mllex`, and `mlyacc`). 
Pre-built binary packages for MLton can be installed via an OS package manager or (for select platforms) obtained from http://mlton.org. - - Supported but not recommended: [SML/NJ](http://www.smlnj.org) (`sml`, `ml-lex`, `ml-yacc`). - -### Instructions - -The following builds the compiler at `build/bin/mpl`. -``` -$ make all -``` - -After building, MPL can then be installed to `/usr/local`: -``` -$ make install -``` -or to a custom directory with the `PREFIX` option: -``` -$ make PREFIX=/opt/mpl install -``` - ## Parallel and Concurrent Extensions MPL extends SML with a number of primitives for parallelism and concurrency. Take a look at `examples/` to see these primitives in action. -**Note**: Before writing any of your own code, make sure to read the section -"Disentanglement" below. - ### The `ForkJoin` Structure ``` val par: (unit -> 'a) * (unit -> 'b) -> 'a * 'b @@ -186,8 +153,6 @@ by default. * `-debug true -debug-runtime true -keep g` For debugging, keeps the generated C files and uses the debug version of the runtime (with assertions enabled). The resulting executable is somewhat peruse-able with tools like `gdb`. -* `-detect-entanglement true` enables the dynamic entanglement detector. -See below for more information. For example: ``` @@ -221,60 +186,13 @@ argument `bar` using 4 pinned processors. $ foo @mpl procs 4 set-affinity -- bar ``` -## Disentanglement - -Currently, MPL only supports programs that are **disentangled**, which -(roughly speaking) is the property that concurrent threads remain oblivious -to each other's allocations [[3](#wyfa20)]. - -Here are a number of different ways to guarantee that your code is -disentangled. -- (Option 1) Use only purely functional data (no `ref`s or `array`s). This is -the simplest but most restrictive approach. -- (Option 2) If using mutable data, use only non-pointer data. MPL guarantees -that simple types (`int`, `word`, `char`, `real`, etc.) are never -indirected through a -pointer, so for example it is safe to use `int array`. Other types such as -`int list array` and `int array array` should be avoided. This approach -is very easy to check and is surprisingly general. Data races are fine! -- (Option 3) Make sure that your program is race-free. This can be -tricky to check but allows you to use any type of data. Many of our example -programs are race-free. - -## Entanglement Detection - -Whenever a thread acquires a reference -to an object allocated concurrently by some other thread, then we say that -the two threads are **entangled**. This is a violation of disentanglement, -which MPL currently does not allow. - -MPL has a built-in dynamic entanglement detector which is enabled by default. -The entanglement detector monitors individual reads and writes during execution; -if entanglement is found, the program will terminate with an error message. - -The entanglement detector is both "sound" and "complete": there are neither -false negatives nor false positives. In other words, the detector always raises -an alarm when entanglement occurs, and never raises an alarm otherwise. Note -however that entanglement (and therefore also entanglement detection) can -be execution-dependent: if your program is non-deterministic (e.g. racy), -then entanglement may or may not occur depending on the outcome of a race -condition. Similarly, entanglement could be input-dependent. - -Entanglement detection is highly optimized, and typically has negligible -overhead (see [[5](#waa22)]). 
It can be disabled at compile-time by passing -`-detect-entanglement false`; however, we recommend against doing so. MPL -relies on entanglement detection to ensure memory safety. We recommend leaving -entanglement detection enabled at all times. ## Bugs and Known Issues ### Basis Library -In general, the basis library has not yet been thoroughly scrubbed, and many -functions may not be safe for parallelism +The basis library is inherited from (sequential) SML. It has not yet been +thoroughly scrubbed, and some functions may not be safe for parallelism ([#41](https://github.com/MPLLang/mpl/issues/41)). -Some known issues: -* `Int.toString` is racy when called in parallel. -* `Real.fromString` may throw an error when called in parallel. ### Garbage Collection * ([#115](https://github.com/MPLLang/mpl/issues/115)) The GC is currently @@ -297,6 +215,61 @@ unsupported, including (but not limited to): * `Weak` * `World` + +## Build and Install (from source) + +### Requirements + +MPL has only been tested on Linux with x86-64. The following software is +required. + * [GCC](http://gcc.gnu.org) + * [GMP](http://gmplib.org) (GNU Multiple Precision arithmetic library) + * [GNU Make](http://savannah.gnu.org/projects/make), [GNU Bash](http://www.gnu.org/software/bash/) + * binutils (`ar`, `ranlib`, `strip`, ...) + * miscellaneous Unix utilities (`diff`, `find`, `grep`, `gzip`, `patch`, `sed`, `tar`, `xargs`, ...) + * Standard ML compiler and tools: + - Recommended: [MLton](http://mlton.org) (`mlton`, `mllex`, and `mlyacc`). Pre-built binary packages for MLton can be installed via an OS package manager or (for select platforms) obtained from http://mlton.org. + - Supported but not recommended: [SML/NJ](http://www.smlnj.org) (`sml`, `ml-lex`, `ml-yacc`). + * (If using [`mpl-switch`](https://github.com/mpllang/mpl-switch)): Python 3, and `git`. + +### Installation with `mpl-switch` + +The [`mpl-switch`](https://github.com/mpllang/mpl-switch) utility makes it +easy to install multiple versions of MPL on the same system and switch +between them. After setting up `mpl-switch`, you can install MPL as follows: +``` +$ mpl-switch install v0.4 +$ mpl-switch select v0.4 +``` + +You can use any commit hash or tag name from the MPL repo to pick a +particular version of MPL. Installed versions are stored in `~/.mpl/`; this +folder is safe to delete at any moment, as it can always be regenerated. To +see what versions of MPL are currently installed, do: +``` +$ mpl-switch list +``` + +### Manual Instructions + +Alternatively, you can manually build `mpl` by cloning this repo and then +performing the following. + +**Build the executable**. This produces an executable at `build/bin/mpl`: +``` +$ make +``` + +**Put it where you want it**. After building, MPL can then be installed to +`/usr/local`: +``` +$ make install +``` +or to a custom directory with the `PREFIX` option: +``` +$ make PREFIX=/opt/mpl install +``` + ## References [1]