diff --git a/src/Amalgam/Parser.cpp b/src/Amalgam/Parser.cpp
index 31f57ac6..a48b7dc9 100644
--- a/src/Amalgam/Parser.cpp
+++ b/src/Amalgam/Parser.cpp
@@ -991,7 +991,7 @@ void Parser::Unparse(UnparseData &upd, EvaluableNode *tree, EvaluableNode *paren
 		std::swap(upd.parentNodes, references);
 		Unparse(upd, code_to_print, nullptr, expanded_whitespace, indentation_depth, need_initial_indent);
 		std::swap(upd.parentNodes, references); //put the old parentNodes back
-		enm.FreeNodeTree(code_to_print);
+		//don't free code_to_print; let enm's destructor clean it up
 		return;
 	}
diff --git a/src/Amalgam/entity/Entity.cpp b/src/Amalgam/entity/Entity.cpp
index 558df5dd..e5fc47bd 100644
--- a/src/Amalgam/entity/Entity.cpp
+++ b/src/Amalgam/entity/Entity.cpp
@@ -859,11 +859,8 @@ void Entity::SetRoot(EvaluableNode *_code, bool allocated_with_entity_enm, Evalu
 	EvaluableNode *cur_root = GetRoot();
 	bool entity_previously_empty = (cur_root == nullptr || cur_root->GetNumChildNodes() == 0);
 
-	if(_code == nullptr)
-	{
-		evaluableNodeManager.SetRootNode(evaluableNodeManager.AllocNode(ENT_NULL));
-	}
-	else if(allocated_with_entity_enm && metadata_modifier == EvaluableNodeManager::ENMM_NO_CHANGE)
+	if(_code == nullptr
+		|| (allocated_with_entity_enm && metadata_modifier == EvaluableNodeManager::ENMM_NO_CHANGE))
 	{
 		evaluableNodeManager.SetRootNode(_code);
 	}
diff --git a/src/Amalgam/evaluablenode/EvaluableNode.h b/src/Amalgam/evaluablenode/EvaluableNode.h
index 850f6031..8dab3062 100644
--- a/src/Amalgam/evaluablenode/EvaluableNode.h
+++ b/src/Amalgam/evaluablenode/EvaluableNode.h
@@ -19,6 +19,7 @@
 #define AMALGAM_FAST_MEMORY_INTEGRITY
 #endif
 
+
 //forward declarations:
 class EvaluableNodeManager;
 
@@ -145,7 +146,7 @@ class EvaluableNode
 	inline void InitializeType(EvaluableNodeType _type)
 	{
 	#ifdef AMALGAM_FAST_MEMORY_INTEGRITY
-		assert(IsEvaluableNodeTypeValid(_type));
+		assert(IsEvaluableNodeTypeValid(_type) || _type == ENT_DEALLOCATED);
 	#endif
 
 		type = _type;
@@ -171,6 +172,17 @@ class EvaluableNode
 			attributes.individualAttribs.isIdempotent = true;
 			value.ConstructMappedChildNodes();
 		}
+		else if(_type == ENT_DEALLOCATED)
+		{
+		#ifdef AMALGAM_FAST_MEMORY_INTEGRITY
+			//use a value that makes it more apparent that something went wrong
+			value.numberValueContainer.numberValue = std::numeric_limits<double>::quiet_NaN();
+		#else
+			value.numberValueContainer.numberValue = 0;
+		#endif
+
+			value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID;
+		}
 		else
 		{
 			value.ConstructOrderedChildNodes();
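A minimal standalone sketch of the poison-value idea behind the ENT_DEALLOCATED branch above -- illustrative only, not part of the patch; PoisonableSlot is a hypothetical stand-in for EvaluableNode's number storage. Under the integrity build, freed storage is stamped with quiet_NaN so a use-after-free surfaces as NaN in downstream arithmetic instead of a plausible-looking number.

#include <cassert>
#include <cmath>
#include <limits>

//hypothetical stand-in for the number storage of a freed node
struct PoisonableSlot
{
	double numberValue = 0.0;

	//analogous to InitializeType(ENT_DEALLOCATED) with integrity checks enabled
	void MarkDeallocated()
	{
		//NaN propagates through arithmetic, so a use-after-free becomes visible
		numberValue = std::numeric_limits<double>::quiet_NaN();
	}
};

int main()
{
	PoisonableSlot slot;
	slot.numberValue = 42.0;
	slot.MarkDeallocated();

	//any arithmetic on the freed slot now yields NaN instead of a quiet wrong answer
	assert(std::isnan(slot.numberValue * 2.0 + 1.0));
	return 0;
}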
diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp b/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp
index 81721a75..0e9028a8 100644
--- a/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp
+++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp
@@ -59,63 +59,76 @@ EvaluableNode *EvaluableNodeManager::AllocNode(EvaluableNode *original, Evaluabl
 	return n;
 }
 
+//initializes the node at node_index; index 0 is the list head, any other index is a child node of parent
+void InitializeListHeadOrNode(EvaluableNode *node, EvaluableNode *parent, EvaluableNodeType child_node_type, size_t node_index, std::vector<EvaluableNode *> *ocn_buffer)
+{
+	if(node_index == 0)
+	{
+		//parent (list head); swap in the preallocated ordered child nodes buffer
+		node->InitializeType(ENT_LIST);
+		std::vector<EvaluableNode *> *ocn_ptr = &node->GetOrderedChildNodesReference();
+		std::swap(*ocn_buffer, *ocn_ptr);
+	}
+	else
+	{
+		//child node; initialize it and add it to the list items
+		auto &ocn = parent->GetOrderedChildNodesReference();
+		ocn[node_index - 1] = node;
+		node->InitializeType(child_node_type);
+	}
+}
+
 EvaluableNode *EvaluableNodeManager::AllocListNodeWithOrderedChildNodes(EvaluableNodeType child_node_type, size_t num_child_nodes)
 {
 	if(num_child_nodes == 0)
 		return AllocNode(ENT_LIST);
 
+	EvaluableNode *parent = nullptr;
+
+	//allocate from the thread local allocation buffer (TLab) first
+	size_t num_to_alloc = 1 + num_child_nodes;
 	size_t num_allocated = 0;
-	size_t num_to_alloc = num_child_nodes + 1;
 	size_t num_total_nodes_needed = 0;
 
-	EvaluableNode *retval = nullptr;
-
-	//start off allocating the parent node, then switch to child_node_type
-	EvaluableNodeType cur_type = ENT_LIST;
-
 	//ordered child nodes destination; preallocate outside of the lock (for performance) and swap in
-	std::vector<EvaluableNode *> *ocn_ptr = nullptr;
 	std::vector<EvaluableNode *> ocn_buffer;
 	ocn_buffer.resize(num_child_nodes);
 
 	//outer loop needed for multithreading, but doesn't hurt anything for single threading
 	while(num_allocated < num_to_alloc)
 	{
+		EvaluableNode *newNode = nullptr;
+
+		while(num_allocated < num_to_alloc && (newNode = GetNextNodeFromTLab()) != nullptr)
 		{
-		#ifdef MULTITHREAD_SUPPORT
-			//attempt to allocate as many as possible using an atomic without write locking
-			Concurrency::ReadLock lock(managerAttributesMutex);
-		#endif
+			if(parent == nullptr)
+				parent = newNode;
+
+			InitializeListHeadOrNode(newNode, parent, child_node_type, num_allocated, &ocn_buffer);
+			num_allocated++;
+		}
+
+		if(num_allocated >= num_to_alloc)
+		{
+			//got enough nodes out of the TLab
+			return parent;
+		}
+
+		{
+			//not enough nodes in the TLab; add some
+		#ifdef MULTITHREAD_SUPPORT
+			Concurrency::ReadLock lock(managerAttributesMutex);
+		#endif
 
-			for(; num_allocated < num_to_alloc; num_allocated++)
+			size_t num_added_to_tlab = 0;
+			for(; num_allocated + num_added_to_tlab < num_to_alloc; num_added_to_tlab++)
 			{
-				//attempt to allocate a node and make sure it's valid
 				size_t allocated_index = firstUnusedNodeIndex++;
 				if(allocated_index < nodes.size())
 				{
-					if(nodes[allocated_index] != nullptr)
-						nodes[allocated_index]->InitializeType(cur_type);
-					else
-						nodes[allocated_index] = new EvaluableNode(cur_type);
-
-					//if first node, populate the parent node
-					if(num_allocated == 0)
-					{
-						//prep parent node
-						retval = nodes[allocated_index];
-
-						//get the pointer to place child elements,
-						// but swap out the preallocated ordered child nodes
-						ocn_ptr = &retval->GetOrderedChildNodesReference();
-						std::swap(ocn_buffer, *ocn_ptr);
+					if(nodes[allocated_index] == nullptr)
+						nodes[allocated_index] = new EvaluableNode(ENT_DEALLOCATED);
 
-						//advance type to child node type
-						cur_type = child_node_type;
-					}
-					else //set the appropriate child node
-					{
-						(*ocn_ptr)[num_allocated - 1] = nodes[allocated_index];
-					}
+					AddNodeToTLab(nodes[allocated_index]);
 				}
 				else
 				{
@@ -125,34 +138,38 @@ EvaluableNode *EvaluableNodeManager::AllocListNodeWithOrderedChildNodes(Evaluabl
 				}
 			}
 
-			//if have allocated enough, just return
-			if(num_allocated == num_to_alloc)
-				return retval;
-
-			num_total_nodes_needed = firstUnusedNodeIndex + (num_to_alloc - num_allocated);
+			//if added enough nodes to the TLab, use them in the next loop iteration
+			if(num_added_to_tlab + num_allocated >= num_to_alloc)
+				continue;
 		}
 
-	#ifdef MULTITHREAD_SUPPORT
-		//don't have enough nodes, so need to attempt a write lock to allocate more
-		Concurrency::WriteLock write_lock(managerAttributesMutex);
+		//there weren't enough free nodes available to fill the TLab; allocate more
+		num_total_nodes_needed = firstUnusedNodeIndex + (num_to_alloc - num_allocated);
+		{
+		#ifdef MULTITHREAD_SUPPORT
+			//don't have enough nodes, so need to attempt a write lock to allocate more
+			Concurrency::WriteLock write_lock(managerAttributesMutex);
 
-		//try again after write lock to allocate a node in case another thread has performed the allocation
-		//already have the write lock, so don't need to worry about another thread stealing firstUnusedNodeIndex
-	#endif
+			//try again after write lock to allocate a node in case another thread has performed the allocation
+			//already have the write lock, so don't need to worry about another thread stealing firstUnusedNodeIndex
+		#endif
 
-		//if don't currently have enough free nodes to meet the needs, then expand the allocation
-		if(nodes.size() <= num_total_nodes_needed)
-		{
-			size_t new_num_nodes = static_cast<size_t>(allocExpansionFactor * num_total_nodes_needed) + 1;
-			//fill new EvaluableNode slots with nullptr
-			nodes.resize(new_num_nodes, nullptr);
+			//if don't currently have enough free nodes to meet the needs, then expand the allocation
+			if(nodes.size() <= num_total_nodes_needed)
+			{
+				size_t new_num_nodes = static_cast<size_t>(allocExpansionFactor * num_total_nodes_needed) + 1;
+
+				//fill new EvaluableNode slots with nullptr
+				nodes.resize(new_num_nodes, nullptr);
+			}
 		}
 	}
 
-	//shouldn't make it here
-	return retval;
+	//unreachable
+	assert(false);
+	return nullptr;
 }
 
 void EvaluableNodeManager::UpdateGarbageCollectionTrigger(size_t previous_num_nodes)
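The rewritten allocator above drains the thread local allocation buffer (TLab) first and takes the shared lock only to refill it in bulk. A simplified sketch of that drain/refill loop structure, with hypothetical names (SketchPool, RefillTLab) standing in for the EvaluableNodeManager API:

#include <cstddef>
#include <mutex>
#include <vector>

class SketchPool
{
public:
	//returns num pointers, preferring the thread local buffer and refilling it
	// from shared storage only when it runs dry
	std::vector<int *> AllocMany(size_t num)
	{
		std::vector<int *> out;
		while(out.size() < num)
		{
			if(!tlab.empty())
			{
				out.push_back(tlab.back());
				tlab.pop_back();
			}
			else
			{
				RefillTLab(num - out.size());
			}
		}
		return out;
	}

private:
	//refills the thread local buffer under the lock, expanding shared storage if needed
	void RefillTLab(size_t count)
	{
		std::lock_guard<std::mutex> lock(mtx);
		for(size_t i = 0; i < count; i++)
		{
			if(firstUnused >= storage.size())
				storage.resize(2 * storage.size() + 1, nullptr);

			//slots persist in storage for reuse, like the real nodes array
			if(storage[firstUnused] == nullptr)
				storage[firstUnused] = new int(0);

			tlab.push_back(storage[firstUnused++]);
		}
	}

	std::mutex mtx;
	std::vector<int *> storage;    //stand-in for the global nodes array
	size_t firstUnused = 0;        //stand-in for firstUnusedNodeIndex
	inline static thread_local std::vector<int *> tlab;
};

int main()
{
	SketchPool pool;
	auto nodes = pool.AllocMany(50);
	return nodes.size() == 50 ? 0 : 1;
}

Batching the refill amortizes one lock acquisition across many allocations, which is where the speedup over the removed per-node locking comes from.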
@@ -184,8 +201,10 @@
 		PerformanceProfiler::StartOperation(collect_garbage_string, GetNumberOfUsedNodes());
 	}
 
+	ClearThreadLocalAllocationBuffer();
+
 #ifdef MULTITHREAD_SUPPORT
 
 	//free lock so can attempt to enter write lock to collect garbage
 	if(memory_modification_lock != nullptr)
 		memory_modification_lock->unlock();
@@ -250,24 +269,38 @@ void EvaluableNodeManager::FreeAllNodes()
 }
 
 EvaluableNode *EvaluableNodeManager::AllocUninitializedNode()
-{
-	size_t allocated_index = 0;
+{
+	EvaluableNode *tlab_node = GetNextNodeFromTLab();
+
+	//fast path: get a node from the thread local allocation buffer
+	if(tlab_node != nullptr)
+		return tlab_node;
+
 #ifdef MULTITHREAD_SUPPORT
 	{
-		//attempt to allocate using an atomic without write locking
+		//slow path allocation; attempt to allocate using an atomic without write locking
 		Concurrency::ReadLock lock(managerAttributesMutex);
 
-		//attempt to allocate a node and make sure it's valid
-		allocated_index = firstUnusedNodeIndex++;
-		if(allocated_index < nodes.size())
+		//attempt to allocate enough nodes to refill the thread local buffer
+		size_t first_index_to_allocate = firstUnusedNodeIndex.fetch_add(tlabSize);
+		size_t last_index_to_allocate = first_index_to_allocate + tlabSize;
+
+		if(last_index_to_allocate < nodes.size())
 		{
-			if(nodes[allocated_index] == nullptr)
-				nodes[allocated_index] = new EvaluableNode();
+			for(size_t i = first_index_to_allocate; i < last_index_to_allocate; i++)
+			{
+				if(nodes[i] == nullptr)
+					nodes[i] = new EvaluableNode(ENT_DEALLOCATED);
 
-			return nodes[allocated_index];
+				AddNodeToTLab(nodes[i]);
+			}
+
+			return GetNextNodeFromTLab();
 		}
 
-		//the node wasn't valid; put it back and do a write lock to allocate more
-		--firstUnusedNodeIndex;
+		//couldn't allocate enough valid nodes; reset the index and allocate more
+		firstUnusedNodeIndex -= tlabSize;
+		ClearThreadLocalAllocationBuffer();
 	}
 
 	//don't have enough nodes, so need to attempt a write lock to allocate more
 	Concurrency::WriteLock write_lock(managerAttributesMutex);
@@ -276,17 +309,18 @@ EvaluableNode *EvaluableNodeManager::AllocUninitializedNode()
 	//already have the write lock, so don't need to worry about another thread stealing firstUnusedNodeIndex
 	//use the cached value for firstUnusedNodeIndex, allocated_index, to check if another thread has performed the allocation
 	//as other threads may have reduced firstUnusedNodeIndex, incurring more unnecessary write locks when a memory expansion is needed
-#else
-	allocated_index = firstUnusedNodeIndex;
+#endif
+
+	//reduce accesses to the atomic variable for performance
+	size_t allocated_index = firstUnusedNodeIndex++;
 
 	size_t num_nodes = nodes.size();
-	if(allocated_index < num_nodes && firstUnusedNodeIndex < num_nodes)
+	if(allocated_index < num_nodes)
 	{
-		if(nodes[firstUnusedNodeIndex] == nullptr)
-			nodes[firstUnusedNodeIndex] = new EvaluableNode();
+		if(nodes[allocated_index] == nullptr)
+			nodes[allocated_index] = new EvaluableNode();
 
-		return nodes[firstUnusedNodeIndex++];
+		return nodes[allocated_index];
 	}
 
 	//ran out, so need another node; push a bunch on the heap so don't need to reallocate as often and slow down garbage collection
@@ -295,10 +329,10 @@ EvaluableNode *EvaluableNodeManager::AllocUninitializedNode()
 	//fill new EvaluableNode slots with nullptr
 	nodes.resize(new_num_nodes, nullptr);
 
-	if(nodes[firstUnusedNodeIndex] == nullptr)
-		nodes[firstUnusedNodeIndex] = new EvaluableNode();
+	if(nodes[allocated_index] == nullptr)
+		nodes[allocated_index] = new EvaluableNode();
 
-	return nodes[firstUnusedNodeIndex++];
+	return nodes[allocated_index];
 }
 
 void EvaluableNodeManager::FreeAllNodesExceptReferencedNodes(size_t cur_first_unused_node_index)
@@ -435,6 +469,9 @@ void EvaluableNodeManager::FreeNodeTreeRecurse(EvaluableNode *tree)
 	}
 
 	tree->Invalidate();
+
+	tree->InitializeType(ENT_DEALLOCATED);
+	AddNodeToTLab(tree);
 }
 
 void EvaluableNodeManager::FreeNodeTreeWithCyclesRecurse(EvaluableNode *tree)
@@ -451,6 +488,7 @@ void EvaluableNodeManager::FreeNodeTreeWithCyclesRecurse(EvaluableNode *tree)
 		auto &tree_mcn = tree->GetMappedChildNodesReference();
 		std::swap(mcn, tree_mcn);
 		tree->Invalidate();
+		AddNodeToTLab(tree);
 
 		for(auto &[_, e] : mcn)
 		{
@@ -464,6 +502,7 @@ void EvaluableNodeManager::FreeNodeTreeWithCyclesRecurse(EvaluableNode *tree)
 	else if(tree->IsImmediate())
 	{
 		tree->Invalidate();
+		AddNodeToTLab(tree);
 	}
 	else //ordered
 	{
@@ -473,6 +512,7 @@ void EvaluableNodeManager::FreeNodeTreeWithCyclesRecurse(EvaluableNode *tree)
 		auto &tree_ocn = tree->GetOrderedChildNodesReference();
 		std::swap(ocn, tree_ocn);
 		tree->Invalidate();
+		AddNodeToTLab(tree);
 
 		for(auto &e : ocn)
 		{
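The fast path of AllocUninitializedNode above reserves a whole batch of slots with a single fetch_add instead of bumping the shared index once per node. A sketch of that batched reservation under the same assumptions (hypothetical names; the rollback mirrors the patch's firstUnusedNodeIndex -= tlabSize):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kTlabSize = 20;    //analogous to tlabSize

std::atomic<size_t> firstUnused{0};
std::vector<int *> slots(1000, nullptr);

//reserves kTlabSize contiguous slots with one atomic add; returns the first index,
// or SIZE_MAX if the reservation overran the array and was rolled back
size_t ReserveBatch()
{
	size_t first = firstUnused.fetch_add(kTlabSize);
	if(first + kTlabSize < slots.size())
		return first;

	//roll back; as in the patch, concurrent reservations can transiently overshoot,
	// but each thread subtracts exactly what it added, so the counter stays consistent
	firstUnused -= kTlabSize;
	return SIZE_MAX;
}

int main()
{
	return ReserveBatch() == SIZE_MAX ? 1 : 0;
}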
diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h
index 8ba1c950..d552a446 100644
--- a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h
+++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h
@@ -742,7 +742,7 @@ class EvaluableNodeManager
 	#endif
 
 		en->Invalidate();
-		ReclaimFreedNodesAtEnd();
+		AddNodeToTLab(en);
 	}
 
 	//attempts to free the node reference
@@ -758,7 +758,7 @@ class EvaluableNodeManager
 		if(enr.unique && enr != nullptr && !enr->GetNeedCycleCheck())
 		{
 			enr->Invalidate();
-			ReclaimFreedNodesAtEnd();
+			AddNodeToTLab(enr);
 		}
 	}
 
@@ -778,6 +778,7 @@ class EvaluableNodeManager
 		if(IsEvaluableNodeTypeImmediate(en->GetType()))
 		{
 			en->Invalidate();
+			AddNodeToTLab(en);
 		}
 		else if(!en->GetNeedCycleCheck())
 		{
@@ -793,8 +794,6 @@ class EvaluableNodeManager
 		#endif
 			FreeNodeTreeWithCyclesRecurse(en);
 		}
-
-		ReclaimFreedNodesAtEnd();
 	}
 
 	//attempts to free the node reference
@@ -825,8 +824,6 @@ class EvaluableNodeManager
 				FreeNodeTreeRecurse(e);
 			}
 		}
-
-		ReclaimFreedNodesAtEnd();
 	}
 
 	//returns the nodes currently referenced, allocating if they don't exist
@@ -895,24 +892,6 @@ class EvaluableNodeManager
 	// and can improve reuse without calling the more expensive FreeAllNodesExceptReferencedNodes
 	void CompactAllocatedNodes();
 
-	//allows freed nodes at the end of nodes to be reallocated
-	inline void ReclaimFreedNodesAtEnd()
-	{
-	#ifndef MULTITHREAD_SUPPORT
-		//this cannot be used with multithreading because each thread will be using RecommendGarbageCollection
-		//to determine whether it should stay in garbage collection, and this can break the logic
-		//an alternative implementation would be to have a separate variable to indicate that everything should
-		//go into garbage collection, regardless of the current state of firstUnusedNodeIndex, but the extra
-		//overhead of that logic called for each opcode is not worth the gains of acquiring a write lock here
-		//and occasionally freeing a small bit of memory
-
-		//if any group of nodes on the top are ready to be cleaned up cheaply, do so
-		while(firstUnusedNodeIndex > 0 && nodes[firstUnusedNodeIndex - 1] != nullptr
-				&& nodes[firstUnusedNodeIndex - 1]->IsNodeDeallocated())
-			firstUnusedNodeIndex--;
-	#endif
-	}
-
 	//returns the number of nodes currently being used that have not been freed yet
 	__forceinline size_t GetNumberOfUsedNodes() { return firstUnusedNodeIndex; }
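With the TLab in place, freeing no longer tries to shrink firstUnusedNodeIndex (ReclaimFreedNodesAtEnd is deleted above); a freed node is invalidated and pushed onto the current thread's buffer for immediate reuse. A minimal sketch of that recycle path (hypothetical Node type, not the real EvaluableNode):

#include <vector>

struct Node { bool deallocated = false; };

thread_local std::vector<Node *> tlab;

//freeing invalidates first, then parks the node in the thread local buffer,
// mirroring the Invalidate + AddNodeToTLab sequence above
void FreeToTLab(Node *n)
{
	n->deallocated = true;
	tlab.push_back(n);
}

//allocation pops the most recently freed node first
Node *AllocFromTLab()
{
	if(tlab.empty())
		return nullptr;    //caller falls back to the shared pool

	Node *n = tlab.back();
	tlab.pop_back();
	n->deallocated = false;
	return n;
}

int main()
{
	Node n;
	FreeToTLab(&n);
	return AllocFromTLab() == &n ? 0 : 1;
}

The LIFO order also tends to hand back cache-warm nodes, since the most recently freed node is the most likely to still be resident in cache.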
@@ -947,8 +926,12 @@ class EvaluableNodeManager
 	//sets the root node, implicitly defined as the first node in memory, to new_root
 	// note that new_root MUST have been allocated by this EvaluableNodeManager
 	//ensures that the new root node is kept and the old is released
+	//if new_root is nullptr, then it allocates its own ENT_NULL node
 	inline void SetRootNode(EvaluableNode *new_root)
 	{
+		if(new_root == nullptr)
+			new_root = AllocNode(ENT_NULL);
+
 	#ifdef MULTITHREAD_SUPPORT
 		//use WriteLock to be safe
 		Concurrency::WriteLock lock(managerAttributesMutex);
@@ -1005,6 +988,12 @@ class EvaluableNodeManager
 	//when numNodesToRunGarbageCollection are allocated, then it is time to run garbage collection
 	size_t numNodesToRunGarbageCollection;
 
+	//removes all EvaluableNodes from the thread local allocation buffer, leaving it empty
+	inline static void ClearThreadLocalAllocationBuffer()
+	{
+		threadLocalAllocationBuffer.clear();
+	}
+
 protected:
 	//allocates an EvaluableNode of the respective memory type in the appropriate way
 	// returns an uninitialized EvaluableNode -- care must be taken to set fields properly
@@ -1107,4 +1096,65 @@ class EvaluableNodeManager
 
 	//extra space to allocate when allocating
 	static const double allocExpansionFactor;
+
+	//tracks the last EvaluableNodeManager that accessed the thread local allocation
+	// buffer on a given thread, so that the buffer only ever holds nodes belonging
+	// to one manager; if a different manager accesses the buffer, the buffer is
+	// cleared to maintain this invariant
+#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
+	thread_local
+#endif
+	static inline EvaluableNodeManager *lastEvaluableNodeManager;
+
+	//returns the next available node from the thread local allocation buffer,
+	// or nullptr if the buffer is empty or is owned by a different manager
+	inline EvaluableNode *GetNextNodeFromTLab()
+	{
+		if(threadLocalAllocationBuffer.size() > 0 && this == lastEvaluableNodeManager)
+		{
+			EvaluableNode *end = threadLocalAllocationBuffer.back();
+			threadLocalAllocationBuffer.pop_back();
+			return end;
+		}
+		else
+		{
+			if(lastEvaluableNodeManager != this)
+				ClearThreadLocalAllocationBuffer();
+
+			lastEvaluableNodeManager = this;
+			return nullptr;
+		}
+	}
+
+	//adds a node to the thread local allocation buffer; if this is accessed by a
+	// different EvaluableNodeManager than the last time it was called on this thread,
+	// it will clear the buffer before adding the node
+	inline void AddNodeToTLab(EvaluableNode *en)
+	{
+		assert(en->IsNodeDeallocated());
+
+		if(this != lastEvaluableNodeManager)
+		{
+			threadLocalAllocationBuffer.clear();
+			lastEvaluableNodeManager = this;
+		}
+
+		threadLocalAllocationBuffer.push_back(en);
+	}
+
+private:
+
+	//number of nodes to reserve for a thread local allocation buffer at a time
+	static const int tlabSize = 20;
+
+	typedef std::vector<EvaluableNode *> TLab;
+
+	//this buffer holds EvaluableNode pointers reserved for allocation by a specific thread
+#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE)
+	thread_local
+#endif
+	inline static TLab threadLocalAllocationBuffer;
 };
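GetNextNodeFromTLab and AddNodeToTLab above maintain a one-manager invariant: a thread's buffer only ever holds nodes belonging to the manager that last touched it. A sketch of that owner-tagging pattern (hypothetical Pool type):

#include <vector>

class Pool
{
public:
	//pops a cached pointer, or returns nullptr if the cache is empty
	int *Get()
	{
		TakeOwnership();
		if(cache.empty())
			return nullptr;

		int *v = cache.back();
		cache.pop_back();
		return v;
	}

	//parks a pointer in this thread's cache
	void Put(int *v)
	{
		TakeOwnership();
		cache.push_back(v);
	}

private:
	//if another pool used this thread's cache last, drop its entries first
	void TakeOwnership()
	{
		if(lastOwner != this)
		{
			cache.clear();
			lastOwner = this;
		}
	}

	inline static thread_local std::vector<int *> cache;
	inline static thread_local Pool *lastOwner = nullptr;
};

int main()
{
	Pool a, b;
	static int x = 5;
	a.Put(&x);

	//b's access clears a's entries, so both reads miss the cache
	return (b.Get() == nullptr && a.Get() == nullptr) ? 0 : 1;
}

Clearing on an owner change is safe because the dropped pointers still live in the owning manager's nodes array; the buffer is only a cache, so forgetting entries loses a little reuse but never leaks memory.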
diff --git a/src/Amalgam/interpreter/Interpreter.h b/src/Amalgam/interpreter/Interpreter.h
index 30ef8724..c74727c3 100644
--- a/src/Amalgam/interpreter/Interpreter.h
+++ b/src/Amalgam/interpreter/Interpreter.h
@@ -753,6 +753,7 @@ class Interpreter
 			result = result_ref;
 			resultsSaver.SetStackLocation(results_saver_location, result);
 
+			EvaluableNodeManager::ClearThreadLocalAllocationBuffer();
 			interpreter.memoryModificationLock.unlock();
 			taskSet.MarkTaskCompleted();
 		}
@@ -821,6 +822,7 @@ class Interpreter
 				resultsSaver.SetStackLocation(results_saver_location, *result);
 			}
 
+			EvaluableNodeManager::ClearThreadLocalAllocationBuffer();
 			interpreter.memoryModificationLock.unlock();
 			taskSet.MarkTaskCompleted();
 		}
@@ -831,6 +833,7 @@ class Interpreter
 	inline void EndConcurrency()
 	{
 		//allow other threads to perform garbage collection
+		EvaluableNodeManager::ClearThreadLocalAllocationBuffer();
 		parentInterpreter->memoryModificationLock.unlock();
 		taskSet.WaitForTasks(taskEnqueueLock);
 		parentInterpreter->memoryModificationLock.lock();
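Each concurrency boundary above clears the TLab before releasing the memory modification lock, so no freed-but-buffered nodes stay parked in a thread while another thread may run garbage collection. A sketch of that clear-then-unlock ordering (hypothetical lock and buffer names):

#include <mutex>
#include <vector>

thread_local std::vector<int *> tlab;
std::mutex memoryModificationLock;    //stand-in for the interpreter's lock

//clear the thread local buffer before releasing the lock; once the lock is
// released, a collector may scan and recycle nodes, so none may remain parked
// in this thread's buffer
void LeaveMemorySection(std::unique_lock<std::mutex> &lock)
{
	tlab.clear();
	lock.unlock();
}

int main()
{
	std::unique_lock<std::mutex> lock(memoryModificationLock);
	LeaveMemorySection(lock);
	return 0;
}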