From 8fb08d67098b594ef872b2fcc4420ea5733bb6d8 Mon Sep 17 00:00:00 2001 From: howsohazard <143410553+howsohazard@users.noreply.github.com> Date: Thu, 24 Oct 2024 08:31:17 -0400 Subject: [PATCH] 21983: Improves logic and performance around code differencing and merging (#296) --- src/Amalgam/AssetManager.cpp | 2 +- src/Amalgam/GeneralizedDistance.h | 4 +- src/Amalgam/Merger.h | 2 +- src/Amalgam/Parser.cpp | 4 +- src/Amalgam/SBFDSColumnData.h | 2 +- src/Amalgam/entity/Entity.h | 10 +- src/Amalgam/evaluablenode/EvaluableNode.cpp | 6 +- src/Amalgam/evaluablenode/EvaluableNode.h | 10 +- .../EvaluableNodeTreeManipulation.cpp | 84 +++-- .../EvaluableNodeTreeManipulation.h | 6 +- src/Amalgam/importexport/FileSupportJSON.cpp | 6 +- src/Amalgam/importexport/FileSupportYAML.cpp | 6 +- .../interpreter/InterpreterDebugger.cpp | 2 +- .../InterpreterOpcodesDataTypes.cpp | 6 +- .../InterpreterOpcodesEntityControl.cpp | 2 +- .../InterpreterOpcodesListManipulation.cpp | 10 +- .../InterpreterOpcodesTransformations.cpp | 2 +- src/Amalgam/out.txt | 314 +++++++++--------- src/Amalgam/string/StringInternPool.h | 6 +- 19 files changed, 268 insertions(+), 216 deletions(-) diff --git a/src/Amalgam/AssetManager.cpp b/src/Amalgam/AssetManager.cpp index da2b7504..cc5161d8 100644 --- a/src/Amalgam/AssetManager.cpp +++ b/src/Amalgam/AssetManager.cpp @@ -437,7 +437,7 @@ std::string AssetManager::GetEvaluableNodeSourceFromComments(EvaluableNode *en) { if(en->HasComments()) { - auto comment = en->GetCommentsString(); + auto &comment = en->GetCommentsString(); auto first_line_end = comment.find('\n'); if(first_line_end == std::string::npos) source = comment; diff --git a/src/Amalgam/GeneralizedDistance.h b/src/Amalgam/GeneralizedDistance.h index bc9f6cf5..05fd9520 100644 --- a/src/Amalgam/GeneralizedDistance.h +++ b/src/Amalgam/GeneralizedDistance.h @@ -843,8 +843,8 @@ class GeneralizedDistanceEvaluator { if(a_type == ENIVT_STRING_ID && b_type == ENIVT_STRING_ID) { - auto a_str = 
string_intern_pool.GetStringFromID(a.stringID); - auto b_str = string_intern_pool.GetStringFromID(b.stringID); + auto &a_str = string_intern_pool.GetStringFromID(a.stringID); + auto &b_str = string_intern_pool.GetStringFromID(b.stringID); return static_cast(EvaluableNodeTreeManipulation::EditDistance(a_str, b_str)); } diff --git a/src/Amalgam/Merger.h b/src/Amalgam/Merger.h index f3c7eacc..d004b6a1 100644 --- a/src/Amalgam/Merger.h +++ b/src/Amalgam/Merger.h @@ -17,7 +17,7 @@ class MergeMetricResultsBase public: //starts off with an exact match of nothing constexpr MergeMetricResultsBase() - : commonality(0.0), mustMatch(false), exactMatch(false) + : commonality(0.0), mustMatch(false), exactMatch(true) { } constexpr MergeMetricResultsBase(double _similarity, bool must_match = false, bool exact_match = true) diff --git a/src/Amalgam/Parser.cpp b/src/Amalgam/Parser.cpp index 7f011880..3c8c1039 100644 --- a/src/Amalgam/Parser.cpp +++ b/src/Amalgam/Parser.cpp @@ -826,7 +826,7 @@ void Parser::AppendAssocKeyValuePair(UnparseData &upd, StringInternPool::StringI } else { - auto key_str = string_intern_pool.GetStringFromID(key_sid); + auto &key_str = string_intern_pool.GetStringFromID(key_sid); //surround in quotes only if needed if(HasCharactersBeyondIdentifier(key_str)) @@ -928,7 +928,7 @@ void Parser::Unparse(UnparseData &upd, EvaluableNode *tree, EvaluableNode *paren { upd.result.push_back('"'); - auto s = tree->GetStringValue(); + auto &s = tree->GetStringValue(); if(NeedsBackslashify(s)) upd.result.append(Backslashify(s)); else diff --git a/src/Amalgam/SBFDSColumnData.h b/src/Amalgam/SBFDSColumnData.h index f9158fa4..2b36904a 100644 --- a/src/Amalgam/SBFDSColumnData.h +++ b/src/Amalgam/SBFDSColumnData.h @@ -1154,7 +1154,7 @@ class SBFDSColumnData //updates longestStringLength and indexWithLongestString based on parameters inline void UpdateLongestString(StringInternPool::StringID sid, size_t index) { - auto str = string_intern_pool.GetStringFromID(sid); + auto &str = 
string_intern_pool.GetStringFromID(sid); size_t str_size = StringManipulation::GetUTF8CharacterLength(str); if(str_size > longestStringLength) { diff --git a/src/Amalgam/entity/Entity.h b/src/Amalgam/entity/Entity.h index 042bbb06..5d7506e8 100644 --- a/src/Amalgam/entity/Entity.h +++ b/src/Amalgam/entity/Entity.h @@ -365,7 +365,7 @@ class Entity bool RebuildLabelIndex(); //Returns the id for this Entity - inline const std::string GetId() + inline const std::string &GetId() { return string_intern_pool.GetStringFromID(GetIdStringId()); } @@ -673,7 +673,7 @@ class Entity inline static bool IsNamedEntity(StringInternPool::StringID id) { - auto id_name = string_intern_pool.GetStringFromID(id); + auto &id_name = string_intern_pool.GetStringFromID(id); if(id_name == StringInternPool::EMPTY_STRING) return false; return IsNamedEntity(id_name); @@ -718,7 +718,7 @@ class Entity if(label_sid == string_intern_pool.NOT_A_STRING_ID) return false; - auto label_name = string_intern_pool.GetStringFromID(label_sid); + auto &label_name = string_intern_pool.GetStringFromID(label_sid); return IsLabelValidAndPublic(label_name); } @@ -737,7 +737,7 @@ class Entity //returns true if the label is only accessible to itself (starts with !) 
static inline bool IsLabelPrivate(StringInternPool::StringID label_sid) { - auto label_name = string_intern_pool.GetStringFromID(label_sid); + auto &label_name = string_intern_pool.GetStringFromID(label_sid); return IsLabelPrivate(label_name); } @@ -754,7 +754,7 @@ class Entity //returns true if the label is accessible to contained entities (starts with ^) static inline bool IsLabelAccessibleToContainedEntities(StringInternPool::StringID label_sid) { - auto label_name = string_intern_pool.GetStringFromID(label_sid); + auto &label_name = string_intern_pool.GetStringFromID(label_sid); return IsLabelAccessibleToContainedEntities(label_name); } diff --git a/src/Amalgam/evaluablenode/EvaluableNode.cpp b/src/Amalgam/evaluablenode/EvaluableNode.cpp index 8c5b22a3..46fa9d3c 100644 --- a/src/Amalgam/evaluablenode/EvaluableNode.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNode.cpp @@ -197,7 +197,7 @@ double EvaluableNode::ToNumber(EvaluableNode *e, double value_if_null) auto sid = e->GetStringIDReference(); if(sid == string_intern_pool.NOT_A_STRING_ID) return value_if_null; - auto str = string_intern_pool.GetStringFromID(sid); + auto &str = string_intern_pool.GetStringFromID(sid); auto [value, success] = Platform_StringToNumber(str); if(success) return value; @@ -769,7 +769,7 @@ void EvaluableNode::SetStringID(StringInternPool::StringID id) } } -std::string EvaluableNode::GetStringValue() +const std::string &EvaluableNode::GetStringValue() { if(DoesEvaluableNodeTypeUseStringData(GetType())) { @@ -1100,7 +1100,7 @@ std::vector EvaluableNode::GetCommentsSeparateLines() if(comment_sid == string_intern_pool.NOT_A_STRING_ID || comment_sid == string_intern_pool.emptyStringId) return comment_lines; - auto full_comments = string_intern_pool.GetStringFromID(comment_sid); + auto &full_comments = string_intern_pool.GetStringFromID(comment_sid); //early exit if(full_comments.empty()) diff --git a/src/Amalgam/evaluablenode/EvaluableNode.h b/src/Amalgam/evaluablenode/EvaluableNode.h index 
501ab502..f667d638 100644 --- a/src/Amalgam/evaluablenode/EvaluableNode.h +++ b/src/Amalgam/evaluablenode/EvaluableNode.h @@ -332,7 +332,7 @@ class EvaluableNode } } - //returns true is node pointer e is nullptr or value of e has type ENT_NULL + //returns true if e is nullptr or value of e has type ENT_NULL static __forceinline bool IsNull(EvaluableNode *e) { return (e == nullptr || e->GetType() == ENT_NULL); @@ -511,7 +511,7 @@ class EvaluableNode return StringInternPool::NOT_A_STRING_ID; } void SetStringID(StringInternPool::StringID id); - std::string GetStringValue(); + const std::string &GetStringValue(); void SetStringValue(const std::string &v); //gets the string ID and clears the node's string ID, but does not destroy the string reference, // leaving the reference handling up to the caller @@ -538,7 +538,7 @@ class EvaluableNode //functions for getting and setting node comments by string or by StringID // all Comment functions perform any reference counting management necessary when setting and clearing StringInternPool::StringID GetCommentsStringId(); - inline std::string GetCommentsString() + inline const std::string &GetCommentsString() { return string_intern_pool.GetStringFromID(GetCommentsStringId()); } @@ -1272,7 +1272,7 @@ class EvaluableNodeImmediateValueWithType if(nodeValue.stringID == string_intern_pool.NOT_A_STRING_ID) return value_if_null; - auto str = string_intern_pool.GetStringFromID(nodeValue.stringID); + auto &str = string_intern_pool.GetStringFromID(nodeValue.stringID); auto [value, success] = Platform_StringToNumber(str); if(success) return value; @@ -1296,7 +1296,7 @@ class EvaluableNodeImmediateValueWithType if(nodeValue.stringID == string_intern_pool.NOT_A_STRING_ID) return std::make_pair(false, ""); - auto str = string_intern_pool.GetStringFromID(nodeValue.stringID); + auto &str = string_intern_pool.GetStringFromID(nodeValue.stringID); return std::make_pair(true, str); } diff --git 
a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp index 12e2e30f..43c8f9e1 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp @@ -61,8 +61,8 @@ inline StringInternPool::StringID MixStringValues(StringInternPool::StringID a, if(b == StringInternPool::NOT_A_STRING_ID) return string_intern_pool.CreateStringReference(a); - auto a_str = string_intern_pool.GetStringFromID(a); - auto b_str = string_intern_pool.GetStringFromID(b); + auto &a_str = string_intern_pool.GetStringFromID(a); + auto &b_str = string_intern_pool.GetStringFromID(b); std::string result = EvaluableNodeTreeManipulation::MixStrings(a_str, b_str, random_stream, fraction_a, fraction_b); @@ -156,9 +156,9 @@ bool EvaluableNodeTreeManipulation::NodesMixMethod::AreMergeable(EvaluableNode * MergeMetricResults EvaluableNodeTreeManipulation::StringSequenceMergeMetric::MergeMetric(std::string *a, std::string *b) { if(a == b || (a != nullptr && b != nullptr && *a == *b)) - return MergeMetricResults(1.0, a, b); + return MergeMetricResults(1.0, a, b, false, true); else - return MergeMetricResults(0.0, a, b); + return MergeMetricResults(0.0, a, b, false, false); } std::string *EvaluableNodeTreeManipulation::StringSequenceMergeMetric::MergeValues(std::string *a, std::string *b, bool must_merge) @@ -699,10 +699,6 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare if(tree1 == nullptr && tree2 == nullptr) return MergeMetricResults(1.0, tree1, tree2, false, true); - //if one is null and the other isn't, then stop - if( (tree1 == nullptr && tree2 != nullptr) || (tree1 != nullptr && tree2 == nullptr) ) - return MergeMetricResults(0.0, tree1, tree2, false, false); - //if the pair of nodes has already been computed, then just return the result auto found = memoized.find(std::make_pair(tree1, tree2)); if(found != end(memoized)) @@ -732,14 +728,14 @@ 
MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare size_t tree2_ordered_nodes_size = 0; size_t tree2_mapped_nodes_size = 0; - if(tree1->IsAssociativeArray()) + if(EvaluableNode::IsAssociativeArray(tree1)) tree1_mapped_nodes_size = tree1->GetMappedChildNodesReference().size(); - else if(!tree1->IsImmediate()) + else if(EvaluableNode::IsOrderedArray(tree1)) tree1_ordered_nodes_size = tree1->GetOrderedChildNodesReference().size(); - if(tree2->IsAssociativeArray()) + if(EvaluableNode::IsAssociativeArray(tree2)) tree2_mapped_nodes_size = tree2->GetMappedChildNodesReference().size(); - else if(!tree2->IsImmediate()) + else if(EvaluableNode::IsOrderedArray(tree2)) tree2_ordered_nodes_size = tree2->GetOrderedChildNodesReference().size(); if(tree1_ordered_nodes_size == 0 && tree2_ordered_nodes_size == 0 @@ -917,8 +913,7 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare } } - - if(tree1_mapped_nodes_size > 0 && tree2_mapped_nodes_size > 0) + else if(tree1_mapped_nodes_size > 0 && tree2_mapped_nodes_size > 0) { //use keys from first node auto &tree_2_mcn = tree2->GetMappedChildNodesReference(); @@ -940,28 +935,62 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare { for(auto node : tree1->GetOrderedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(tree2, node, memoized, checked); + auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked); + + //mark as nonexact match because had to traverse downward, + // but preserve whether was an exact match for early stopping + bool exact_match = sub_match.exactMatch; + sub_match.exactMatch = false; if(sub_match > commonality) + { commonality = sub_match; + memoized.emplace(std::make_pair(node, tree2), commonality); + if(exact_match) + break; + } } } else if(tree1_mapped_nodes_size > 0) { for(auto &[node_id, node] : tree1->GetMappedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(tree2, node, memoized, checked); + auto sub_match = 
NumberOfSharedNodes(node, tree2, memoized, checked); + + //mark as nonexact match because had to traverse downward, + // but preserve whether was an exact match for early stopping + bool exact_match = sub_match.exactMatch; + sub_match.exactMatch = false; if(sub_match > commonality) + { commonality = sub_match; + memoized.emplace(std::make_pair(node, tree2), commonality); + if(exact_match) + break; + } } } + } + //check again for commonality in case exact match was found by iterating via tree1 above + if(!commonality.exactMatch) + { if(tree2_ordered_nodes_size > 0) { - for(auto cn : tree2->GetOrderedChildNodesReference()) + for(auto node : tree2->GetOrderedChildNodesReference()) { - auto sub_match = NumberOfSharedNodes(tree1, cn, memoized, checked); + auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked); + + //mark as nonexact match because had to traverse downward, + // but preserve whether was an exact match for early stopping + bool exact_match = sub_match.exactMatch; + sub_match.exactMatch = false; if(sub_match > commonality) + { commonality = sub_match; + memoized.emplace(std::make_pair(tree1, node), commonality); + if(exact_match) + break; + } } } else if(tree2_mapped_nodes_size > 0) @@ -969,8 +998,18 @@ MergeMetricResults EvaluableNodeTreeManipulation::NumberOfShare for(auto &[node_id, node] : tree2->GetMappedChildNodesReference()) { auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked); + + //mark as nonexact match because had to traverse downward, + // but preserve whether was an exact match for early stopping + bool exact_match = sub_match.exactMatch; + sub_match.exactMatch = false; if(sub_match > commonality) + { commonality = sub_match; + memoized.emplace(std::make_pair(tree1, node), commonality); + if(exact_match) + break; + } } } } @@ -1054,7 +1093,7 @@ bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTree(EvaluableNode *t for(size_t i = 0; i < num_labels; i++) { auto label_sid = tree->GetLabelStringId(i); - 
auto label_name = string_intern_pool.GetStringFromID(label_sid); + auto &label_name = string_intern_pool.GetStringFromID(label_sid); if(label_name.size() == 0) continue; @@ -1109,7 +1148,7 @@ bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTreeAndMakeLabelNorma for(size_t i = 0; i < num_labels; i++) { auto label_sid = tree->GetLabelStringId(i); - auto label_name = string_intern_pool.GetStringFromID(label_sid); + auto &label_name = string_intern_pool.GetStringFromID(label_sid); if(label_name.size() == 0) continue; @@ -1173,9 +1212,6 @@ MergeMetricResults EvaluableNodeTreeManipulation::CommonalityBe if(n1 == nullptr && n2 == nullptr) return MergeMetricResults(1.0, n1, n2, false, true); - if(n1 == nullptr || n2 == nullptr) - return MergeMetricResults(0.0, n1, n2, false, false); - auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2); auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2); @@ -1214,7 +1250,7 @@ std::pair EvaluableNodeTreeManipulation::CommonalityBet double n2_value = n2->GetNumberValueReference(); return std::make_pair(n1, n1_value == n2_value ? 
1.0 : 0.0); } - if(n1_type == ENT_STRING) + if(n1_type == ENT_STRING || n1_type == ENT_SYMBOL) { auto n1_sid = n1->GetStringID(); auto n2_sid = n2->GetStringID(); diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h index 148d54cc..d10e632b 100644 --- a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h @@ -271,7 +271,7 @@ class EvaluableNodeTreeManipulation if(a == b) return MergeMetricResults(1.0, a, b); else - return MergeMetricResults(0.0, a, b); + return MergeMetricResults(0.0, a, b, false, false); } virtual uint32_t MergeValues(uint32_t a, uint32_t b, bool must_merge = false) @@ -357,8 +357,8 @@ class EvaluableNodeTreeManipulation if(sid1 == string_intern_pool.NOT_A_STRING_ID || sid2 == string_intern_pool.NOT_A_STRING_ID) return 0.125; - auto s1 = string_intern_pool.GetStringFromID(sid1); - auto s2 = string_intern_pool.GetStringFromID(sid2); + auto &s1 = string_intern_pool.GetStringFromID(sid1); + auto &s2 = string_intern_pool.GetStringFromID(sid2); size_t s1_len = 0; size_t s2_len = 0; diff --git a/src/Amalgam/importexport/FileSupportJSON.cpp b/src/Amalgam/importexport/FileSupportJSON.cpp index 580671cc..3a20841e 100644 --- a/src/Amalgam/importexport/FileSupportJSON.cpp +++ b/src/Amalgam/importexport/FileSupportJSON.cpp @@ -154,7 +154,7 @@ bool EvaluableNodeToJsonStringRecurse(EvaluableNode *en, std::string &json_str, else first_cn = false; - auto str = string_intern_pool.GetStringFromID(cn_id); + auto &str = string_intern_pool.GetStringFromID(cn_id); EscapeAndAppendStringToJsonString(str, json_str); json_str += ':'; @@ -184,7 +184,7 @@ bool EvaluableNodeToJsonStringRecurse(EvaluableNode *en, std::string &json_str, if(i > 0) json_str += ','; - auto str = string_intern_pool.GetStringFromID(key_sids[i]); + auto &str = string_intern_pool.GetStringFromID(key_sids[i]); EscapeAndAppendStringToJsonString(str, 
json_str); json_str += ':'; @@ -265,7 +265,7 @@ bool EvaluableNodeToJsonStringRecurse(EvaluableNode *en, std::string &json_str, } else { - auto str_value = en->GetStringValue(); + auto &str_value = en->GetStringValue(); EscapeAndAppendStringToJsonString(str_value, json_str); } } diff --git a/src/Amalgam/importexport/FileSupportYAML.cpp b/src/Amalgam/importexport/FileSupportYAML.cpp index 680a1898..73255df4 100644 --- a/src/Amalgam/importexport/FileSupportYAML.cpp +++ b/src/Amalgam/importexport/FileSupportYAML.cpp @@ -76,7 +76,7 @@ bool EvaluableNodeToYamlStringRecurse(EvaluableNode *en, ryml::NodeRef &built_el { for(auto &[cn_id, cn] : mcn) { - auto str = string_intern_pool.GetStringFromID(cn_id); + auto &str = string_intern_pool.GetStringFromID(cn_id); auto new_element = built_element.append_child(); new_element << ryml::key(str); if(!EvaluableNodeToYamlStringRecurse(cn, new_element, sort_keys)) @@ -96,7 +96,7 @@ bool EvaluableNodeToYamlStringRecurse(EvaluableNode *en, ryml::NodeRef &built_el { auto k = mcn.find(key_sids[i]); - auto str = string_intern_pool.GetStringFromID(k->first); + auto &str = string_intern_pool.GetStringFromID(k->first); auto new_element = built_element.append_child(); new_element << ryml::key(str); @@ -145,7 +145,7 @@ bool EvaluableNodeToYamlStringRecurse(EvaluableNode *en, ryml::NodeRef &built_el } else { - auto str_value = en->GetStringValue(); + auto &str_value = en->GetStringValue(); built_element << str_value; } } diff --git a/src/Amalgam/interpreter/InterpreterDebugger.cpp b/src/Amalgam/interpreter/InterpreterDebugger.cpp index 0054fe73..8f260fd9 100644 --- a/src/Amalgam/interpreter/InterpreterDebugger.cpp +++ b/src/Amalgam/interpreter/InterpreterDebugger.cpp @@ -672,7 +672,7 @@ void Interpreter::DebugCheckBreakpointsAndUpdateState(EvaluableNode *en, bool be && _interpreter_debug_data.breakLineFile.size() > 0) { //if it has a source, check against all of the source break points - std::string comment_str = en->GetCommentsString(); + 
auto &comment_str = en->GetCommentsString(); if(comment_str.rfind(Parser::sourceCommentPrefix, 0) != std::string::npos) { for(auto &breakpoint_str : _interpreter_debug_data.breakLineFile) diff --git a/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp b/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp index eab58348..ae6922b4 100644 --- a/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp +++ b/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp @@ -527,7 +527,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_FORMAT(EvaluableNode *en, } else //need to parse the string { - auto from_type_str = string_intern_pool.GetStringFromID(from_type); + auto &from_type_str = string_intern_pool.GetStringFromID(from_type); //see if it starts with the date or time string if(from_type_str.compare(0, date_string.size(), date_string) == 0) @@ -844,7 +844,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_FORMAT(EvaluableNode *en, } else //need to parse the string { - auto to_type_str = string_intern_pool.GetStringFromID(to_type); + auto &to_type_str = string_intern_pool.GetStringFromID(to_type); //if it starts with the date or time string if(to_type_str.compare(0, date_string.size(), date_string) == 0) @@ -1480,7 +1480,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_SUBSTR(EvaluableNode *en, { //make a copy of the string so the node can be freed //(if this is a performance cost found in profiling, it can be fixed with more logic) - std::string regex_str = substr_node->GetStringValue(); + auto &regex_str = substr_node->GetStringValue(); evaluableNodeManager->FreeNodeTreeIfPossible(substr_node); if(replace_string) diff --git a/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp b/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp index cf94159d..cadd7054 100644 --- a/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp +++ b/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp @@ -367,7 +367,7 @@
EvaluableNodeReference Interpreter::InterpretNode_ENT_CREATE_ENTITIES(EvaluableN continue; } - auto new_entity_id_string = string_intern_pool.GetStringFromID(new_entity_id); + auto &new_entity_id_string = string_intern_pool.GetStringFromID(new_entity_id); std::string rand_state = entity_container->CreateRandomStreamFromStringAndRand(new_entity_id_string); //create new entity diff --git a/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp b/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp index 29bfce58..d6baaf5b 100644 --- a/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp +++ b/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp @@ -78,7 +78,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_FIRST(EvaluableNode *en, b if(sid == string_intern_pool.NOT_A_STRING_ID || sid == string_intern_pool.emptyStringId) return AllocReturn(StringInternPool::NOT_A_STRING_ID, immediate_result); - std::string s = string_intern_pool.GetStringFromID(sid); + auto &s = string_intern_pool.GetStringFromID(sid); size_t utf8_char_length = StringManipulation::GetUTF8CharacterLength(s, 0); std::string substring = s.substr(0, utf8_char_length); return ReuseOrAllocReturn(list, substring, immediate_result); @@ -187,7 +187,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_TAIL(EvaluableNode *en, bo if(sid == string_intern_pool.NOT_A_STRING_ID || sid == string_intern_pool.emptyStringId) return AllocReturn(StringInternPool::NOT_A_STRING_ID, immediate_result); - std::string s = string_intern_pool.GetStringFromID(sid); + auto &s = string_intern_pool.GetStringFromID(sid); //remove the first element(s) size_t num_chars_to_drop = 0; @@ -285,7 +285,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_LAST(EvaluableNode *en, bo if(sid == string_intern_pool.NOT_A_STRING_ID || sid == string_intern_pool.emptyStringId) return AllocReturn(StringInternPool::NOT_A_STRING_ID, immediate_result); - std::string s = 
string_intern_pool.GetStringFromID(sid); + auto &s = string_intern_pool.GetStringFromID(sid); auto [utf8_char_start_offset, utf8_char_length] = StringManipulation::GetLastUTF8CharacterOffsetAndLength(s); @@ -392,7 +392,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_TRUNC(EvaluableNode *en, b if(sid == string_intern_pool.NOT_A_STRING_ID || sid == string_intern_pool.emptyStringId) return AllocReturn(StringInternPool::NOT_A_STRING_ID, immediate_result); - std::string s = string_intern_pool.GetStringFromID(sid); + auto &s = string_intern_pool.GetStringFromID(sid); //remove the last element(s) size_t num_chars_to_keep = 0; @@ -536,7 +536,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_SIZE(EvaluableNode *en, bo { if(cur->GetType() == ENT_STRING) { - auto s = cur->GetStringValue(); + auto &s = cur->GetStringValue(); size = StringManipulation::GetNumUTF8Characters(s); } else diff --git a/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp b/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp index 56d6342a..6d67bcbf 100644 --- a/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp +++ b/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp @@ -1191,7 +1191,7 @@ EvaluableNodeReference Interpreter::InterpretNode_ENT_CONTAINS_VALUE(EvaluableNo else if(container->GetType() == ENT_STRING && !EvaluableNode::IsNull(value)) { //compute regular expression - std::string s = container->GetStringValue(); + auto &s = container->GetStringValue(); std::string value_as_str = EvaluableNode::ToStringPreservingOpcodeType(value); diff --git a/src/Amalgam/out.txt b/src/Amalgam/out.txt index e290ea15..1611ffcd 100644 --- a/src/Amalgam/out.txt +++ b/src/Amalgam/out.txt @@ -231,10 +231,10 @@ hello world: 12 and 2 (print "hello") [(null) (null) .infinity -.infinity] -{a 1 b 2 c ["alpha" "beta" "gamma"]} +{b 2 a 1 c ["alpha" "beta" "gamma"]} { - a 1 b 2 + a 1 c ["alpha" "beta" "gamma"] } @@ -635,7 +635,7 @@ abcdef 0.14384103622589045 
--first-- 4 -1 +2 1 0 a @@ -652,16 +652,16 @@ a [] { a 1 - b 2 c 3 + d 4 e 5 f 6 } -{b 2 c 3} +{d 4 f 6} { a 1 - b 2 c 3 + d 4 f 6 } { @@ -686,7 +686,7 @@ abcdef --last-- this -1 +2 1 0 c @@ -703,16 +703,16 @@ c [] { a 1 - b 2 c 3 + d 4 e 5 f 6 } -{b 2 c 3} +{d 4 f 6} { a 1 - b 2 c 3 + d 4 f 6 } { @@ -1064,7 +1064,7 @@ abcdef [1 3] [9 5] --indices-- -["a" "4" "b" "c"] +["b" "4" "a" "c"] [ 0 1 @@ -1076,7 +1076,7 @@ abcdef 7 ] --values-- -[1 "d" 2 3] +[2 "d" 1 3] [ "a" 1 @@ -1097,7 +1097,7 @@ abcdef 4 "d" ] -[1 "d" 2 3] +[2 1 3 "d"] [ 1 2 @@ -1327,7 +1327,7 @@ current_index: 2 rmfile "del /s /q " rwww 1 slash "\\" - start_time 1729551682.854369 + start_time 1729766223.550492 www 1 x 12 zz 10 @@ -1373,7 +1373,7 @@ current_index: 2 rmfile "del /s /q " rwww 1 slash "\\" - start_time 1729551682.854369 + start_time 1729766223.550492 www 1 x 12 zz 10 @@ -1418,7 +1418,7 @@ current_index: 2 rmfile "del /s /q " rwww 1 slash "\\" - start_time 1729551682.854369 + start_time 1729766223.550492 www 1 x 12 zz 10 @@ -1551,7 +1551,7 @@ true --weighted_rand-- b -["a" "a" "a" "a"] +["b" "b" "b" "b"] b ["a" "b" @(get (target 2) 1) @(get (target 2) 1)] @@ -1562,11 +1562,11 @@ infinity test c or d: ["c" "c" "c" "c"] infinity test c or d: ["c" @(get (target 2) 0) @(get (target 2) 0) @(get (target 2) 0)] -{a 29 b 44 c 27} +{a 18 b 55 c 27} {a 25 b 50 c 25} -["1" "7" "2"] +["1" "8" "3"] --get_rand_seed-- ¶‡¨± ÎA)p1͉ÿ @@ -1612,8 +1612,8 @@ infinity test c or d: ["c" @(get (target 2) 0) @(get (target 2) 0) @(get (target string --set_type-- (- 3 4) -["a" 4 "b" 3] -["a" 4 "b" 3] +["b" 3 "a" 4] +["b" 3 "a" 4] {a 4 b 3} 8.7 (parallel @@ -1662,17 +1662,17 @@ string {a 3 b 4} {c "c"} ] -21: [{"a":3,"b":4},{"d":null,"c":"c"}] +21: [{"b":4,"a":3},{"c":"c","d":null}] 22: [{"a":3,"b":4},{"c":"c","d":null}] -23: d: 4 +23: b: 2 e: - a - b - - .inf a: 1 -b: 2 c: 3 +d: 4 24: a: 1 b: 2 @@ -1685,7 +1685,7 @@ e: - .inf 25: {a 1} -current date-time in epoch: 2024-10-21-19.01.23.5965500 +current date-time in 
epoch: 2024-10-24-06.37.03.8694990 2020-06-07 00:22:59 1391230800 1391230800 @@ -2234,16 +2234,6 @@ decrypted: hello {_ (null)} (replace _ - [] - (lambda - { - a 2 - g (get - (current_value 1) - "g" - ) - } - ) ["g"] (lambda [ @@ -2254,6 +2244,16 @@ decrypted: hello 4 ] ) + [] + (lambda + { + a 2 + g (get + (current_value 1) + "g" + ) + } + ) ) ) (declare @@ -2455,17 +2455,19 @@ decrypted: hello [ 1 5 - 3 - 2 + 2.5 (associate "a" 3 "b" 4) (lambda (if true 1 - (parallel (get_entity_comments) 1) + (parallel + (get_entity_comments) + (lambda (null)) + ) ) ) - [] + [5] ] [ 1 @@ -2478,8 +2480,11 @@ decrypted: hello 1 (seq (get_entity_comments) - (lambda (null)) - 1 + (lambda + (print + [2 9] + ) + ) ) ) ) @@ -2824,16 +2829,18 @@ flatten restore with parallel 19.264241099357605 --intersect_entities-- (associate "b" 4) -MergeEntityChild2 -(associate "p" 3 "q" 4) MergeEntityChild1 (associate "x" 3 "y" 4) -_2710920158 -(associate "E" 3 "F" 4) +MergeEntityChild2 +(associate "p" 3 "q" 4) _1797215995 (associate "e" 3 "f" 4) +_2710920158 +(associate "E" 3 "F" 4) --union_entities-- (associate "b" 4 "a" 3 "c" 3) +MergeEntityChild1 +(associate "x" 3 "y" 4 "z" 5) MergeEntityChild2 (associate "p" @@ -2847,28 +2854,26 @@ MergeEntityChild2 "w" 7 ) -MergeEntityChild1 -(associate "x" 3 "y" 4 "z" 5) -_2710920158 +_1797215995 (associate - "E" + "e" 3 - "F" + "f" 4 - "G" + "g" 5 - "H" + "h" 6 ) -_1797215995 +_2710920158 (associate - "e" + "E" 3 - "f" + "F" 4 - "g" + "G" 5 - "h" + "H" 6 ) (parallel @@ -3225,8 +3230,14 @@ _830877783 [] (lambda { - E 3 - F 4 + E (get + (current_value 1) + "E" + ) + F (get + (current_value 1) + "F" + ) G 5 H 6 } @@ -3251,7 +3262,16 @@ _830877783 _ [] (lambda - {e 3 f 4} + { + e (get + (current_value 1) + "e" + ) + f (get + (current_value 1) + "f" + ) + } ) ) ) @@ -3281,10 +3301,6 @@ DiffEntityChild1 root: {x 3 y 4 z 5} contained_entities new_entity: ["DiffEntityChild1" "OnlyIn2" "_445026204" "_1497566482"] difference between DiffEntity2 and new_entity: 
-(declare - {_ (null) new_entity (null)} - (clone_entities _ new_entity) -) (declare {_ (null) new_entity (null)} (assign @@ -3296,19 +3312,7 @@ difference between DiffEntity2 and new_entity: (lambda (declare {_ (null)} - (replace - _ - [] - (lambda - { - b (get - (current_value 1) - "b" - ) - c 3 - } - ) - ) + (replace _) ) ) { @@ -3319,29 +3323,7 @@ difference between DiffEntity2 and new_entity: ) ) (create_entities - (append new_entity "OnlyIn2") - (call - (lambda - (declare - {_ (null)} - (replace - _ - [] - (lambda - {o 6} - ) - ) - ) - ) - { - _ (retrieve_entity_root - (append _ "OnlyIn2") - ) - } - ) - ) - (create_entities - (append new_entity "_2860796594") + (append new_entity "_445026204") (call (lambda (declare @@ -3351,16 +3333,16 @@ difference between DiffEntity2 and new_entity: [] (lambda { - E (get + E (null) + F (null) + G (get (current_value 1) - "E" + "G" ) - F (get + H (get (current_value 1) - "F" + "H" ) - G 5 - H 6 } ) ) @@ -3368,13 +3350,13 @@ difference between DiffEntity2 and new_entity: ) { _ (retrieve_entity_root - (append _ "_2860796594") + (append _ "_445026204") ) } ) ) (create_entities - (append new_entity "_2612373163") + (append new_entity "_1497566482") (call (lambda (declare @@ -3383,23 +3365,14 @@ difference between DiffEntity2 and new_entity: _ [] (lambda - { - e (get - (current_value 1) - "e" - ) - f (get - (current_value 1) - "f" - ) - } + {e (null) f (null)} ) ) ) ) { _ (retrieve_entity_root - (append _ "_2612373163") + (append _ "_1497566482") ) } ) @@ -3408,14 +3381,12 @@ difference between DiffEntity2 and new_entity: (append _ "DiffEntityChild1") (append new_entity "DiffEntityChild1") ) + (clone_entities + (append _ "OnlyIn2") + (append new_entity "OnlyIn2") + ) new_entity ) -new_entity: DiffContainerReconstructed -new_entity root: {b 4 c 3} -DiffEntityChild1 root: -{x 3 y 4 z 6} -contained_entities new_entity: ["OnlyIn2" "_2860796594" "_2612373163" "DiffEntityChild1"] -difference between DiffContainer and DiffEntity2: 
(declare {_ (null) new_entity (null)} (assign @@ -3427,7 +3398,19 @@ difference between DiffContainer and DiffEntity2: (lambda (declare {_ (null)} - (replace _) + (replace + _ + [] + (lambda + { + b (get + (current_value 1) + "b" + ) + c 3 + } + ) + ) ) ) { @@ -3437,6 +3420,28 @@ difference between DiffContainer and DiffEntity2: ) ) ) + (create_entities + (append new_entity "OnlyIn2") + (call + (lambda + (declare + {_ (null)} + (replace + _ + [] + (lambda + {o 6} + ) + ) + ) + ) + { + _ (retrieve_entity_root + (append _ "OnlyIn2") + ) + } + ) + ) (create_entities (append new_entity "_2860796594") (call @@ -3448,16 +3453,10 @@ difference between DiffContainer and DiffEntity2: [] (lambda { - E (null) - F (null) - G (get - (current_value 1) - "G" - ) - H (get - (current_value 1) - "H" - ) + E 3 + F 4 + G 5 + H 6 } ) ) @@ -3480,7 +3479,7 @@ difference between DiffContainer and DiffEntity2: _ [] (lambda - {e (null) f (null)} + {e 3 f 4} ) ) ) @@ -3492,26 +3491,43 @@ difference between DiffContainer and DiffEntity2: } ) ) - (clone_entities - (append _ "OnlyIn2") - (append new_entity "OnlyIn2") - ) (clone_entities (append _ "DiffEntityChild1") (append new_entity "DiffEntityChild1") ) new_entity ) +new_entity: DiffContainerReconstructed +new_entity root: {b 4 c 3} +DiffEntityChild1 root: +{x 3 y 4 z 6} +contained_entities new_entity: ["OnlyIn2" "_2860796594" "_2612373163" "DiffEntityChild1"] +difference between DiffContainer and DiffEntity2: +(declare + {_ (null) new_entity (null)} + (clone_entities _ new_entity) +) --mix_entities-- (associate "b" 4 "a" 3) -MergeEntityChild2 -(associate "p" 3 "q" 4) MergeEntityChild1 -(associate "x" 3 "y" 4 "z" 5) -_2710920158 -(associate "E" 3 "F" 4 "H" 6) +(associate "x" 3 "y" 4) +MergeEntityChild2 +(associate + "p" + 3 + "q" + 4 + "u" + 5 + "v" + 6 + "w" + 7 +) _1797215995 -(associate "e" 3 "f" 4 "g" 5) +(associate "e" 3 "f" 4 "h" 6) +_2710920158 +(associate "E" 3 "F" 4 "G" 5) --get_entity_comments-- Full test This is a suite of unit 
tests. @@ -3590,7 +3606,7 @@ deep sets --set_entity_root_permission-- RootTest -1729551683.747663 +1729766224.040758 (true) RootTest @@ -4938,4 +4954,4 @@ rmdir /s /q amlg_code\persistent_tree_test_root del /s /q amlg_code\persist_module_test\psm.mdam del /s /q amlg_code\persist_module_test.mdam --total execution time-- -2.0564751625061035 +1.2825310230255127 diff --git a/src/Amalgam/string/StringInternPool.h b/src/Amalgam/string/StringInternPool.h index 528892cb..4e02454e 100644 --- a/src/Amalgam/string/StringInternPool.h +++ b/src/Amalgam/string/StringInternPool.h @@ -56,9 +56,9 @@ class StringInternPool } //translates the id to a string, empty string if it does not exist - //because a flat hash map is used as the storage container, it is possible that any allocation or deallocation - //may invalidate the location, so a copy must be made to return the value - inline const std::string GetStringFromID(StringID id) + //note that the reference is only valid as long as the string id is valid; if a string is needed + //after a reference is destroyed, the caller must make a copy first + inline const std::string &GetStringFromID(StringID id) { if(id == NOT_A_STRING_ID) return EMPTY_STRING;