Skip to content

Commit

Permalink
21983: Improves logic and performance around code differencing and me…
Browse files Browse the repository at this point in the history
…rging (#296)
  • Loading branch information
howsohazard authored Oct 24, 2024
1 parent e17f52b commit 8fb08d6
Show file tree
Hide file tree
Showing 19 changed files with 268 additions and 216 deletions.
2 changes: 1 addition & 1 deletion src/Amalgam/AssetManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ std::string AssetManager::GetEvaluableNodeSourceFromComments(EvaluableNode *en)
{
if(en->HasComments())
{
auto comment = en->GetCommentsString();
auto &comment = en->GetCommentsString();
auto first_line_end = comment.find('\n');
if(first_line_end == std::string::npos)
source = comment;
Expand Down
4 changes: 2 additions & 2 deletions src/Amalgam/GeneralizedDistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -843,8 +843,8 @@ class GeneralizedDistanceEvaluator
{
if(a_type == ENIVT_STRING_ID && b_type == ENIVT_STRING_ID)
{
auto a_str = string_intern_pool.GetStringFromID(a.stringID);
auto b_str = string_intern_pool.GetStringFromID(b.stringID);
auto &a_str = string_intern_pool.GetStringFromID(a.stringID);
auto &b_str = string_intern_pool.GetStringFromID(b.stringID);
return static_cast<double>(EvaluableNodeTreeManipulation::EditDistance(a_str, b_str));
}

Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/Merger.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class MergeMetricResultsBase
public:
//starts off with an exact match of nothing
constexpr MergeMetricResultsBase()
: commonality(0.0), mustMatch(false), exactMatch(false)
: commonality(0.0), mustMatch(false), exactMatch(true)
{ }

constexpr MergeMetricResultsBase(double _similarity, bool must_match = false, bool exact_match = true)
Expand Down
4 changes: 2 additions & 2 deletions src/Amalgam/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ void Parser::AppendAssocKeyValuePair(UnparseData &upd, StringInternPool::StringI
}
else
{
auto key_str = string_intern_pool.GetStringFromID(key_sid);
auto &key_str = string_intern_pool.GetStringFromID(key_sid);

//surround in quotes only if needed
if(HasCharactersBeyondIdentifier(key_str))
Expand Down Expand Up @@ -928,7 +928,7 @@ void Parser::Unparse(UnparseData &upd, EvaluableNode *tree, EvaluableNode *paren
{
upd.result.push_back('"');

auto s = tree->GetStringValue();
auto &s = tree->GetStringValue();
if(NeedsBackslashify(s))
upd.result.append(Backslashify(s));
else
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/SBFDSColumnData.h
Original file line number Diff line number Diff line change
Expand Up @@ -1154,7 +1154,7 @@ class SBFDSColumnData
//updates longestStringLength and indexWithLongestString based on parameters
inline void UpdateLongestString(StringInternPool::StringID sid, size_t index)
{
auto str = string_intern_pool.GetStringFromID(sid);
auto &str = string_intern_pool.GetStringFromID(sid);
size_t str_size = StringManipulation::GetUTF8CharacterLength(str);
if(str_size > longestStringLength)
{
Expand Down
10 changes: 5 additions & 5 deletions src/Amalgam/entity/Entity.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ class Entity
bool RebuildLabelIndex();

//Returns the id for this Entity
inline const std::string GetId()
inline const std::string &GetId()
{
return string_intern_pool.GetStringFromID(GetIdStringId());
}
Expand Down Expand Up @@ -673,7 +673,7 @@ class Entity

inline static bool IsNamedEntity(StringInternPool::StringID id)
{
auto id_name = string_intern_pool.GetStringFromID(id);
auto &id_name = string_intern_pool.GetStringFromID(id);
if(id_name == StringInternPool::EMPTY_STRING)
return false;
return IsNamedEntity(id_name);
Expand Down Expand Up @@ -718,7 +718,7 @@ class Entity
if(label_sid == string_intern_pool.NOT_A_STRING_ID)
return false;

auto label_name = string_intern_pool.GetStringFromID(label_sid);
auto &label_name = string_intern_pool.GetStringFromID(label_sid);
return IsLabelValidAndPublic(label_name);
}

Expand All @@ -737,7 +737,7 @@ class Entity
//returns true if the label is only accessible to itself (starts with !)
static inline bool IsLabelPrivate(StringInternPool::StringID label_sid)
{
auto label_name = string_intern_pool.GetStringFromID(label_sid);
auto &label_name = string_intern_pool.GetStringFromID(label_sid);
return IsLabelPrivate(label_name);
}

Expand All @@ -754,7 +754,7 @@ class Entity
//returns true if the label is accessible to contained entities (starts with ^)
static inline bool IsLabelAccessibleToContainedEntities(StringInternPool::StringID label_sid)
{
auto label_name = string_intern_pool.GetStringFromID(label_sid);
auto &label_name = string_intern_pool.GetStringFromID(label_sid);
return IsLabelAccessibleToContainedEntities(label_name);
}

Expand Down
6 changes: 3 additions & 3 deletions src/Amalgam/evaluablenode/EvaluableNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ double EvaluableNode::ToNumber(EvaluableNode *e, double value_if_null)
auto sid = e->GetStringIDReference();
if(sid == string_intern_pool.NOT_A_STRING_ID)
return value_if_null;
auto str = string_intern_pool.GetStringFromID(sid);
auto &str = string_intern_pool.GetStringFromID(sid);
auto [value, success] = Platform_StringToNumber(str);
if(success)
return value;
Expand Down Expand Up @@ -769,7 +769,7 @@ void EvaluableNode::SetStringID(StringInternPool::StringID id)
}
}

std::string EvaluableNode::GetStringValue()
const std::string &EvaluableNode::GetStringValue()
{
if(DoesEvaluableNodeTypeUseStringData(GetType()))
{
Expand Down Expand Up @@ -1100,7 +1100,7 @@ std::vector<std::string> EvaluableNode::GetCommentsSeparateLines()
if(comment_sid == string_intern_pool.NOT_A_STRING_ID || comment_sid == string_intern_pool.emptyStringId)
return comment_lines;

auto full_comments = string_intern_pool.GetStringFromID(comment_sid);
auto &full_comments = string_intern_pool.GetStringFromID(comment_sid);

//early exit
if(full_comments.empty())
Expand Down
10 changes: 5 additions & 5 deletions src/Amalgam/evaluablenode/EvaluableNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ class EvaluableNode
}
}

//returns true is node pointer e is nullptr or value of e has type ENT_NULL
//returns true if e is nullptr or value of e has type ENT_NULL
static __forceinline bool IsNull(EvaluableNode *e)
{
return (e == nullptr || e->GetType() == ENT_NULL);
Expand Down Expand Up @@ -511,7 +511,7 @@ class EvaluableNode
return StringInternPool::NOT_A_STRING_ID;
}
void SetStringID(StringInternPool::StringID id);
std::string GetStringValue();
const std::string &GetStringValue();
void SetStringValue(const std::string &v);
//gets the string ID and clears the node's string ID, but does not destroy the string reference,
// leaving the reference handling up to the caller
Expand All @@ -538,7 +538,7 @@ class EvaluableNode
//functions for getting and setting node comments by string or by StringID
// all Comment functions perform any reference counting management necessary when setting and clearing
StringInternPool::StringID GetCommentsStringId();
inline std::string GetCommentsString()
inline const std::string &GetCommentsString()
{
return string_intern_pool.GetStringFromID(GetCommentsStringId());
}
Expand Down Expand Up @@ -1272,7 +1272,7 @@ class EvaluableNodeImmediateValueWithType
if(nodeValue.stringID == string_intern_pool.NOT_A_STRING_ID)
return value_if_null;

auto str = string_intern_pool.GetStringFromID(nodeValue.stringID);
auto &str = string_intern_pool.GetStringFromID(nodeValue.stringID);
auto [value, success] = Platform_StringToNumber(str);
if(success)
return value;
Expand All @@ -1296,7 +1296,7 @@ class EvaluableNodeImmediateValueWithType
if(nodeValue.stringID == string_intern_pool.NOT_A_STRING_ID)
return std::make_pair(false, "");

auto str = string_intern_pool.GetStringFromID(nodeValue.stringID);
auto &str = string_intern_pool.GetStringFromID(nodeValue.stringID);
return std::make_pair(true, str);
}

Expand Down
84 changes: 60 additions & 24 deletions src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ inline StringInternPool::StringID MixStringValues(StringInternPool::StringID a,
if(b == StringInternPool::NOT_A_STRING_ID)
return string_intern_pool.CreateStringReference(a);

auto a_str = string_intern_pool.GetStringFromID(a);
auto b_str = string_intern_pool.GetStringFromID(b);
auto &a_str = string_intern_pool.GetStringFromID(a);
auto &b_str = string_intern_pool.GetStringFromID(b);
std::string result = EvaluableNodeTreeManipulation::MixStrings(a_str, b_str,
random_stream, fraction_a, fraction_b);

Expand Down Expand Up @@ -156,9 +156,9 @@ bool EvaluableNodeTreeManipulation::NodesMixMethod::AreMergeable(EvaluableNode *
MergeMetricResults<std::string *> EvaluableNodeTreeManipulation::StringSequenceMergeMetric::MergeMetric(std::string *a, std::string *b)
{
if(a == b || (a != nullptr && b != nullptr && *a == *b))
return MergeMetricResults(1.0, a, b);
return MergeMetricResults(1.0, a, b, false, true);
else
return MergeMetricResults(0.0, a, b);
return MergeMetricResults(0.0, a, b, false, false);
}

std::string *EvaluableNodeTreeManipulation::StringSequenceMergeMetric::MergeValues(std::string *a, std::string *b, bool must_merge)
Expand Down Expand Up @@ -699,10 +699,6 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(tree1 == nullptr && tree2 == nullptr)
return MergeMetricResults(1.0, tree1, tree2, false, true);

//if one is null and the other isn't, then stop
if( (tree1 == nullptr && tree2 != nullptr) || (tree1 != nullptr && tree2 == nullptr) )
return MergeMetricResults(0.0, tree1, tree2, false, false);

//if the pair of nodes has already been computed, then just return the result
auto found = memoized.find(std::make_pair(tree1, tree2));
if(found != end(memoized))
Expand Down Expand Up @@ -732,14 +728,14 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
size_t tree2_ordered_nodes_size = 0;
size_t tree2_mapped_nodes_size = 0;

if(tree1->IsAssociativeArray())
if(EvaluableNode::IsAssociativeArray(tree1))
tree1_mapped_nodes_size = tree1->GetMappedChildNodesReference().size();
else if(!tree1->IsImmediate())
else if(EvaluableNode::IsOrderedArray(tree1))
tree1_ordered_nodes_size = tree1->GetOrderedChildNodesReference().size();

if(tree2->IsAssociativeArray())
if(EvaluableNode::IsAssociativeArray(tree2))
tree2_mapped_nodes_size = tree2->GetMappedChildNodesReference().size();
else if(!tree2->IsImmediate())
else if(EvaluableNode::IsOrderedArray(tree2))
tree2_ordered_nodes_size = tree2->GetOrderedChildNodesReference().size();

if(tree1_ordered_nodes_size == 0 && tree2_ordered_nodes_size == 0
Expand Down Expand Up @@ -917,8 +913,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare

}
}

if(tree1_mapped_nodes_size > 0 && tree2_mapped_nodes_size > 0)
else if(tree1_mapped_nodes_size > 0 && tree2_mapped_nodes_size > 0)
{
//use keys from first node
auto &tree_2_mcn = tree2->GetMappedChildNodesReference();
Expand All @@ -940,37 +935,81 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
{
for(auto node : tree1->GetOrderedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(tree2, node, memoized, checked);
auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
bool exact_match = sub_match.exactMatch;
sub_match.exactMatch = false;
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(node, tree2), commonality);
if(exact_match)
break;
}
}
}
else if(tree1_mapped_nodes_size > 0)
{
for(auto &[node_id, node] : tree1->GetMappedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(tree2, node, memoized, checked);
auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
bool exact_match = sub_match.exactMatch;
sub_match.exactMatch = false;
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(node, tree2), commonality);
if(exact_match)
break;
}
}
}
}

//check again for commonality in case exact match was found by iterating via tree1 above
if(!commonality.exactMatch)
{
if(tree2_ordered_nodes_size > 0)
{
for(auto cn : tree2->GetOrderedChildNodesReference())
for(auto node : tree2->GetOrderedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(tree1, cn, memoized, checked);
auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
bool exact_match = sub_match.exactMatch;
sub_match.exactMatch = false;
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(tree1, node), commonality);
if(exact_match)
break;
}
}
}
else if(tree2_mapped_nodes_size > 0)
{
for(auto &[node_id, node] : tree2->GetMappedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
bool exact_match = sub_match.exactMatch;
sub_match.exactMatch = false;
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(tree1, node), commonality);
if(exact_match)
break;
}
}
}
}
Expand Down Expand Up @@ -1054,7 +1093,7 @@ bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTree(EvaluableNode *t
for(size_t i = 0; i < num_labels; i++)
{
auto label_sid = tree->GetLabelStringId(i);
auto label_name = string_intern_pool.GetStringFromID(label_sid);
auto &label_name = string_intern_pool.GetStringFromID(label_sid);

if(label_name.size() == 0)
continue;
Expand Down Expand Up @@ -1109,7 +1148,7 @@ bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTreeAndMakeLabelNorma
for(size_t i = 0; i < num_labels; i++)
{
auto label_sid = tree->GetLabelStringId(i);
auto label_name = string_intern_pool.GetStringFromID(label_sid);
auto &label_name = string_intern_pool.GetStringFromID(label_sid);

if(label_name.size() == 0)
continue;
Expand Down Expand Up @@ -1173,9 +1212,6 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::CommonalityBe
if(n1 == nullptr && n2 == nullptr)
return MergeMetricResults(1.0, n1, n2, false, true);

if(n1 == nullptr || n2 == nullptr)
return MergeMetricResults(0.0, n1, n2, false, false);

auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2);

auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2);
Expand Down Expand Up @@ -1214,7 +1250,7 @@ std::pair<EvaluableNode *, double> EvaluableNodeTreeManipulation::CommonalityBet
double n2_value = n2->GetNumberValueReference();
return std::make_pair(n1, n1_value == n2_value ? 1.0 : 0.0);
}
if(n1_type == ENT_STRING)
if(n1_type == ENT_STRING || n1_type == ENT_SYMBOL)
{
auto n1_sid = n1->GetStringID();
auto n2_sid = n2->GetStringID();
Expand Down
6 changes: 3 additions & 3 deletions src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ class EvaluableNodeTreeManipulation
if(a == b)
return MergeMetricResults(1.0, a, b);
else
return MergeMetricResults(0.0, a, b);
return MergeMetricResults(0.0, a, b, false, false);
}

virtual uint32_t MergeValues(uint32_t a, uint32_t b, bool must_merge = false)
Expand Down Expand Up @@ -357,8 +357,8 @@ class EvaluableNodeTreeManipulation
if(sid1 == string_intern_pool.NOT_A_STRING_ID || sid2 == string_intern_pool.NOT_A_STRING_ID)
return 0.125;

auto s1 = string_intern_pool.GetStringFromID(sid1);
auto s2 = string_intern_pool.GetStringFromID(sid2);
auto &s1 = string_intern_pool.GetStringFromID(sid1);
auto &s2 = string_intern_pool.GetStringFromID(sid2);

size_t s1_len = 0;
size_t s2_len = 0;
Expand Down
Loading

0 comments on commit 8fb08d6

Please sign in to comment.