Skip to content

Commit

Permalink
21953: Fixes bugs in merging code, especially regarding label matches (
Browse files Browse the repository at this point in the history
  • Loading branch information
howsohazard authored Oct 22, 2024
1 parent 7d8295c commit e17f52b
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 106 deletions.
6 changes: 2 additions & 4 deletions src/Amalgam/Merger.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ class MergeMetricResultsBase
// if require_nontrivial_match, then it requires at least one node or atomic value to be equal
constexpr bool IsBetterMatchThan(const MergeMetricResultsBase &mmr)
{
if(mmr.mustMatch)
return false;
if(mustMatch)
if(mustMatch && !mmr.mustMatch)
return true;

//if same amount of commonality, prefer exact matches
if(commonality == mmr.commonality)
{
if(mmr.exactMatch && !exactMatch)
if(!exactMatch && mmr.exactMatch)
return false;
if(exactMatch && !mmr.exactMatch)
return true;
Expand Down
4 changes: 0 additions & 4 deletions src/Amalgam/entity/EntityManipulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,6 @@ MergeMetricResults<Entity *> EntityManipulation::NumberOfSharedNodes(Entity *ent
best_match_found = true;
best_match_value = match_value;
best_match_key = e2c_id;

//don't need to check any more
if(match_value.mustMatch)
break;
}
}

Expand Down
40 changes: 20 additions & 20 deletions src/Amalgam/evaluablenode/EvaluableNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,47 +24,47 @@ FastHashSet<EvaluableNode *> EvaluableNode::debugWatch;
Concurrency::SingleMutex EvaluableNode::debugWatchMutex;
#endif

void EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels)
std::pair<size_t, size_t> EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2)
{
num_common_labels = 0;
num_unique_labels = 0;
size_t num_n1_labels = 0;
size_t num_n2_labels = 0;
if(n1 == nullptr)
{
if(n2 == nullptr)
return std::make_pair(0, 0);

if(n1 != nullptr)
num_n1_labels = n1->GetNumLabels();
return std::make_pair(0, n2->GetNumLabels());
}

if(n2 != nullptr)
num_n2_labels = n2->GetNumLabels();
if(n2 == nullptr)
return std::make_pair(0, n1->GetNumLabels());

//if no labels in either, then done
if(num_n1_labels == 0 && num_n2_labels == 0)
return;
size_t num_n1_labels = n1->GetNumLabels();
size_t num_n2_labels = n2->GetNumLabels();

//if labels in one (but not both, because would have exited), then count total and done
//if no labels in one, just return the nonzero count as the total unique
if(num_n1_labels == 0 || num_n2_labels == 0)
{
num_unique_labels = std::max(num_n1_labels, num_n2_labels);
return;
}
return std::make_pair(0, std::max(num_n1_labels, num_n2_labels));

//if only have one label in each, compare immediately for speed
if(num_n1_labels == 1 && num_n2_labels == 1)
{
//if the same, only one common label, if unique, then two unique
if(n1->GetLabel(0) == n2->GetLabel(0))
num_common_labels = 1;
return;
return std::make_pair(1, 0);
else
return std::make_pair(0, 2);
}

//compare
size_t num_common_labels = 0;
for(auto s_id : n1->GetLabelsStringIds())
{
auto n2_label_sids = n2->GetLabelsStringIds();
if(std::find(begin(n2_label_sids), end(n2_label_sids), s_id) != end(n2_label_sids))
num_common_labels++;
}

num_unique_labels = num_n1_labels + num_n2_labels - num_common_labels; //don't double-count the common labels
//don't count the common labels in the uncommon
return std::make_pair(num_common_labels, num_n1_labels + num_n2_labels - 2 * num_common_labels);
}

bool EvaluableNode::AreShallowEqual(EvaluableNode *a, EvaluableNode *b)
Expand Down
4 changes: 2 additions & 2 deletions src/Amalgam/evaluablenode/EvaluableNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ class EvaluableNode
}

//Evaluates the fraction of the labels of nodes that are the same, 1.0 if no labels on either
//num_common_labels and num_unique_labels are set to the appropriate number in common and number of labels that are unique when the two sets are merged
static void GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels);
//returns the number of followed by the number of unique labels if the two sets were merged
static std::pair<size_t, size_t> GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2);

//Returns true if the immediate data structure of a is equal to b
static bool AreShallowEqual(EvaluableNode *a, EvaluableNode *b);
Expand Down
21 changes: 6 additions & 15 deletions src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ inline StringInternPool::StringID MixStringValues(StringInternPool::StringID a,

bool EvaluableNodeTreeManipulation::NodesMergeMethod::AreMergeable(EvaluableNode *a, EvaluableNode *b)
{
size_t num_common_labels;
size_t num_unique_labels;
EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b, num_common_labels, num_unique_labels);
auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b);

auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(a, b, true);

Expand Down Expand Up @@ -121,9 +119,7 @@ EvaluableNode *EvaluableNodeTreeManipulation::NodesMixMethod::MergeValues(Evalua

bool EvaluableNodeTreeManipulation::NodesMixMethod::AreMergeable(EvaluableNode *a, EvaluableNode *b)
{
size_t num_common_labels;
size_t num_unique_labels;
EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b, num_common_labels, num_unique_labels);
auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b);

auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(a, b);

Expand Down Expand Up @@ -1180,18 +1176,13 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::CommonalityBe
if(n1 == nullptr || n2 == nullptr)
return MergeMetricResults(0.0, n1, n2, false, false);

size_t num_common_labels;
size_t num_unique_labels;
EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2, num_common_labels, num_unique_labels);
auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2);

auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2);

//if no labels, as is usually the case, then just address normal commonality
// and if the nodes are exactly equal
if(num_unique_labels == 0)
return MergeMetricResults(commonality, n1, n2, false, commonality == 1.0);

return MergeMetricResults(commonality + num_common_labels, n1, n2, num_common_labels == num_unique_labels, commonality == 1.0);
bool must_match = (num_unique_labels == 0 && num_common_labels > 0);
bool exact_match = (num_unique_labels == 0 && commonality == 1.0);
return MergeMetricResults(commonality + num_common_labels, n1, n2, must_match, exact_match);
}

std::pair<EvaluableNode *, double> EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(
Expand Down
Loading

0 comments on commit e17f52b

Please sign in to comment.