Skip to content

Commit

Permalink
22379: Fixes to performance bug involving union and intersect opcodes (
Browse files Browse the repository at this point in the history
  • Loading branch information
howsohazard authored Dec 6, 2024
1 parent 05b99d8 commit ce17738
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 59 deletions.
76 changes: 39 additions & 37 deletions src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,8 @@ EvaluableNode *EvaluableNodeTreeManipulation::CreateGeneralizedNode(NodesMergeMe
return enm->AllocNode(n2);
}

auto [node, commonality] = EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(n1, n2);
auto [node, commonality] = EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(n1, n2,
mm->RequireExactMatches());

//if both are nullptr, nothing more to do
if(node == nullptr)
Expand Down Expand Up @@ -694,33 +695,33 @@ EvaluableNodeType EvaluableNodeTreeManipulation::GetRandomEvaluableNodeType(Rand
}

MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2,
MergeMetricResultsCache &memoized, EvaluableNode::ReferenceSetType *checked)
MergeMetricResultsParams &mmrp)
{
if(tree1 == nullptr && tree2 == nullptr)
return MergeMetricResults(1.0, tree1, tree2, false, true);

//if the pair of nodes has already been computed, then just return the result
auto found = memoized.find(std::make_pair(tree1, tree2));
if(found != end(memoized))
auto found = mmrp.memoizedNodeMergePairs.find(std::make_pair(tree1, tree2));
if(found != end(mmrp.memoizedNodeMergePairs))
return found->second;

if(checked != nullptr)
if(mmrp.checked != nullptr)
{
//if either is already checked, then neither adds shared nodes
if(checked->find(tree1) != end(*checked) || checked->find(tree2) != end(*checked))
if(mmrp.checked->find(tree1) != end(*(mmrp.checked)) || mmrp.checked->find(tree2) != end(*(mmrp.checked)))
return MergeMetricResults(0.0, tree1, tree2, false, true);
}

//if the trees are the same, then just return the size
if(tree1 == tree2)
{
MergeMetricResults results(static_cast<double>(EvaluableNode::GetDeepSize(tree1)), tree1, tree2, true, true);
memoized.emplace(std::make_pair(tree1, tree2), results);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, tree2), results);
return results;
}

//check current top nodes
auto commonality = CommonalityBetweenNodes(tree1, tree2);
auto commonality = CommonalityBetweenNodes(tree1, tree2, mmrp.requireExactMatches);

//see if can exit early, before inserting the nodes into the checked list and then removing them
size_t tree1_ordered_nodes_size = 0;
Expand All @@ -741,15 +742,15 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(tree1_ordered_nodes_size == 0 && tree2_ordered_nodes_size == 0
&& tree1_mapped_nodes_size == 0 && tree2_mapped_nodes_size == 0)
{
memoized.emplace(std::make_pair(tree1, tree2), commonality);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, tree2), commonality);
return commonality;
}

if(checked != nullptr)
if(mmrp.checked != nullptr)
{
//remember that it has already checked when traversing tree, and then remove from checked at the end of the function
checked->insert(tree1);
checked->insert(tree2);
mmrp.checked->insert(tree1);
mmrp.checked->insert(tree2);
}

if(tree1_ordered_nodes_size > 0 && tree2_ordered_nodes_size > 0)
Expand All @@ -776,7 +777,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
MergeMetricResults best_match_value(0.0, tree1, tree2, false, false);
for(size_t match_index = 0; match_index < a2.size(); match_index++)
{
auto match_value = NumberOfSharedNodes(a1_current, a2[match_index], memoized, checked);
auto match_value = NumberOfSharedNodes(a1_current, a2[match_index], mmrp);
if(!best_match_found || match_value > best_match_value)
{
best_match_found = true;
Expand Down Expand Up @@ -814,17 +815,17 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
{
auto smallest_list_size = std::min(size1, size2);
if(smallest_list_size >= 1)
commonality += NumberOfSharedNodes(ocn1[0], ocn2[0], memoized, checked);
commonality += NumberOfSharedNodes(ocn1[0], ocn2[0], mmrp);

starting_index = 1;
}

FlatMatrix<MergeMetricResults<EvaluableNode *>> sequence_commonality;
ComputeSequenceCommonalityMatrix(sequence_commonality, ocn1, ocn2,
[&memoized, checked]
[&mmrp]
(EvaluableNode *a, EvaluableNode *b)
{
return EvaluableNodeTreeManipulation::NumberOfSharedNodes(a, b, memoized, checked);
return EvaluableNodeTreeManipulation::NumberOfSharedNodes(a, b, mmrp);
}, starting_index);

commonality += sequence_commonality.At(size1, size2);
Expand All @@ -842,7 +843,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
auto smallest_list_size = std::min(a1.size(), a2.size());
if(smallest_list_size >= 1)
{
commonality += NumberOfSharedNodes(a1[0], a2[0], memoized, checked);
commonality += NumberOfSharedNodes(a1[0], a2[0], mmrp);

a1.erase(begin(a1));
a2.erase(begin(a2));
Expand All @@ -860,7 +861,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare

for(size_t match_index = 0; match_index < a2.size(); match_index += 2)
{
auto match_key = NumberOfSharedNodes(a1[0], a2[match_index], memoized, checked);
auto match_key = NumberOfSharedNodes(a1[0], a2[match_index], mmrp);

// key match dominates value match
if(!best_match_found || match_key > best_match_key)
Expand All @@ -871,7 +872,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare

//count the value node commonality as long as it exists and is nontrivial
if(match_key.IsNontrivialMatch() && a1.size() > 1 && a2.size() > match_index + 1)
best_match_value = NumberOfSharedNodes(a1[1], a2[match_index + 1], memoized, checked);
best_match_value = NumberOfSharedNodes(a1[1], a2[match_index + 1], mmrp);
else
best_match_value = MergeMetricResults<EvaluableNode *>(0.0, nullptr, nullptr, false, false);
}
Expand Down Expand Up @@ -906,7 +907,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
//use size of smallest list
auto smallest_list_size = std::min(ocn1.size(), ocn2.size());
for(size_t i = 0; i < smallest_list_size; i++)
commonality += NumberOfSharedNodes(ocn1[i], ocn2[i], memoized, checked);
commonality += NumberOfSharedNodes(ocn1[i], ocn2[i], mmrp);

break;
}
Expand All @@ -924,7 +925,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(other_node == end(tree_2_mcn))
continue;

commonality += NumberOfSharedNodes(node, other_node->second, memoized, checked);
commonality += NumberOfSharedNodes(node, other_node->second, mmrp);
}
}

Expand All @@ -935,7 +936,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
{
for(auto node : tree1->GetOrderedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked);
auto sub_match = NumberOfSharedNodes(node, tree2, mmrp);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
Expand All @@ -944,7 +945,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(node, tree2), commonality);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(node, tree2), commonality);
if(exact_match)
break;
}
Expand All @@ -954,7 +955,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
{
for(auto &[node_id, node] : tree1->GetMappedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(node, tree2, memoized, checked);
auto sub_match = NumberOfSharedNodes(node, tree2, mmrp);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
Expand All @@ -963,7 +964,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(node, tree2), commonality);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(node, tree2), commonality);
if(exact_match)
break;
}
Expand All @@ -978,7 +979,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
{
for(auto node : tree2->GetOrderedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked);
auto sub_match = NumberOfSharedNodes(tree1, node, mmrp);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
Expand All @@ -987,7 +988,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(tree1, node), commonality);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, node), commonality);
if(exact_match)
break;
}
Expand All @@ -997,7 +998,7 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
{
for(auto &[node_id, node] : tree2->GetMappedChildNodesReference())
{
auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked);
auto sub_match = NumberOfSharedNodes(tree1, node, mmrp);

//mark as nonexact match because had to traverse downward,
// but preserve whether was an exact match for early stopping
Expand All @@ -1006,22 +1007,22 @@ MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::NumberOfShare
if(sub_match > commonality)
{
commonality = sub_match;
memoized.emplace(std::make_pair(tree1, node), commonality);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, node), commonality);
if(exact_match)
break;
}
}
}
}

if(checked != nullptr)
if(mmrp.checked != nullptr)
{
//remove from the checked list so don't block other traversals
checked->erase(tree1);
checked->erase(tree2);
mmrp.checked->erase(tree1);
mmrp.checked->erase(tree2);
}

memoized.emplace(std::make_pair(tree1, tree2), commonality);
mmrp.memoizedNodeMergePairs.emplace(std::make_pair(tree1, tree2), commonality);
return commonality;
}

Expand Down Expand Up @@ -1207,22 +1208,23 @@ bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTreeAndMakeLabelNorma
return collected_all_label_values;
}

MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::CommonalityBetweenNodes(EvaluableNode *n1, EvaluableNode *n2)
MergeMetricResults<EvaluableNode *> EvaluableNodeTreeManipulation::CommonalityBetweenNodes(
EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches)
{
if(n1 == nullptr && n2 == nullptr)
return MergeMetricResults(1.0, n1, n2, false, true);

auto [num_common_labels, num_unique_labels] = EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2);

auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2);
auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2, require_exact_matches);

bool must_match = (num_unique_labels == 0 && num_common_labels > 0);
bool exact_match = (num_unique_labels == 0 && commonality == 1.0);
return MergeMetricResults(commonality + num_common_labels, n1, n2, must_match, exact_match);
}

std::pair<EvaluableNode *, double> EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(
EvaluableNode *n1, EvaluableNode *n2, bool require_exact_node_match)
EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches)
{
bool n1_null = EvaluableNode::IsNull(n1);
bool n2_null = EvaluableNode::IsNull(n2);
Expand All @@ -1239,7 +1241,7 @@ std::pair<EvaluableNode *, double> EvaluableNodeTreeManipulation::CommonalityBet
auto n2_type = n2->GetType();

//can have much faster and lighter computations if only checking for exact matches
if(require_exact_node_match)
if(require_exact_matches)
{
if(n1_type != n2_type)
return std::make_pair(n1, 0.0);
Expand Down
47 changes: 25 additions & 22 deletions src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,12 @@ constexpr bool operator==(const std::pair<T *, T *> &a, const std::pair<T *, T *
}

//for caching pairs of EvaluableNode *'s into MergeMetricResults
typedef FastHashMap<std::pair<EvaluableNode *, EvaluableNode *>, MergeMetricResults<EvaluableNode *>> MergeMetricResultsCache;
struct MergeMetricResultsParams
{
EvaluableNode::ReferenceSetType *checked;
FastHashMap<std::pair<EvaluableNode *, EvaluableNode *>, MergeMetricResults<EvaluableNode *>> memoizedNodeMergePairs;
bool requireExactMatches;
};

//returns a commonality measure of difference between numbers a and b in [0,1]
//if the numbers are equal, returns 1, returns closer to 0 the less similar they are
Expand Down Expand Up @@ -122,15 +127,7 @@ class EvaluableNodeTreeManipulation

virtual MergeMetricResults<EvaluableNode *> MergeMetric(EvaluableNode *a, EvaluableNode *b)
{
if((a != nullptr && a->GetNeedCycleCheck()) || (b != nullptr && b->GetNeedCycleCheck()))
{
EvaluableNode::ReferenceSetType checked;
return NumberOfSharedNodes(a, b, memoizedMergeMetricResults, &checked);
}
else //don't need to check for cycles
{
return NumberOfSharedNodes(a, b, memoizedMergeMetricResults, nullptr);
}
return NumberOfSharedNodes(a, b, requireExactMatches);
}

virtual EvaluableNode *MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge = false)
Expand Down Expand Up @@ -168,7 +165,6 @@ class EvaluableNodeTreeManipulation
bool keepAllOfBoth;
bool requireExactMatches;
EvaluableNode::ReferenceAssocType references;
MergeMetricResultsCache memoizedMergeMetricResults;
};

//functionality to mix nodes
Expand Down Expand Up @@ -422,9 +418,10 @@ class EvaluableNodeTreeManipulation
}

//computes the edit distance between the two trees
static double EditDistance(EvaluableNode *tree1, EvaluableNode *tree2)
// If require_exact_matches is true, then it will only compare nodes that match exactly
static double EditDistance(EvaluableNode *tree1, EvaluableNode *tree2, bool require_exact_matches = false)
{
auto shared_nodes = NumberOfSharedNodes(tree1, tree2);
auto shared_nodes = NumberOfSharedNodes(tree1, tree2, require_exact_matches);
size_t tree_1_size = EvaluableNode::GetDeepSize(tree1);
size_t tree_2_size = EvaluableNode::GetDeepSize(tree2);

Expand All @@ -433,25 +430,29 @@ class EvaluableNodeTreeManipulation
}

//Computes the total number of nodes in both trees that are equal
inline static MergeMetricResults<EvaluableNode *> NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2)
// If require_exact_matches is true, then it will only compare nodes that match exactly
inline static MergeMetricResults<EvaluableNode *> NumberOfSharedNodes(
EvaluableNode *tree1, EvaluableNode *tree2, bool require_exact_matches = false)
{
MergeMetricResultsCache memoized;
MergeMetricResultsParams mmrp;
mmrp.requireExactMatches = require_exact_matches;
if((tree1 != nullptr && tree1->GetNeedCycleCheck()) || (tree2 != nullptr && tree2->GetNeedCycleCheck()))
{
EvaluableNode::ReferenceSetType checked;
return NumberOfSharedNodes(tree1, tree2, memoized, &checked);
mmrp.checked = &checked;
return NumberOfSharedNodes(tree1, tree2, mmrp);
}
else //don't need to check for cycles
{
return NumberOfSharedNodes(tree1, tree2, memoized, nullptr);
mmrp.checked = nullptr;
return NumberOfSharedNodes(tree1, tree2, mmrp);
}
}

//Returns the total number of nodes in both trees that are equal, ignoring those in the checked set
// Assists the public function NumberOfSharedNodes
//if checked is nullptr, then it will not keep track of the nodes, which can be done if neither needs cycle checks
static MergeMetricResults<EvaluableNode *> NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2,
MergeMetricResultsCache &memoized, EvaluableNode::ReferenceSetType *checked);
MergeMetricResultsParams &mmrp);

//Recursively traverses tree, storing any nodes with labels into an index map, and returning the map,
// as well as a flag indicating true if it was able to just retrieve the labels, or false
Expand Down Expand Up @@ -543,16 +544,18 @@ class EvaluableNodeTreeManipulation
EvaluableNode *replacement, EvaluableNode::ReferenceSetType &checked);

//Evaluates commonality metric between the two nodes passed in, including labels. 1.0 if identical, 0.0 if completely different, and some value between if similar
static MergeMetricResults<EvaluableNode *> CommonalityBetweenNodes(EvaluableNode *n1, EvaluableNode *n2);
// If require_exact_matches is true, then it will only return 1.0 or 0.0
static MergeMetricResults<EvaluableNode *> CommonalityBetweenNodes(
EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches = false);

//Evaluates the functional commonality between the types and immediate values of n1 and n2 (excluding labels, comments, etc.)
// Returns a pair: the first value is the more general of the two nodes and the second is a commonality value
// The more general of the two nodes will be the one whose type is more general
// The commonality metric will return 1.0 if identical, 0.0 if completely different, and some value between if similar
// If require_exact_node_match is true, then it will only return 1.0 or 0.0
// If require_exact_matches is true, then it will only return 1.0 or 0.0
//The EvaluableNode * returned should not be modified, nor should it be included in any data outside the scope of the caller
static std::pair<EvaluableNode *, double> CommonalityBetweenNodeTypesAndValues(
EvaluableNode *n1, EvaluableNode *n2, bool require_exact_node_match = false);
EvaluableNode *n1, EvaluableNode *n2, bool require_exact_matches = false);

//Mutates the current node n, changing its type or value, based on the mutation_rate
// strings contains a list of strings to likely choose from if mutating to a string value
Expand Down

0 comments on commit ce17738

Please sign in to comment.