Skip to content

Commit

Permalink
22397: Additional fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
howsohazard committed Dec 11, 2024
1 parent 9858d53 commit 4ef628d
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 18 deletions.
44 changes: 31 additions & 13 deletions src/Amalgam/GeneralizedDistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -1057,7 +1057,7 @@ class RepeatedGeneralizedDistanceEvaluator
enum EffectiveFeatureDifferenceType : uint32_t
{
//everything that isn't initially populated shares the same value
//represented by precomputedRemainingIdenticalDistanceTerm
//represented by defaultNominalNonMatchDistanceTerm
EFDT_REMAINING_IDENTICAL_PRECOMPUTED,
//everything is precomputed from interned values that are looked up
EFDT_UNIVERSALLY_INTERNED_PRECOMPUTED,
Expand Down Expand Up @@ -1132,7 +1132,10 @@ class RepeatedGeneralizedDistanceEvaluator
double default_deviation = deviations_for_value->second.defaultDeviation;
if(FastIsNaN(default_deviation))
{
deviations.defaultDeviation
feature_data.defaultNominalMatchDistanceTerm
= distEvaluator->ComputeDistanceTermNominalUniversallySymmetricExactMatch(index, high_accuracy);

feature_data.defaultNominalNonMatchDistanceTerm
= distEvaluator->ComputeDistanceTermNominalUniversallySymmetricNonMatch(index, high_accuracy);
}
else
Expand All @@ -1144,8 +1147,12 @@ class RepeatedGeneralizedDistanceEvaluator
//divide the probability among the other classes
double prob_class_given_nonmatch = default_deviation / nonmatching_classes;

deviations.defaultDeviation
= distEvaluator->ComputeDistanceTermNominalNonmatchFromMatchProbabilities(
feature_data.defaultNominalMatchDistanceTerm =
distEvaluator->ComputeDistanceTermNominalMatchFromMatchProbabilities(
index, prob_class_given_match, high_accuracy);

feature_data.defaultNominalNonMatchDistanceTerm =
distEvaluator->ComputeDistanceTermNominalNonmatchFromMatchProbabilities(
index, prob_class_given_match, prob_class_given_nonmatch, high_accuracy);
}
}
Expand All @@ -1169,7 +1176,10 @@ class RepeatedGeneralizedDistanceEvaluator
double default_deviation = deviations_for_sid->second.defaultDeviation;
if(FastIsNaN(default_deviation))
{
deviations.defaultDeviation
feature_data.defaultNominalMatchDistanceTerm
= distEvaluator->ComputeDistanceTermNominalUniversallySymmetricExactMatch(index, high_accuracy);

feature_data.defaultNominalNonMatchDistanceTerm
= distEvaluator->ComputeDistanceTermNominalUniversallySymmetricNonMatch(index, high_accuracy);
}
else
Expand All @@ -1181,7 +1191,11 @@ class RepeatedGeneralizedDistanceEvaluator
//divide the probability among the other classes
double prob_class_given_nonmatch = default_deviation / nonmatching_classes;

deviations.defaultDeviation
feature_data.defaultNominalMatchDistanceTerm =
distEvaluator->ComputeDistanceTermNominalMatchFromMatchProbabilities(
index, prob_class_given_match, high_accuracy);

feature_data.defaultNominalNonMatchDistanceTerm
= distEvaluator->ComputeDistanceTermNominalNonmatchFromMatchProbabilities(
index, prob_class_given_match, prob_class_given_nonmatch, high_accuracy);
}
Expand Down Expand Up @@ -1274,7 +1288,7 @@ class RepeatedGeneralizedDistanceEvaluator
return dist_term_entry->second;

if(other_value.number == feature_data.targetValue.GetValueAsNumber())
return distEvaluator->ComputeDistanceTermNominalUniversallySymmetricExactMatch(index, high_accuracy);
return feature_data.defaultNominalMatchDistanceTerm;
}
else if(other_type == ENIVT_STRING_ID)
{
Expand All @@ -1283,7 +1297,7 @@ class RepeatedGeneralizedDistanceEvaluator
return dist_term_entry->second;

if(other_value.stringID == feature_data.targetValue.GetValueAsStringIDIfExists())
return distEvaluator->ComputeDistanceTermNominalUniversallySymmetricExactMatch(index, high_accuracy);
return feature_data.defaultNominalMatchDistanceTerm;
}

if(EvaluableNodeImmediateValue::IsNull(other_type, other_value))
Expand Down Expand Up @@ -1363,7 +1377,7 @@ class RepeatedGeneralizedDistanceEvaluator
//returns the smallest nonmatching distance term regardless of value
__forceinline double ComputeDistanceTermNominalNonNullSmallestNonmatch(size_t index, bool high_accuracy)
{
double match_dist_term = distEvaluator->ComputeDistanceTermNominalUniversallySymmetricExactMatch(index, high_accuracy);
double match_dist_term = featureData[index].defaultNominalMatchDistanceTerm;
double smallest_nonmatch = ComputeDistanceTermNonNullNominalNextSmallest(match_dist_term, index, high_accuracy);

//if there is no such value, return infinite
Expand Down Expand Up @@ -1408,7 +1422,7 @@ class RepeatedGeneralizedDistanceEvaluator
void Clear()
{
effectiveFeatureType = EFDT_CONTINUOUS_NUMERIC;
precomputedRemainingIdenticalDistanceTerm = 0.0;
defaultNominalNonMatchDistanceTerm = 0.0;
internedDistanceTerms.clear();
nominalStringDistanceTerms.clear();
nominalNumberDistanceTerms.clear();
Expand All @@ -1420,7 +1434,7 @@ class RepeatedGeneralizedDistanceEvaluator
//marks this feature as EFDT_REMAINING_IDENTICAL_PRECOMPUTED and records dist_term
//as the distance term to use for that effective feature type
//NOTE(review): this view has no +/- diff markers; the two dist_term assignments below look like
//the pre-rename and post-rename forms of the same statement -- confirm against the actual header
inline void SetPrecomputedRemainingIdenticalDistanceTerm(double dist_term)
{
effectiveFeatureType = EFDT_REMAINING_IDENTICAL_PRECOMPUTED;
precomputedRemainingIdenticalDistanceTerm = dist_term;
defaultNominalNonMatchDistanceTerm = dist_term;
}

//the effective comparison for the feature type, specialized for performance
Expand All @@ -1430,8 +1444,12 @@ class RepeatedGeneralizedDistanceEvaluator
//target that the distance will be computed to
EvaluableNodeImmediateValueWithType targetValue;

//the distance term for EFDT_REMAINING_IDENTICAL_PRECOMPUTED
double precomputedRemainingIdenticalDistanceTerm;
//the default nominal matching distance term if a term is not in the distance term matrix
double defaultNominalMatchDistanceTerm;

//the default nominal nonmatching distance term if a term is not in the distance term matrix
//also used for EFDT_REMAINING_IDENTICAL_PRECOMPUTED
double defaultNominalNonMatchDistanceTerm;

std::vector<double> internedDistanceTerms;

Expand Down
5 changes: 3 additions & 2 deletions src/Amalgam/PlatformSpecific.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ inline std::pair<std::string, bool> Platform_OpenFileAsString(const std::string
// not currently have a working implementation on any version.
// note2: std::from_chars is more desirable than std::strtod because it is locale independent
// TODO 15993: Reevaluate when moving to C++20
inline std::pair<double, bool> Platform_StringToNumber(const std::string &s)
template<typename StringType>
inline std::pair<double, bool> Platform_StringToNumber(const StringType &s)
{
#ifdef OS_WINDOWS
const char *first_char = s.data();
Expand All @@ -158,7 +159,7 @@ inline std::pair<double, bool> Platform_StringToNumber(const std::string &s)
#else
//make sure it has a zero terminator
std::string stringified_s(s);
const char *start_pointer = stringified_s.c_str();
const char *start_pointer = stringified_s.data();
char *end_pointer = nullptr;
double value = strtod(start_pointer, &end_pointer);
//if didn't reach the end or grabbed nothing, then it's not a number
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/SeparableBoxFilterDataStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ class SeparableBoxFilterDataStore
switch(feature_data.effectiveFeatureType)
{
case RepeatedGeneralizedDistanceEvaluator::EFDT_REMAINING_IDENTICAL_PRECOMPUTED:
return feature_data.precomputedRemainingIdenticalDistanceTerm;
return feature_data.defaultNominalNonMatchDistanceTerm;

case RepeatedGeneralizedDistanceEvaluator::EFDT_CONTINUOUS_UNIVERSALLY_NUMERIC:
{
Expand Down
4 changes: 2 additions & 2 deletions src/Amalgam/entity/EntityQueryBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,10 @@ namespace EntityQueryBuilder
//a list indicates that it is a pair of a sparse deviation matrix followed by a default deviation
//the default being for when the first value being compared is not found
auto &ocn = deviation_node->GetOrderedChildNodesReference();
if(ocn.size() > 1)
if(ocn.size() > 0)
PopulateFeatureDeviationNominalValuesMatrixData(feature_attribs, ocn[0]);

if(ocn.size() > 2)
if(ocn.size() > 1)
feature_attribs.deviation = EvaluableNode::ToNumber(ocn[1]);
}
else
Expand Down

0 comments on commit 4ef628d

Please sign in to comment.