Skip to content

Commit

Permalink
Moved FindBestMatches to StringUtilities.hpp
Browse files Browse the repository at this point in the history
* Code formatting

Signed-off-by: Jared Duffey <[email protected]>
  • Loading branch information
JDuffeyBQ committed Apr 17, 2024
1 parent c2909f1 commit 39b4132
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 46 deletions.
48 changes: 2 additions & 46 deletions src/simplnx/Filter/IFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@

#include "simplnx/Filter/DataParameter.hpp"
#include "simplnx/Filter/ValueParameter.hpp"
#include "simplnx/Utilities/StringUtilities.hpp"

#include <fmt/format.h>
#include <nlohmann/json.hpp>

#include <iostream>
#include <limits> // For std::numeric_limits
#include <sstream>
#include <string>
#include <utility> // For std::pair
#include <vector>

using namespace nx::core;
Expand Down Expand Up @@ -260,47 +257,6 @@ nlohmann::json IFilter::toJson(const Arguments& args) const
return json;
}

/**
 * @brief Computes the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn s1 into s2. Only two rows of the
 * dynamic-programming table are kept at a time, so the working memory grows
 * with s2.size() rather than with s1.size() * s2.size().
 *
 * @param s1 First string
 * @param s2 Second string
 * @return int The edit distance; 0 when both strings are equal
 */
int levenshteinDistance(const std::string& s1, const std::string& s2)
{
  const std::size_t len1 = s1.size();
  const std::size_t len2 = s2.size();

  // previousRow[j] is the distance between the first (i - 1) characters of s1
  // and the first j characters of s2; currentRow is the row being filled for i.
  std::vector<std::size_t> previousRow(len2 + 1);
  std::vector<std::size_t> currentRow(len2 + 1);

  // Turning an empty prefix of s1 into the first j characters of s2 takes j insertions.
  for(std::size_t j = 0; j <= len2; ++j)
  {
    previousRow[j] = j;
  }

  for(std::size_t i = 1; i <= len1; ++i)
  {
    // Turning the first i characters of s1 into an empty string takes i deletions.
    currentRow[0] = i;
    for(std::size_t j = 1; j <= len2; ++j)
    {
      const std::size_t substitutionCost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
      currentRow[j] = std::min({previousRow[j] + 1, currentRow[j - 1] + 1, previousRow[j - 1] + substitutionCost});
    }
    // The row just computed becomes the "previous" row of the next iteration.
    std::swap(previousRow, currentRow);
  }

  // After the final swap (or with no iterations at all) the answer lives in previousRow.
  return static_cast<int>(previousRow[len2]);
}

/**
 * @brief Pairs every string of vec1 with the string of vec2 that has the
 * smallest Levenshtein distance to it.
 *
 * When several candidates share the smallest distance the one that appears
 * first in vec2 is kept. When no candidate is selected (for example because
 * vec2 is empty) the second member of the pair is an empty string.
 *
 * @param vec1 Strings to find a match for
 * @param vec2 Candidate strings to match against
 * @return One (word, closest candidate) pair per entry of vec1, in vec1 order
 */
std::vector<std::pair<std::string, std::string>> findBestMatches(const std::vector<std::string>& vec1, const std::vector<std::string>& vec2)
{
  std::vector<std::pair<std::string, std::string>> matches;
  for(auto wordIter = vec1.cbegin(); wordIter != vec1.cend(); ++wordIter)
  {
    // cend() doubles as the "nothing selected yet" marker.
    auto closestIter = vec2.cend();
    int smallestDistance = std::numeric_limits<int>::max();
    for(auto candidateIter = vec2.cbegin(); candidateIter != vec2.cend(); ++candidateIter)
    {
      const int distance = levenshteinDistance(*wordIter, *candidateIter);
      if(distance >= smallestDistance)
      {
        // Not strictly better, so the earlier candidate wins ties.
        continue;
      }
      smallestDistance = distance;
      closestIter = candidateIter;
    }
    matches.emplace_back(*wordIter, closestIter == vec2.cend() ? std::string() : *closestIter);
  }
  return matches;
}

Result<Arguments> IFilter::fromJson(const nlohmann::json& json) const
{
Parameters params = parameters();
Expand Down Expand Up @@ -340,7 +296,7 @@ Result<Arguments> IFilter::fromJson(const nlohmann::json& json) const
}
}

auto bestMatches = findBestMatches(jsonKeyNotFound, paramKeyNotFound);
auto bestMatches = StringUtilities::FindBestMatches(jsonKeyNotFound, paramKeyNotFound);
for(const auto& match : bestMatches)
{
if(!match.first.empty() && !match.second.empty())
Expand Down
63 changes: 63 additions & 0 deletions src/simplnx/Utilities/StringUtilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,17 @@
#pragma once

#include "simplnx/Common/StringLiteral.hpp"
#include "simplnx/Common/Types.hpp"

#include <fmt/ranges.h>
#include <nonstd/span.hpp>

#include <algorithm>
#include <cctype>
#include <limits>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

/*' '(0x20)space(SPC)
Expand Down Expand Up @@ -439,4 +444,62 @@ inline std::string toLower(std::string input)
return input;
}

/**
 * @brief Calculates the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn s1 into s2. Only two rows of the
 * dynamic-programming table are kept at a time, so the working memory grows
 * with s2.size() rather than with s1.size() * s2.size().
 *
 * @param s1 First string
 * @param s2 Second string
 * @return uint32 The edit distance; 0 when both strings are equal
 */
inline uint32 CalculateLevenshteinDistance(const std::string& s1, const std::string& s2)
{
  const usize len1 = s1.size();
  const usize len2 = s2.size();

  // previousRow[j] is the distance between the first (i - 1) characters of s1
  // and the first j characters of s2; currentRow is the row being filled for i.
  std::vector<usize> previousRow(len2 + 1);
  std::vector<usize> currentRow(len2 + 1);

  // Turning an empty prefix of s1 into the first j characters of s2 takes j insertions.
  for(usize j = 0; j <= len2; ++j)
  {
    previousRow[j] = j;
  }

  for(usize i = 1; i <= len1; ++i)
  {
    // Turning the first i characters of s1 into an empty string takes i deletions.
    currentRow[0] = i;
    for(usize j = 1; j <= len2; ++j)
    {
      const usize substitutionCost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
      currentRow[j] = std::min({previousRow[j] + 1, currentRow[j - 1] + 1, previousRow[j - 1] + substitutionCost});
    }
    // The row just computed becomes the "previous" row of the next iteration.
    std::swap(previousRow, currentRow);
  }

  // After the final swap (or with no iterations at all) the answer lives in previousRow.
  return static_cast<uint32>(previousRow[len2]);
}

/**
* @brief Finds the best matches between two lists of strings using the Levenshtein distance.
* @param vec1
* @param vec2
* @return std::vector<std::pair<std::string, std::string>>
*/
inline std::vector<std::pair<std::string, std::string>> FindBestMatches(const std::vector<std::string>& vec1, const std::vector<std::string>& vec2)
{
std::vector<std::pair<std::string, std::string>> bestPairs;
for(const auto& word1 : vec1)
{
uint32 bestDistance = std::numeric_limits<uint32>::max();
std::string bestMatch;
for(const auto& word2 : vec2)
{
uint32 currentDistance = CalculateLevenshteinDistance(word1, word2);
if(currentDistance < bestDistance)
{
bestDistance = currentDistance;
bestMatch = word2;
}
}
bestPairs.emplace_back(word1, bestMatch);
}
return bestPairs;
}
} // namespace nx::core::StringUtilities

0 comments on commit 39b4132

Please sign in to comment.