diff --git a/CMakeLists.txt b/CMakeLists.txt index 2341699500..73fe38c0f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -511,7 +511,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/Utilities/ImageRotationUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/FlyingEdges.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/SampleSurfaceMesh.hpp - ${SIMPLNX_SOURCE_DIR}/Utilities/KUtilities.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/ClusteringUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/MontageUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/SIMPLConversion.hpp diff --git a/src/Plugins/SimplnxCore/CMakeLists.txt b/src/Plugins/SimplnxCore/CMakeLists.txt index 9a135bad87..41866a844d 100644 --- a/src/Plugins/SimplnxCore/CMakeLists.txt +++ b/src/Plugins/SimplnxCore/CMakeLists.txt @@ -56,6 +56,7 @@ set(FilterList CreatePythonSkeletonFilter CropImageGeometryFilter CropVertexGeometryFilter + DBSCANFilter DeleteDataFilter ErodeDilateBadDataFilter ErodeDilateCoordinationNumberFilter @@ -164,6 +165,7 @@ set(AlgorithmList ConvertColorToGrayScale ConvertData CreatePythonSkeleton + DBSCAN ErodeDilateBadData ErodeDilateCoordinationNumber ErodeDilateMask diff --git a/src/Plugins/SimplnxCore/docs/ComputeKMeansFilter.md b/src/Plugins/SimplnxCore/docs/ComputeKMeansFilter.md index 1c44f3241a..3103cb9a8f 100644 --- a/src/Plugins/SimplnxCore/docs/ComputeKMeansFilter.md +++ b/src/Plugins/SimplnxCore/docs/ComputeKMeansFilter.md @@ -20,6 +20,8 @@ Optimal solutions to the k means partitioning problem are computationally diffic Convergence is defined as when the computed means change very little (precisely, when the differences are within machine epsilon). Since Lloyd's algorithm is iterative, it only serves as an approximation, and may result in different classifications on each execution with the same input data. The user may opt to use a mask to ignore certain points; where the mask is *false*, the points will be placed in cluster 0. 
+Note: In SIMPLNX there is no explicit positional subtyping for Attribute Matrix, so the next section should be treated as a high-level understanding of what is being created. Naming the Attribute Matrix to include the type listed on the respective line in the 'Attribute Matrix Created' column is encouraged to help with readability and comprehension. + A clustering algorithm can be considered a kind of segmentation; this implementation of k means does not rely on the **Geometry** on which the data lie, only the *topology* of the space that the array itself forms. Therefore, this **Filter** has the effect of creating either **Features** or **Ensembles** depending on the kind of array passed to it for clustering. If an **Element** array (e.g., voxel-level **Cell** data) is passed to the **Filter**, then **Features** are created (in the previous example, a **Cell Feature Attribute Matrix** will be created). If a **Feature** array is passed to the **Filter**, then an Ensemble Attribute Matrix** is created. The following table shows what type of **Attribute Matrix** is created based on what sort of array is used for clustering: | Attribute Matrix Source | Attribute Matrix Created | diff --git a/src/Plugins/SimplnxCore/docs/ComputeKMedoidsFilter.md b/src/Plugins/SimplnxCore/docs/ComputeKMedoidsFilter.md index f5bb8285e9..d1abf352d4 100644 --- a/src/Plugins/SimplnxCore/docs/ComputeKMedoidsFilter.md +++ b/src/Plugins/SimplnxCore/docs/ComputeKMedoidsFilter.md @@ -21,6 +21,8 @@ This **Filter** uses the *Voronoi iteration* algorithm to produce the clustering Convergence is defined as when the medoids no longer change position. Since the algorithm is iterative, it only serves as an approximation, and may result in different classifications on each execution with the same input data. The user may opt to use a mask to ignore certain points; where the mask is *false*, the points will be placed in cluster 0. 
+Note: In SIMPLNX there is no explicit positional subtyping for Attribute Matrix, so the next section should be treated as a high-level understanding of what is being created. Naming the Attribute Matrix to include the type listed on the respective line in the 'Attribute Matrix Created' column is encouraged to help with readability and comprehension. + A clustering algorithm can be considered a kind of segmentation; this implementation of k medoids does not rely on the **Geometry** on which the data lie, only the *topology* of the space that the array itself forms. Therefore, this **Filter** has the effect of creating either **Features** or **Ensembles** depending on the kind of array passed to it for clustering. If an **Element** array (e.g., voxel-level **Cell** data) is passed to the **Filter**, then **Features** are created (in the previous example, a **Cell Feature Attribute Matrix** will be created). If a **Feature** array is passed to the **Filter**, then an Ensemble Attribute Matrix** is created. The following table shows what type of **Attribute Matrix** is created based on what sort of array is used for clustering: | Attribute Matrix Source | Attribute Matrix Created | diff --git a/src/Plugins/SimplnxCore/docs/DBSCANFilter.md b/src/Plugins/SimplnxCore/docs/DBSCANFilter.md new file mode 100644 index 0000000000..0ecddaccac --- /dev/null +++ b/src/Plugins/SimplnxCore/docs/DBSCANFilter.md @@ -0,0 +1,118 @@ +# DBSCAN + +## Group (Subgroup) + +DREAM3D Review (Clustering) + +## Description + +This **Filter** applies the DBSCAN (density-based spatial clustering of applications with noise) algorithm to an **Attribute Array**. DBSCAN is a _clustering algorithm_ that assigns to each point of the **Attribute Array** a _cluster Id_; points that have the same cluster Id are grouped together more densely (in the sense that the _distance_ between them is small) in the data space (i.e., points that have many nearest neighbors will belong to the same cluster). 
The user may select from a number of options to use as the distance metric. Points that are in sparse regions of the data space are considered "outliers"; these points will belong to cluster Id 0. Additionally, the user may opt to use a mask to ignore certain points; where the mask is _false_, the points will be categorized as outliers and placed in cluster 0. The algorithm requires two parameters: a _neighborhood region_, called epsilon; and the minimum number of points needed to form a cluster. The algorithm, in pseudocode, proceeds as follows: + + cluster = 0 + for each point p in dataset + { + if p has been visited + { + continue to next point + } + mark p as visited + neighbor_points = all points within epsilon distance from p + if the number of neighbor_points < minimum number of points + { + mark p as outlier (cluster Id = 0) + } + else + { + cluster++ + add p to cluster + for each point p_prime in neighbor_points + { + if p_prime has not been visited + { + mark p_prime as visited + neighbor_points_prime = all points within epsilon distance from p_prime + if the number of neighbor_points_prime >= minimum number of points + { + adjoin neighbor_points_prime to neighbor_points + } + } + if p_prime is not a member of any cluster + { + add p_prime to cluster + } + } + } + } + +An advantage of DBSCAN over other clustering approaches (e.g., [k means](@ref kmeans)) is that the number of clusters is not defined _a priori_. Additionally, DBSCAN is capable of finding arbitrarily shaped, nonlinear clusters, and is robust to noise. However, the choice of epsilon and the minimum number of points affects the quality of the clustering. In general, a reasonable rule of thumb for choosing the minimum number of points is that it should be, at least, greater than or equal to the dimensionality of the data set plus 1 (i.e., the number of components of the **Attribute Array** plus 1). 
The epsilon parameter may be estimated using a _k distance graph_, which can be computed using [this Filter](@ref kdistancegraph). When computing the k distance graph, set the k nearest neighbors value equal to the minimum number of points intended for DBSCAN. A reasonable choice of epsilon will be where the graph shows a strong bend. If using this approach to help estimate epsilon, remember to use the same distance metric in both **Filters**! An alternative method to choosing the two parameters for DBSCAN is to rely on _domain knowledge_ for the data, considering things like what neighbor distances between points make sense for a given metric. + +Note: In SIMPLNX there is no explicit positional subtyping for Attribute Matrix, so the next section should be treated as a high-level understanding of what is being created. Naming the Attribute Matrix to include the type listed on the respective line in the 'Attribute Matrix Created' column is encouraged to help with readability and comprehension. + +A clustering algorithm can be considered a kind of segmentation; this implementation of DBSCAN does not rely on the **Geometry** on which the data lie, only the _topology_ of the space that the array itself forms. Therefore, this **Filter** has the effect of creating either **Features** or **Ensembles** depending on the kind of array passed to it for clustering. If an **Element** array (e.g., voxel-level **Cell** data) is passed to the **Filter**, then **Features** are created (in the previous example, a **Cell Feature Attribute Matrix** will be created). If a **Feature** array is passed to the **Filter**, then an **Ensemble Attribute Matrix** is created. 
The following table shows what type of **Attribute Matrix** is created based on what sort of array is used for clustering: + +| Attribute Matrix Source | Attribute Matrix Created | +|------------------|--------------------| +| Generic | Generic | +| Vertex | Vertex Feature | +| Edge | Edge Feature | +| Face | Face Feature | +| Cell | Cell Feature| +| Vertex Feature | Vertex Ensemble | +| Edge Feature | Edge Ensemble | +| Face Feature | Face Ensemble | +| Cell Feature | Cell Ensemble| +| Vertex Ensemble | Vertex Ensemble | +| Edge Ensemble | Edge Ensemble | +| Face Ensemble | Face Ensemble | +| Cell Ensemble | Cell Ensemble| + +## Note on Randomness + +It is not recommended to use iterative for the _Initialization Type_, as it was included only for backwards compatibility. The inclusion of randomness in this algorithm is solely an attempt to reduce bias from the starting cluster. Iterative produced identical results in our test cases, but the random initialization is truest to the well-known DBSCAN algorithm. + +% Auto generated parameter table will be inserted here + +## Notes on Hyperparameter Tuning + +Machine Learning algorithms, especially unsupervised ones like DBSCAN, depend heavily upon the hyperparameter values passed into the algorithm. In this case the hyperparameters would be Epsilon and Minimum Points. To exemplify this in the context of the filter itself, consider the following image: + +![STRAIN Array Visualization](Images/DBSCAN_strain_vis.png) + +The above image depicts the strains experienced by an object, the dataset for which is used to test the algorithm and can be found in our Data Archive under the name "_DBSCAN_test.tar.gz_". In it we can see 3 clearly distinct stress points: one thin stressor running midway across the object from the west side to roughly the center, with the other two being northeast and southeast of the center respectively. 
The table below shows the outcomes of DBSCAN with different hyperparameters: + +| Incorrect | Exemplar | +|-----------------------------------|------------------------------------| +| Epsilon: 0.01, Minimum Points: 50 | Epsilon: 0.06, Minimum Points: 100 | +| ![Underdeveloped](Images/DBSCAN_underdeveloped.png) | ![Semi-Correct](Images/DBSCAN_semi_correct.png) | +| Epsilon: 0.05, Minimum Points: 100 | Zoomed Image of STRAIN (Reference) | +| ![Overdeveloped](Images/DBSCAN_overdeveloped.png) | ![Zoomed STRAIN](Images/DBSCAN_zoomed_strain.png) | + +Note: the colors only represent the index of the cluster at a specific point; each color is exclusively a label, **NOT a visual representation of a quantitative value**. + +Out of the above table, let's focus on a few specific aspects: + +- The top left image (_Epsilon: 0.01, Minimum Points: 50_) is clearly underdeveloped. There are only two clusters and the red clusters are clearly undersized. +- The bottom left image (_Epsilon: 0.05, Minimum Points: 100_) may seem correct at first glance, but there are two things that stand out: + - Throughout the boundary of the red cluster there are specks of the dark blue cluster, where it should be either red or light blue. (Compare to _Zoomed_) + - The red cluster boundary is seemingly arbitrarily defined on the gradient, in that there is not a clear enough distinction to denote it as a separate cluster. Even if you were looking to make a distinction, arguably the boundary would be far closer to the east side of the object. +- The top right image (_Epsilon: 0.06, Minimum Points: 100_) was selected as the exemplar here because it did cluster the three regions of stress into the same cluster, with clear and distinct boundaries. However, this is not perfect either, as it incorrectly incorporated part of the gradient into the cluster as well. + +This dataset is not the ideal case for this algorithm, but it is what we were able to source and make available. 
That said it still demonstrates the idea and potential application. + +_Note from Developers_: We are aware of a paper that outlines an algorithm that can reasonably predict the hyperparameter values, but at the current time implementation is left up to potential contributors [2]. + +## References + +[1] A density-based algorithm for discovering clusters in large spatial databases with noise, M. Ester, H.P. Kriegel, J. Sander, and X. Xu, Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, pp. 226-231, 1996. + +[2] Yang, Y., Qian, C., Li, H. et al. An efficient DBSCAN optimized by arithmetic optimization algorithm with opposition-based learning. J Supercomput 78, 19566–19604 (2022). https://doi.org/10.1007/s11227-022-04634-w + +## Example Pipelines + +## License & Copyright + +Please see the description file distributed with this plugin. + +## DREAM3D-NX Help + +If you need help, need to file a bug report or want to request a new feature, please head over to the [DREAM3DNX-Issues](https://github.com/BlueQuartzSoftware/DREAM3DNX-Issues/discussions) GitHub site where the community of DREAM3D-NX users can help answer your questions. 
diff --git a/src/Plugins/SimplnxCore/docs/Images/DBSCAN_overdeveloped.png b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_overdeveloped.png new file mode 100644 index 0000000000..3e08f1f01b Binary files /dev/null and b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_overdeveloped.png differ diff --git a/src/Plugins/SimplnxCore/docs/Images/DBSCAN_semi_correct.png b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_semi_correct.png new file mode 100644 index 0000000000..89e1733f18 Binary files /dev/null and b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_semi_correct.png differ diff --git a/src/Plugins/SimplnxCore/docs/Images/DBSCAN_strain_vis.png b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_strain_vis.png new file mode 100644 index 0000000000..6cce8246ea Binary files /dev/null and b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_strain_vis.png differ diff --git a/src/Plugins/SimplnxCore/docs/Images/DBSCAN_underdeveloped.png b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_underdeveloped.png new file mode 100644 index 0000000000..7ad94ad62d Binary files /dev/null and b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_underdeveloped.png differ diff --git a/src/Plugins/SimplnxCore/docs/Images/DBSCAN_zoomed_strain.png b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_zoomed_strain.png new file mode 100644 index 0000000000..0eff8a14a2 Binary files /dev/null and b/src/Plugins/SimplnxCore/docs/Images/DBSCAN_zoomed_strain.png differ diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.cpp index 009ba22ef9..1b7e9f454d 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.cpp @@ -1,9 +1,9 @@ #include "ComputeKMeans.hpp" #include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/DataArrayUtilities.hpp" #include 
"simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/KUtilities.hpp" #include @@ -16,7 +16,7 @@ class ComputeKMeansTemplate { public: ComputeKMeansTemplate(ComputeKMeans* filter, const IDataArray& inputIDataArray, IDataArray& meansIDataArray, const std::unique_ptr& maskDataArray, usize numClusters, Int32Array& fIds, - KUtilities::DistanceMetric distMetric, std::mt19937_64::result_type seed) + ClusterUtilities::DistanceMetric distMetric, std::mt19937_64::result_type seed) : m_Filter(filter) , m_InputArray(dynamic_cast(inputIDataArray)) , m_Means(dynamic_cast(meansIDataArray)) @@ -107,7 +107,7 @@ class ComputeKMeansTemplate const std::unique_ptr& m_Mask; usize m_NumClusters; Int32Array& m_FeatureIds; - KUtilities::DistanceMetric m_DistMetric; + ClusterUtilities::DistanceMetric m_DistMetric; std::mt19937_64::result_type m_Seed; // ----------------------------------------------------------------------------- @@ -131,7 +131,7 @@ class ComputeKMeansTemplate float64 minDist = std::numeric_limits::max(); for(int32 j = 0; j < m_NumClusters; j++) { - float64 dist = KUtilities::GetDistance(m_InputArray, (dims * i), m_Means, (dims * (j + 1)), dims, m_DistMetric); + float64 dist = ClusterUtilities::GetDistance(m_InputArray, (dims * i), m_Means, (dims * (j + 1)), dims, m_DistMetric); if(dist < minDist) { minDist = dist; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.hpp index dab50de12b..85688b2676 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMeans.hpp @@ -9,7 +9,7 @@ #include "simplnx/Parameters/ArraySelectionParameter.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" #include "simplnx/Parameters/NumberParameter.hpp" -#include "simplnx/Utilities/KUtilities.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" namespace 
nx::core { @@ -17,7 +17,7 @@ namespace nx::core struct SIMPLNXCORE_EXPORT ComputeKMeansInputValues { uint64 InitClusters; - KUtilities::DistanceMetric DistanceMetric; + ClusterUtilities::DistanceMetric DistanceMetric; DataPath ClusteringArrayPath; DataPath MaskArrayPath; DataPath FeatureIdsArrayPath; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.cpp index fa48a52926..25aed0dcf7 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.cpp @@ -1,9 +1,9 @@ #include "ComputeKMedoids.hpp" #include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/DataArrayUtilities.hpp" #include "simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/KUtilities.hpp" #include @@ -16,7 +16,7 @@ class KMedoidsTemplate { public: KMedoidsTemplate(ComputeKMedoids* filter, const IDataArray& inputIDataArray, IDataArray& medoidsIDataArray, const std::unique_ptr& maskDataArray, usize numClusters, Int32Array& fIds, - KUtilities::DistanceMetric distMetric, std::mt19937_64::result_type seed) + ClusterUtilities::DistanceMetric distMetric, std::mt19937_64::result_type seed) : m_Filter(filter) , m_InputArray(dynamic_cast(inputIDataArray)) , m_Medoids(dynamic_cast(medoidsIDataArray)) @@ -95,7 +95,7 @@ class KMedoidsTemplate const std::unique_ptr& m_Mask; usize m_NumClusters; Int32Array& m_FeatureIds; - KUtilities::DistanceMetric m_DistMetric; + ClusterUtilities::DistanceMetric m_DistMetric; std::mt19937_64::result_type m_Seed; // ----------------------------------------------------------------------------- @@ -112,7 +112,7 @@ class KMedoidsTemplate float64 minDist = std::numeric_limits::max(); for(int32 j = 0; j < m_NumClusters; j++) { - float64 dist = 
KUtilities::GetDistance(m_InputArray, (dims * i), m_Medoids, (dims * (j + 1)), dims, m_DistMetric); + float64 dist = ClusterUtilities::GetDistance(m_InputArray, (dims * i), m_Medoids, (dims * (j + 1)), dims, m_DistMetric); if(dist < minDist) { minDist = dist; @@ -153,7 +153,7 @@ class KMedoidsTemplate } if(m_FeatureIds[k] == i + 1 && m_Mask->isTrue(k)) { - cost += KUtilities::GetDistance(m_InputArray, (dims * k), m_InputArray, (dims * j), dims, m_DistMetric); + cost += ClusterUtilities::GetDistance(m_InputArray, (dims * k), m_InputArray, (dims * j), dims, m_DistMetric); } } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.hpp index c2db142d02..745b660209 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeKMedoids.hpp @@ -6,14 +6,14 @@ #include "simplnx/DataStructure/DataStructure.hpp" #include "simplnx/Filter/IFilter.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" -#include "simplnx/Utilities/KUtilities.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" namespace nx::core { struct SIMPLNXCORE_EXPORT KMedoidsInputValues { uint64 InitClusters; - KUtilities::DistanceMetric DistanceMetric; + ClusterUtilities::DistanceMetric DistanceMetric; DataPath ClusteringArrayPath; DataPath MaskArrayPath; DataPath FeatureIdsArrayPath; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/DBSCAN.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/DBSCAN.cpp new file mode 100644 index 0000000000..a36303bd4d --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/DBSCAN.cpp @@ -0,0 +1,382 @@ +#include "DBSCAN.hpp" + +#include "simplnx/Common/Range.hpp" +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" +#include 
"simplnx/Utilities/DataArrayUtilities.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" +#include "simplnx/Utilities/ParallelDataAlgorithm.hpp" + +#include + +using namespace nx::core; + +namespace +{ +template +class FindEpsilonNeighborhoodsImpl +{ +private: + using AbstractDataStoreT = AbstractDataStore; + +public: + FindEpsilonNeighborhoodsImpl(DBSCAN* filter, float64 epsilon, const AbstractDataStoreT& inputData, const std::unique_ptr& mask, usize numCompDims, usize numTuples, + ClusterUtilities::DistanceMetric distMetric, std::vector>& neighborhoods) + : m_Filter(filter) + , m_Epsilon(epsilon) + , m_InputDataStore(inputData) + , m_Mask(mask) + , m_NumCompDims(numCompDims) + , m_NumTuples(numTuples) + , m_DistMetric(distMetric) + , m_Neighborhoods(neighborhoods) + { + } + + void compute(usize start, usize end) const + { + for(usize i = start; i < end; i++) + { + if(m_Filter->getCancel()) + { + return; + } + if(m_Mask->isTrue(i)) + { + // directly inline to try to convince compiler to construct in place + m_Neighborhoods[i] = epsilon_neighbors(i); + } + } + } + + [[nodiscard]] std::list epsilon_neighbors(usize index) const + { + std::list neighbors; + + for(usize i = 0; i < m_NumTuples; i++) + { + if(m_Mask->isTrue(i)) + { + float64 dist = ClusterUtilities::GetDistance(m_InputDataStore, (m_NumCompDims * index), m_InputDataStore, (m_NumCompDims * i), m_NumCompDims, m_DistMetric); + if(dist < m_Epsilon) + { + neighbors.push_back(i); + } + } + } + + return neighbors; + } + + void operator()(const Range& range) const + { + compute(range.min(), range.max()); + } + +private: + DBSCAN* m_Filter; + float64 m_Epsilon; + const AbstractDataStoreT& m_InputDataStore; + const std::unique_ptr& m_Mask; + usize m_NumCompDims; + usize m_NumTuples; + ClusterUtilities::DistanceMetric m_DistMetric; + std::vector>& m_Neighborhoods; +}; + +template +class DBSCANTemplate +{ +private: + using AbstractDataStoreT = AbstractDataStore; + +public: + DBSCANTemplate(DBSCAN* filter, const 
AbstractDataStoreT& inputDataStore, const std::unique_ptr& maskDataArray, AbstractDataStore& fIdsDataStore, float32 epsilon, int32 minPoints, + ClusterUtilities::DistanceMetric distMetric, std::mt19937_64::result_type seed) + : m_Filter(filter) + , m_InputDataStore(inputDataStore) + , m_Mask(maskDataArray) + , m_FeatureIds(fIdsDataStore) + , m_Epsilon(epsilon) + , m_MinPoints(minPoints) + , m_DistMetric(distMetric) + , m_Seed(seed) + { + } + ~DBSCANTemplate() = default; + + DBSCANTemplate(const DBSCANTemplate&) = delete; // Copy Constructor Not Implemented + void operator=(const DBSCANTemplate&) = delete; // Move assignment Not Implemented + + // ----------------------------------------------------------------------------- + void operator()() + { + usize numTuples = m_InputDataStore.getNumberOfTuples(); + usize numCompDims = m_InputDataStore.getNumberOfComponents(); + std::vector visited(numTuples, false); // Uses one bit per value for space efficiency + std::vector clustered(numTuples, false); // Uses one bit per value for space efficiency + + auto minDist = static_cast(m_Epsilon); + int32 cluster = 0; + + std::vector> epsilonNeighborhoods; + + if constexpr(PrecacheV) + { + // In-memory only with current implementation for speed with std::list + epsilonNeighborhoods = std::vector>(numTuples); + + m_Filter->updateProgress("Finding Neighborhoods in parallel..."); + ParallelDataAlgorithm dataAlg; + dataAlg.setRange(0ULL, numTuples); + dataAlg.execute(FindEpsilonNeighborhoodsImpl(m_Filter, minDist, m_InputDataStore, m_Mask, numCompDims, numTuples, m_DistMetric, epsilonNeighborhoods)); + + m_Filter->updateProgress("Neighborhoods found."); + } + + std::mt19937_64 gen(m_Seed); + std::uniform_int_distribution dist(0, numTuples - 1); + + m_Filter->updateProgress("Beginning clustering..."); + auto start = std::chrono::steady_clock::now(); + usize i = 0; + uint8 misses = 0; + while(std::find(visited.begin(), visited.end(), false) != visited.end()) + { + 
if(m_Filter->getCancel()) + { + return; + } + + usize index; + if constexpr(!RandomInitV) + { + index = i; + if(i >= numTuples) + { + break; + } + i++; + } + if constexpr(RandomInitV) + { + index = dist(gen); + } + + if(visited[index]) + { + if(misses >= 10) + { + auto findIter = std::find(visited.begin(), visited.end(), false); + if(findIter == visited.end()) + { + break; + } + index = std::distance(visited.begin(), findIter); + + if constexpr(RandomInitV) + { + dist = std::uniform_int_distribution(index, numTuples - 1); + } + } + else + { + misses++; + continue; + } + } + + misses = 0; + + if(m_Mask->isTrue(index)) + { + visited[index] = true; + auto now = std::chrono::steady_clock::now(); + // Only send updates every 1 second + if(std::chrono::duration_cast(now - start).count() > 1000) + { + float32 progress = (static_cast(index) / static_cast(numTuples)) * 100.0f; + m_Filter->updateProgress(fmt::format("Scanning Data || Visited Point {} of {} || {:.2f}% Completed", index, numTuples, progress)); + start = std::chrono::steady_clock::now(); + } + + std::list neighbors; + if constexpr(PrecacheV) + { + neighbors = epsilonNeighborhoods[index]; + } + if constexpr(!PrecacheV) + { + for(usize j = 0; j < numTuples; j++) + { + if(m_Mask->isTrue(j)) + { + float64 distance = ClusterUtilities::GetDistance(m_InputDataStore, (numCompDims * index), m_InputDataStore, (numCompDims * j), numCompDims, m_DistMetric); + if(distance < m_Epsilon) + { + neighbors.push_back(j); + } + } + } + } + + if(static_cast(neighbors.size()) < m_MinPoints) + { + m_FeatureIds[index] = 0; + clustered[index] = true; + } + else + { + if(m_Filter->getCancel()) + { + return; + } + cluster++; + m_FeatureIds[index] = cluster; + clustered[index] = true; + + for(auto&& idx : neighbors) + { + if(m_Mask->isTrue(idx)) + { + if(!visited[idx]) + { + visited[idx] = true; + + std::list neighbors_prime; + if constexpr(PrecacheV) + { + neighbors_prime = epsilonNeighborhoods[idx]; + } + if constexpr(!PrecacheV) + { + 
for(usize j = 0; j < numTuples; j++) + { + if(m_Mask->isTrue(j)) + { + float64 distance = ClusterUtilities::GetDistance(m_InputDataStore, (numCompDims * idx), m_InputDataStore, (numCompDims * j), numCompDims, m_DistMetric); + if(distance < m_Epsilon) + { + neighbors_prime.push_back(j); + } + } + } + } + + if(static_cast(neighbors_prime.size()) >= m_MinPoints) + { + neighbors.splice(std::end(neighbors), neighbors_prime); + } + } + if(!clustered[idx]) + { + m_FeatureIds[idx] = cluster; + clustered[idx] = true; + } + } + } + } + } + else + { + visited[index] = true; + } + } + m_Filter->updateProgress("Clustering Complete!"); + } + +private: + DBSCAN* m_Filter; + const AbstractDataStoreT& m_InputDataStore; + const std::unique_ptr& m_Mask; + AbstractDataStore& m_FeatureIds; + float32 m_Epsilon; + int32 m_MinPoints; + ClusterUtilities::DistanceMetric m_DistMetric; + std::mt19937_64::result_type m_Seed; +}; + +struct DBSCANFunctor +{ + template + void operator()(bool cache, bool useRandom, DBSCAN* filter, const IDataArray& inputIDataArray, const std::unique_ptr& maskCompare, Int32Array& fIds, float32 epsilon, int32 minPoints, + ClusterUtilities::DistanceMetric distMetric, std::mt19937_64::result_type seed) + { + if(cache) + { + if(useRandom) + { + DBSCANTemplate(filter, dynamic_cast&>(inputIDataArray).getDataStoreRef(), maskCompare, fIds.getDataStoreRef(), epsilon, minPoints, distMetric, seed)(); + } + else + { + DBSCANTemplate(filter, dynamic_cast&>(inputIDataArray).getDataStoreRef(), maskCompare, fIds.getDataStoreRef(), epsilon, minPoints, distMetric, seed)(); + } + } + else + { + if(useRandom) + { + DBSCANTemplate(filter, dynamic_cast&>(inputIDataArray).getDataStoreRef(), maskCompare, fIds.getDataStoreRef(), epsilon, minPoints, distMetric, seed)(); + } + else + { + DBSCANTemplate(filter, dynamic_cast&>(inputIDataArray).getDataStoreRef(), maskCompare, fIds.getDataStoreRef(), epsilon, minPoints, distMetric, seed)(); + } + } + } +}; +} // namespace + +// 
----------------------------------------------------------------------------- +DBSCAN::DBSCAN(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, DBSCANInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +DBSCAN::~DBSCAN() noexcept = default; + +// ----------------------------------------------------------------------------- +void DBSCAN::updateProgress(const std::string& message) +{ + m_MessageHandler(IFilter::Message::Type::Info, message); +} + +// ----------------------------------------------------------------------------- +const std::atomic_bool& DBSCAN::getCancel() +{ + return m_ShouldCancel; +} + +// ----------------------------------------------------------------------------- +Result<> DBSCAN::operator()() +{ + auto& clusteringArray = m_DataStructure.getDataRefAs(m_InputValues->ClusteringArrayPath); + auto& featureIds = m_DataStructure.getDataRefAs(m_InputValues->FeatureIdsArrayPath); + + std::unique_ptr maskCompare; + try + { + maskCompare = InstantiateMaskCompare(m_DataStructure, m_InputValues->MaskArrayPath); + } catch(const std::out_of_range& exception) + { + // This really should NOT be happening as the path was verified during preflight BUT we may be calling this from + // somewhere else that is NOT going through the normal nx::core::IFilter API of Preflight and Execute + std::string message = fmt::format("Mask Array DataPath does not exist or is not of the correct type (Bool | UInt8) {}", m_InputValues->MaskArrayPath.toString()); + return MakeErrorResult(-54060, message); + } + + ExecuteNeighborFunction(DBSCANFunctor{}, clusteringArray.getDataType(), m_InputValues->AllowCaching, m_InputValues->UseRandom, this, clusteringArray, maskCompare, featureIds, m_InputValues->Epsilon, + m_InputValues->MinPoints, 
m_InputValues->DistanceMetric, m_InputValues->Seed); + + updateProgress("Resizing Clustering Attribute Matrix..."); + auto& featureIdsDataStore = featureIds.getDataStoreRef(); + int32 maxCluster = *std::max_element(featureIdsDataStore.begin(), featureIdsDataStore.end()); + m_DataStructure.getDataAs(m_InputValues->FeatureAM)->resizeTuples(AttributeMatrix::ShapeType{static_cast(maxCluster + 1)}); + + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/DBSCAN.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/DBSCAN.hpp new file mode 100644 index 0000000000..2dfac37a98 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/DBSCAN.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" +#include "simplnx/Parameters/ArrayCreationParameter.hpp" +#include "simplnx/Parameters/ArraySelectionParameter.hpp" +#include "simplnx/Parameters/ChoicesParameter.hpp" +#include "simplnx/Parameters/NumberParameter.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" + +#include + +namespace nx::core +{ +struct SIMPLNXCORE_EXPORT DBSCANInputValues +{ + DataPath ClusteringArrayPath; + DataPath MaskArrayPath; + DataPath FeatureIdsArrayPath; + float32 Epsilon; + int32 MinPoints; + ClusterUtilities::DistanceMetric DistanceMetric; + DataPath FeatureAM; + bool AllowCaching; + bool UseRandom; + std::mt19937_64::result_type Seed; +}; + +/** + * @class DBSCAN + */ +class SIMPLNXCORE_EXPORT DBSCAN +{ +public: + DBSCAN(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, DBSCANInputValues* inputValues); + ~DBSCAN() noexcept; + + DBSCAN(const DBSCAN&) = delete; + DBSCAN(DBSCAN&&) noexcept = delete; + DBSCAN& operator=(const DBSCAN&) = delete; + DBSCAN& operator=(DBSCAN&&) noexcept = delete; + + Result<> 
operator()(); + void updateProgress(const std::string& message); + const std::atomic_bool& getCancel(); + +private: + DataStructure& m_DataStructure; + const DBSCANInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.cpp index 58cc0b4497..03b1806d1d 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.cpp @@ -1,9 +1,9 @@ #include "Silhouette.hpp" #include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/DataArrayUtilities.hpp" #include "simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/KUtilities.hpp" #include @@ -27,7 +27,7 @@ class SilhouetteTemplate } SilhouetteTemplate(const IDataArray& inputIDataArray, Float64Array& outputDataArray, const std::unique_ptr& maskDataArray, usize numClusters, const Int32Array& featureIds, - KUtilities::DistanceMetric distMetric) + ClusterUtilities::DistanceMetric distMetric) : m_InputData(dynamic_cast(inputIDataArray)) , m_OutputData(outputDataArray) , m_Mask(maskDataArray) @@ -71,7 +71,7 @@ class SilhouetteTemplate { if(m_Mask->isTrue(j)) { - clusterDist[i][m_FeatureIds[j]] += KUtilities::GetDistance(m_InputData, (numCompDims * i), m_InputData, (numCompDims * j), numCompDims, m_DistMetric); + clusterDist[i][m_FeatureIds[j]] += ClusterUtilities::GetDistance(m_InputData, (numCompDims * i), m_InputData, (numCompDims * j), numCompDims, m_DistMetric); } } } @@ -127,7 +127,7 @@ class SilhouetteTemplate const Int32Array& m_FeatureIds; const std::unique_ptr& m_Mask; usize m_NumClusters; - KUtilities::DistanceMetric m_DistMetric; + ClusterUtilities::DistanceMetric m_DistMetric; }; 
} // namespace diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.hpp index 85a5b2cf55..e7c62f0b99 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/Silhouette.hpp @@ -8,13 +8,13 @@ #include "simplnx/Parameters/ArrayCreationParameter.hpp" #include "simplnx/Parameters/ArraySelectionParameter.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" -#include "simplnx/Utilities/KUtilities.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" namespace nx::core { struct SIMPLNXCORE_EXPORT SilhouetteInputValues { - KUtilities::DistanceMetric DistanceMetric; + ClusterUtilities::DistanceMetric DistanceMetric; DataPath ClusteringArrayPath; DataPath MaskArrayPath; DataPath FeatureIdsArrayPath; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp index cfc4eff4f4..b51b7419a1 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp @@ -15,7 +15,7 @@ #include "simplnx/Parameters/DataGroupCreationParameter.hpp" #include "simplnx/Parameters/DataObjectNameParameter.hpp" #include "simplnx/Parameters/NumberParameter.hpp" -#include "simplnx/Utilities/KUtilities.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/SIMPLConversion.hpp" @@ -75,7 +75,7 @@ Parameters ComputeKMeansFilter::parameters() const params.insert(std::make_unique(k_InitClusters_Key, "Number of Clusters", "This will be the tuple size for Cluster Attribute Matrix and the values within", 0)); params.insert( - std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", 
to_underlying(KUtilities::DistanceMetric::Euclidean), + std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", to_underlying(ClusterUtilities::DistanceMetric::Euclidean), ChoicesParameter::Choices{"Euclidean", "Squared Euclidean", "Manhattan", "Cosine", "Pearson", "Squared Pearson"})); // sequence dependent DO NOT REORDER params.insertSeparator(Parameters::Separator{"Optional Data Mask"}); @@ -187,7 +187,7 @@ Result<> ComputeKMeansFilter::executeImpl(DataStructure& dataStructure, const Ar ComputeKMeansInputValues inputValues; inputValues.InitClusters = filterArgs.value(k_InitClusters_Key); - inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); + inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); inputValues.MaskArrayPath = maskPath; inputValues.MeansArrayPath = filterArgs.value(k_FeatureAMPath_Key).createChildPath(filterArgs.value(k_MeansArrayName_Key)); inputValues.Seed = seed; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp index afe20e1798..3a8fa961b4 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp @@ -15,7 +15,7 @@ #include "simplnx/Parameters/DataGroupCreationParameter.hpp" #include "simplnx/Parameters/DataObjectNameParameter.hpp" #include "simplnx/Parameters/NumberParameter.hpp" -#include "simplnx/Utilities/KUtilities.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/SIMPLConversion.hpp" @@ -77,7 +77,7 @@ Parameters ComputeKMedoidsFilter::parameters() const DataPath{}, ArraySelectionParameter::AllowedTypes{DataType::boolean, DataType::uint8})); params.insert(std::make_unique(k_InitClusters_Key, "Number of Clusters", "This will be the tuple size for Cluster Attribute Matrix and the 
values within", 0)); params.insert( - std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", to_underlying(KUtilities::DistanceMetric::Euclidean), + std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", to_underlying(ClusterUtilities::DistanceMetric::Euclidean), ChoicesParameter::Choices{"Euclidean", "Squared Euclidean", "Manhattan", "Cosine", "Pearson", "Squared Pearson"})); // sequence dependent DO NOT REORDER params.insertSeparator(Parameters::Separator{"Input Data Objects"}); @@ -183,7 +183,7 @@ Result<> ComputeKMedoidsFilter::executeImpl(DataStructure& dataStructure, const KMedoidsInputValues inputValues; inputValues.InitClusters = filterArgs.value(k_InitClusters_Key); - inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); + inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); inputValues.MaskArrayPath = maskPath; inputValues.MedoidsArrayPath = filterArgs.value(k_FeatureAMPath_Key).createChildPath(filterArgs.value(k_MedoidsArrayName_Key)); inputValues.Seed = seed; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp new file mode 100644 index 0000000000..84d1219ddf --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp @@ -0,0 +1,245 @@ +#include "DBSCANFilter.hpp" + +#include "SimplnxCore/Filters/Algorithms/DBSCAN.hpp" + +#include "simplnx/Common/TypeTraits.hpp" +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/IDataArray.hpp" +#include "simplnx/Filter/Actions/CreateArrayAction.hpp" +#include "simplnx/Filter/Actions/CreateAttributeMatrixAction.hpp" +#include "simplnx/Filter/Actions/DeleteDataAction.hpp" +#include "simplnx/Parameters/ArraySelectionParameter.hpp" +#include 
"simplnx/Parameters/BoolParameter.hpp" +#include "simplnx/Parameters/ChoicesParameter.hpp" +#include "simplnx/Parameters/DataGroupCreationParameter.hpp" +#include "simplnx/Parameters/DataObjectNameParameter.hpp" +#include "simplnx/Parameters/NumberParameter.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" + +#include "simplnx/Utilities/SIMPLConversion.hpp" + +#include + +using namespace nx::core; + +namespace +{ +const std::string k_MaskName = "temp_mask"; +} // namespace + +namespace nx::core +{ +//------------------------------------------------------------------------------ +std::string DBSCANFilter::name() const +{ + return FilterTraits::name.str(); +} + +//------------------------------------------------------------------------------ +std::string DBSCANFilter::className() const +{ + return FilterTraits::className; +} + +//------------------------------------------------------------------------------ +Uuid DBSCANFilter::uuid() const +{ + return FilterTraits::uuid; +} + +//------------------------------------------------------------------------------ +std::string DBSCANFilter::humanName() const +{ + return "DBSCAN"; +} + +//------------------------------------------------------------------------------ +std::vector DBSCANFilter::defaultTags() const +{ + return {className(), "DBSCAN", "Clustering", "Segmentation", "Statistics"}; +} + +//------------------------------------------------------------------------------ +Parameters DBSCANFilter::parameters() const +{ + Parameters params; + + // Create the parameter descriptors that are needed for this filter + params.insertSeparator(Parameters::Separator{"Random Number Seed Parameters"}); + params.insertLinkableParameter(std::make_unique(k_InitTypeIndex_Key, "Initialization Type", + "Whether to use random or iterative for start state. 
See Documentation for further detail", to_underlying(AlgType::SeededRandom), + ChoicesParameter::Choices{"Iterative", "Random", "Seeded Random"})); // sequence dependent DO NOT REORDER + params.insert(std::make_unique>(k_SeedValue_Key, "Seed Value", "The seed fed into the random generator", std::mt19937::default_seed)); + params.insert(std::make_unique(k_SeedArrayName_Key, "Stored Seed Value Array Name", "Name of array holding the seed value", "DBSCAN SeedValue")); + + params.insertSeparator(Parameters::Separator{"Input Parameter(s)"}); + params.insert(std::make_unique(k_UsePrecaching_Key, "Use Precaching", "If true the algorithm will be significantly faster, but it requires more memory", true)); + params.insert(std::make_unique(k_Epsilon_Key, "Epsilon", "The epsilon-neighborhood around each point is queried", 0.0001)); + params.insert(std::make_unique(k_MinPoints_Key, "Minimum Points", + "The minimum number of points needed to form a 'dense region' (i.e., the minimum number of points needed to be called a cluster)", 2)); + params.insert( + std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", to_underlying(ClusterUtilities::DistanceMetric::Euclidean), + ChoicesParameter::Choices{"Euclidean", "Squared Euclidean", "Manhattan", "Cosine", "Pearson", "Squared Pearson"})); // sequence dependent DO NOT REORDER + + params.insertSeparator(Parameters::Separator{"Optional Data Mask"}); + params.insertLinkableParameter(std::make_unique(k_UseMask_Key, "Use Mask Array", "Specifies whether or not to use a mask array", false)); + params.insert(std::make_unique(k_MaskArrayPath_Key, "Cell Mask Array", + "DataPath to the boolean or uint8 mask array. 
Values that are true will mark that cell/point as usable.", DataPath{}, + ArraySelectionParameter::AllowedTypes{DataType::boolean, DataType::uint8})); + + params.insertSeparator(Parameters::Separator{"Input Data Objects"}); + params.insert(std::make_unique(k_SelectedArrayPath_Key, "Attribute Array to Cluster", "The data array to cluster", DataPath{}, nx::core::GetAllNumericTypes())); + + params.insertSeparator(Parameters::Separator{"Output Data Object(s)"}); + params.insert(std::make_unique(k_FeatureIdsArrayName_Key, "Cluster Ids Array Name", "Name of the ids array to be created in Attribute Array to Cluster's parent group", + "Cluster Ids")); + params.insert( + std::make_unique(k_FeatureAMPath_Key, "Cluster Attribute Matrix", "The complete path to the attribute matrix in which to store to hold Cluster Data", DataPath{})); + + // Associate the Linkable Parameter(s) to the children parameters that they control + params.linkParameters(k_InitTypeIndex_Key, k_SeedArrayName_Key, static_cast(to_underlying(AlgType::Random))); + params.linkParameters(k_InitTypeIndex_Key, k_SeedValue_Key, static_cast(to_underlying(AlgType::SeededRandom))); + params.linkParameters(k_InitTypeIndex_Key, k_SeedArrayName_Key, static_cast(to_underlying(AlgType::SeededRandom))); + params.linkParameters(k_UseMask_Key, k_MaskArrayPath_Key, true); + + return params; +} + +//------------------------------------------------------------------------------ +IFilter::UniquePointer DBSCANFilter::clone() const +{ + return std::make_unique(); +} + +//------------------------------------------------------------------------------ +IFilter::PreflightResult DBSCANFilter::preflightImpl(const DataStructure& dataStructure, const Arguments& filterArgs, const MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) const +{ + auto pEpsilonValue = filterArgs.value(k_Epsilon_Key); + auto pMinPointsValue = filterArgs.value(k_MinPoints_Key); + auto pUseMaskValue = filterArgs.value(k_UseMask_Key); + auto 
pSelectedArrayPathValue = filterArgs.value(k_SelectedArrayPath_Key); + auto pMaskArrayPathValue = filterArgs.value(k_MaskArrayPath_Key); + auto pFeatureIdsArrayNameValue = filterArgs.value(k_FeatureIdsArrayName_Key); + auto pFeatureAMPathValue = filterArgs.value(k_FeatureAMPath_Key); + + PreflightResult preflightResult; + nx::core::Result resultOutputActions; + std::vector preflightUpdatedValues; + + auto clusterArray = dataStructure.getDataAs(pSelectedArrayPathValue); + if(clusterArray == nullptr) + { + return MakePreflightErrorResult(-7585, "Array to Cluster MUST be a valid DataPath."); + } + + { + auto createAction = std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue)); + resultOutputActions.value().appendAction(std::move(createAction)); + } + + if(!pUseMaskValue) + { + DataPath tempPath = DataPath({k_MaskName}); + { + auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath); + resultOutputActions.value().appendAction(std::move(createAction)); + } + + resultOutputActions.value().appendDeferredAction(std::make_unique(tempPath)); + } + + // Resized later + { + auto createAction = std::make_unique(pFeatureAMPathValue, std::vector{1}); + resultOutputActions.value().appendAction(std::move(createAction)); + } + + // For caching seed run to run + if(static_cast(filterArgs.value(k_InitTypeIndex_Key)) != AlgType::Iterative) + { + auto createAction = std::make_unique(DataType::uint64, std::vector{1}, std::vector{1}, DataPath({filterArgs.value(k_SeedArrayName_Key)})); + resultOutputActions.value().appendAction(std::move(createAction)); + } + + // Return both the resultOutputActions and the preflightUpdatedValues via std::move() + return {std::move(resultOutputActions), std::move(preflightUpdatedValues)}; +} + +//------------------------------------------------------------------------------ +Result<> 
DBSCANFilter::executeImpl(DataStructure& dataStructure, const Arguments& filterArgs, const PipelineFilter* pipelineNode, const MessageHandler& messageHandler, + const std::atomic_bool& shouldCancel) const +{ + auto maskPath = filterArgs.value(k_MaskArrayPath_Key); + if(!filterArgs.value(k_UseMask_Key)) + { + maskPath = DataPath({k_MaskName}); + dataStructure.getDataRefAs(maskPath).fill(true); + } + + auto seed = filterArgs.value(k_SeedValue_Key); + if(static_cast(filterArgs.value(k_InitTypeIndex_Key)) != AlgType::SeededRandom) + { + seed = static_cast(std::chrono::steady_clock::now().time_since_epoch().count()); + } + + if(static_cast(filterArgs.value(k_InitTypeIndex_Key)) != AlgType::Iterative) + { + // Store Seed Value in Top Level Array + dataStructure.getDataRefAs(DataPath({filterArgs.value(k_SeedArrayName_Key)}))[0] = seed; + } + + DBSCANInputValues inputValues; + + inputValues.Epsilon = filterArgs.value(k_Epsilon_Key); + inputValues.MinPoints = filterArgs.value(k_MinPoints_Key); + inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); + inputValues.MaskArrayPath = maskPath; + inputValues.ClusteringArrayPath = filterArgs.value(k_SelectedArrayPath_Key); + auto fIdsPath = inputValues.ClusteringArrayPath.replaceName(filterArgs.value(k_FeatureIdsArrayName_Key)); + dataStructure.getDataAs(fIdsPath)->fill(0); + inputValues.FeatureIdsArrayPath = fIdsPath; + inputValues.FeatureAM = filterArgs.value(k_FeatureAMPath_Key); + inputValues.AllowCaching = filterArgs.value(k_UsePrecaching_Key); + inputValues.UseRandom = static_cast(filterArgs.value(k_InitTypeIndex_Key)) != AlgType::Iterative; + inputValues.Seed = seed; // Use the resolved seed (clock-based unless Seeded Random) so the run matches the value stored in the seed array + + return DBSCAN(dataStructure, messageHandler, shouldCancel, &inputValues)(); +} + +namespace +{ +namespace SIMPL +{ +constexpr StringLiteral k_EpsilonKey = "Epsilon"; +constexpr StringLiteral k_MinPointsKey = "MinPnts"; +constexpr StringLiteral k_DistanceMetricKey = "DistanceMetric"; +constexpr 
StringLiteral k_UseMaskKey = "UseMask"; +constexpr StringLiteral k_SelectedArrayPathKey = "SelectedArrayPath"; +constexpr StringLiteral k_MaskArrayPathKey = "MaskArrayPath"; +constexpr StringLiteral k_FeatureIdsArrayNameKey = "FeatureIdsArrayName"; +constexpr StringLiteral k_FeatureAttributeMatrixNameKey = "FeatureAttributeMatrixName"; +} // namespace SIMPL +} // namespace + +Result DBSCANFilter::FromSIMPLJson(const nlohmann::json& json) +{ + Arguments args = DBSCANFilter().getDefaultArguments(); + + std::vector> results; + + results.push_back(SIMPLConversion::ConvertParameter>(args, json, SIMPL::k_EpsilonKey, k_Epsilon_Key)); + results.push_back(SIMPLConversion::ConvertParameter>(args, json, SIMPL::k_MinPointsKey, k_MinPoints_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_DistanceMetricKey, k_DistanceMetric_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_UseMaskKey, k_UseMask_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_SelectedArrayPathKey, k_SelectedArrayPath_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_MaskArrayPathKey, k_MaskArrayPath_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_FeatureIdsArrayNameKey, k_FeatureIdsArrayName_Key)); + results.push_back(SIMPLConversion::Convert2Parameters(args, json, SIMPL::k_SelectedArrayPathKey, SIMPL::k_FeatureAttributeMatrixNameKey, + k_FeatureAMPath_Key)); + + Result<> conversionResult = MergeResults(std::move(results)); + + return ConvertResultTo(std::move(conversionResult), std::move(args)); +} +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.hpp new file mode 100644 index 0000000000..babeaf813c --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.hpp @@ -0,0 +1,122 @@ +#pragma once + +#include 
"SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/Filter/FilterTraits.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ +enum AlgType +{ + Iterative, + Random, + SeededRandom +}; + +/** + * @class DBSCANFilter + * @brief This filter clusters the tuples of the selected array with the DBSCAN algorithm and creates an int32 cluster ids array next to it, plus a cluster Attribute Matrix. + */ +class SIMPLNXCORE_EXPORT DBSCANFilter : public IFilter +{ +public: + DBSCANFilter() = default; + ~DBSCANFilter() noexcept override = default; + + DBSCANFilter(const DBSCANFilter&) = delete; + DBSCANFilter(DBSCANFilter&&) noexcept = delete; + + DBSCANFilter& operator=(const DBSCANFilter&) = delete; + DBSCANFilter& operator=(DBSCANFilter&&) noexcept = delete; + + // Parameter Keys + static inline constexpr StringLiteral k_InitTypeIndex_Key = "init_type_index"; + static inline constexpr StringLiteral k_SeedValue_Key = "seed_value"; + static inline constexpr StringLiteral k_SeedArrayName_Key = "seed_array_name"; + static inline constexpr StringLiteral k_UsePrecaching_Key = "use_precaching"; + static inline constexpr StringLiteral k_Epsilon_Key = "epsilon"; + static inline constexpr StringLiteral k_MinPoints_Key = "min_points"; + static inline constexpr StringLiteral k_DistanceMetric_Key = "distance_metric_index"; + static inline constexpr StringLiteral k_UseMask_Key = "use_mask"; + static inline constexpr StringLiteral k_SelectedArrayPath_Key = "selected_array_path"; + static inline constexpr StringLiteral k_MaskArrayPath_Key = "mask_array_path"; + static inline constexpr StringLiteral k_FeatureIdsArrayName_Key = "feature_ids_array_name"; + static inline constexpr StringLiteral k_FeatureAMPath_Key = "feature_attribute_matrix_path"; + + /** + * @brief Reads SIMPL json and converts it to simplnx Arguments. + * @param json + * @return Result + */ + static Result FromSIMPLJson(const nlohmann::json& json); + + /** + * @brief Returns the name of the filter. + * @return + */ + std::string name() const override; + + /** + * @brief Returns the C++ classname of this filter. 
+ * @return + */ + std::string className() const override; + + /** + * @brief Returns the uuid of the filter. + * @return + */ + Uuid uuid() const override; + + /** + * @brief Returns the human readable name of the filter. + * @return + */ + std::string humanName() const override; + + /** + * @brief Returns the default tags for this filter. + * @return + */ + std::vector defaultTags() const override; + + /** + * @brief Returns the parameters of the filter (i.e. its inputs) + * @return + */ + Parameters parameters() const override; + + /** + * @brief Returns a copy of the filter. + * @return + */ + UniquePointer clone() const override; + +protected: + /** + * @brief Takes in a DataStructure and checks that the filter can be run on it with the given arguments. + * Returns any warnings/errors. Also returns the changes that would be applied to the DataStructure. + * Some parts of the actions may not be completely filled out if all the required information is not available at preflight time. + * @param ds The input DataStructure instance + * @param filterArgs These are the input values for each parameter that is required for the filter + * @param messageHandler The MessageHandler object + * @return Returns a Result object with error or warning values if any of those occurred during execution of this function + */ + PreflightResult preflightImpl(const DataStructure& dataStructure, const Arguments& filterArgs, const MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) const override; + + /** + * @brief Applies the filter's algorithm to the DataStructure with the given arguments. Returns any warnings/errors. + * On failure, there is no guarantee that the DataStructure is in a correct state. 
+ * @param ds The input DataStructure instance + * @param filterArgs These are the input values for each parameter that is required for the filter + * @param messageHandler The MessageHandler object + * @return Returns a Result object with error or warning values if any of those occurred during execution of this function + */ + Result<> executeImpl(DataStructure& dataStructure, const Arguments& filterArgs, const PipelineFilter* pipelineNode, const MessageHandler& messageHandler, + const std::atomic_bool& shouldCancel) const override; +}; +} // namespace nx::core + +SIMPLNX_DEF_FILTER_TRAITS(nx::core, DBSCANFilter, "763dad44-fad7-4606-808f-617867257b98"); +/* LEGACY UUID FOR THIS FILTER c2d4f1e8-2b04-5d82-b90f-2191e8f4262e */ diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp index dc6e054d73..b200b6bacf 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp @@ -15,7 +15,7 @@ #include "simplnx/Utilities/SIMPLConversion.hpp" -#include "simplnx/Utilities/KUtilities.hpp" +#include "simplnx/Utilities/ClusteringUtilities.hpp" using namespace nx::core; @@ -63,7 +63,7 @@ Parameters SilhouetteFilter::parameters() const params.insertSeparator(Parameters::Separator{"Input Parameter(s)"}); params.insert( - std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", to_underlying(KUtilities::DistanceMetric::Euclidean), + std::make_unique(k_DistanceMetric_Key, "Distance Metric", "Distance Metric type to be used for calculations", to_underlying(ClusterUtilities::DistanceMetric::Euclidean), ChoicesParameter::Choices{"Euclidean", "Squared Euclidean", "Manhattan", "Cosine", "Pearson", "Squared Pearson"})); // sequence dependent DO NOT REORDER // Create the parameter descriptors that are needed for this filter @@ -147,7 +147,7 @@ 
Result<> SilhouetteFilter::executeImpl(DataStructure& dataStructure, const Argum SilhouetteInputValues inputValues; - inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); + inputValues.DistanceMetric = static_cast(filterArgs.value(k_DistanceMetric_Key)); inputValues.ClusteringArrayPath = filterArgs.value(k_SelectedArrayPath_Key); inputValues.MaskArrayPath = maskPath; inputValues.FeatureIdsArrayPath = filterArgs.value(k_FeatureIdsArrayPath_Key); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/SimplnxCoreLegacyUUIDMapping.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/SimplnxCoreLegacyUUIDMapping.hpp index 0c3291262b..204699ed83 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/SimplnxCoreLegacyUUIDMapping.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/SimplnxCoreLegacyUUIDMapping.hpp @@ -31,6 +31,7 @@ #include "SimplnxCore/Filters/CreateImageGeometryFilter.hpp" #include "SimplnxCore/Filters/CropImageGeometryFilter.hpp" #include "SimplnxCore/Filters/CropVertexGeometryFilter.hpp" +#include "SimplnxCore/Filters/DBSCANFilter.hpp" #include "SimplnxCore/Filters/DeleteDataFilter.hpp" #include "SimplnxCore/Filters/ErodeDilateCoordinationNumberFilter.hpp" #include "SimplnxCore/Filters/ErodeDilateMaskFilter.hpp" @@ -152,7 +153,8 @@ namespace nx::core {nx::core::Uuid::FromString("f2132744-3abb-5d66-9cd9-c9a233b5c4aa").value(), {nx::core::FilterTraits::uuid, &CreateImageGeometryFilter::FromSIMPLJson}}, // CreateImageGeometryFilter {nx::core::Uuid::FromString("baa4b7fe-31e5-5e63-a2cb-0bb9d844cfaf").value(), {nx::core::FilterTraits::uuid, &CropImageGeometryFilter::FromSIMPLJson}}, // CropImageGeometryFilter {nx::core::Uuid::FromString("f28cbf07-f15a-53ca-8c7f-b41a11dae6cc").value(), {nx::core::FilterTraits::uuid, &CropVertexGeometryFilter::FromSIMPLJson}}, // CropVertexGeometryFilter - {nx::core::Uuid::FromString("7b1c8f46-90dd-584a-b3ba-34e16958a7d0").value(), {nx::core::FilterTraits::uuid, &DeleteDataFilter::FromSIMPLJson}}, // 
RemoveArrays + {nx::core::Uuid::FromString("c2d4f1e8-2b04-5d82-b90f-2191e8f4262e").value(), {nx::core::FilterTraits::uuid, &DBSCANFilter::FromSIMPLJson}}, // DBSCAN + {nx::core::Uuid::FromString("7b1c8f46-90dd-584a-b3ba-34e16958a7d0").value(), {nx::core::FilterTraits::uuid, &DeleteDataFilter::FromSIMPLJson}}, // RemoveArrays {nx::core::Uuid::FromString("3fcd4c43-9d75-5b86-aad4-4441bc914f37").value(), {nx::core::FilterTraits::uuid, &WriteDREAM3DFilter::FromSIMPLJson}}, // DataContainerWriter {nx::core::Uuid::FromString("52a069b4-6a46-5810-b0ec-e0693c636034").value(), {nx::core::FilterTraits::uuid, &ExtractInternalSurfacesFromTriangleGeometryFilter::FromSIMPLJson}}, // ExtractInternalSurfacesFromTriangleGeometryFilter {nx::core::Uuid::FromString("737b8d5a-8622-50f9-9a8a-bfdb57608891").value(), {nx::core::FilterTraits::uuid, &WriteFeatureDataCSVFilter::FromSIMPLJson}}, // FeatureDataCSVWriter diff --git a/src/Plugins/SimplnxCore/test/CMakeLists.txt b/src/Plugins/SimplnxCore/test/CMakeLists.txt index 575cc834d4..463c1e2609 100644 --- a/src/Plugins/SimplnxCore/test/CMakeLists.txt +++ b/src/Plugins/SimplnxCore/test/CMakeLists.txt @@ -56,6 +56,7 @@ set(${PLUGIN_NAME}UnitTest_SRCS CreatePythonSkeletonTest.cpp CropImageGeometryTest.cpp CropVertexGeometryTest.cpp + DBSCANTest.cpp DeleteDataTest.cpp DREAM3DFileTest.cpp ErodeDilateBadDataTest.cpp @@ -233,6 +234,7 @@ if(EXISTS "${DREAM3D_DATA_DIR}" AND SIMPLNX_DOWNLOAD_TEST_FILES) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME generate_color_table_test.tar.gz SHA512 b5683c758964eb723267400b14047f8adb0d5365ee9ca93d1a6940e9b6ad198cd4739c1ca799eb787b7706e668dbc16ab8243642034cdba5b71d64c27e682d3f) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME read_vtk_structured_points_test.tar.gz SHA512 e7a07a4e3901204c2562754cd71e0fdba1a46de2a5135bad2b6d66b40eefd0e63bed4dbe0ccd6ccadafb708ef63e20635d080aa3a35c172c4ced6986e0f75d5c) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME 
ReadSTLFileTest.tar.gz SHA512 975587206625ffa183160308934e767347de55a34a16272cf5c121114efa286b3c6939e3c6a397e8728fdefe1771bc024bd4c9b409afdff0b76f2f56fcb9eb69) + download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME DBSCAN_tests.tar.gz SHA512 fba3d6c02cde5eeeccad0153032e403c20826365c2362351b8e1048b3d385480e7fba828748196e03ffa95aff569420630b0506441d9da9893d25db40390fdf0) endif() # ----------------------------------------------------------------------------- diff --git a/src/Plugins/SimplnxCore/test/DBSCANTest.cpp b/src/Plugins/SimplnxCore/test/DBSCANTest.cpp new file mode 100644 index 0000000000..154586a75a --- /dev/null +++ b/src/Plugins/SimplnxCore/test/DBSCANTest.cpp @@ -0,0 +1,225 @@ +#include + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Parameters/ChoicesParameter.hpp" +#include "simplnx/UnitTest/UnitTestCommon.hpp" + +#include "SimplnxCore/Filters/DBSCANFilter.hpp" +#include "SimplnxCore/SimplnxCore_test_dirs.hpp" + +#include +namespace fs = std::filesystem; + +using namespace nx::core; + +namespace +{ +constexpr std::array k_CircleIndexes = {553, 554, 555, 557, 601, 602, 649, 651, 696, 697, 742, 744}; +constexpr std::array k_TriangleIndexes = {556, 600, 603, 647, 694, 743, 745}; +constexpr std::array k_XIndexes = {604, 648, 650, 695, 698, 741}; + +const std::string k_TargetArrayName = "STRAIN"; +const std::string k_ClusterDataNX = "ClusterAM"; + +const DataPath k_GeomPath = DataPath({"6_6_DBSCAN"}); +const DataPath k_CellPath = k_GeomPath.createChildPath(Constants::k_CellData); +const DataPath k_TargetArrayPath = k_CellPath.createChildPath(k_TargetArrayName); +const DataPath k_ClusterDataPathNX = k_GeomPath.createChildPath(k_ClusterDataNX); + +const std::string k_ClusterIdsName = "ClusterIds"; +const std::string k_ClusterIdsNameNX = k_ClusterIdsName + "NX"; + +const DataPath k_ClusterIdsPath = k_CellPath.createChildPath(k_ClusterIdsName); +const DataPath k_ClusterIdsPathNX = 
k_CellPath.createChildPath(k_ClusterIdsNameNX); +} // namespace + +TEST_CASE("SimplnxCore::DBSCAN: Valid Filter Execution (Precached, Iterative)", "[SimplnxCore][DBSCAN]") +{ + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_CMakeExecutable, nx::core::unit_test::k_TestFilesDir, "DBSCAN_tests.tar.gz", "DBSCAN_tests"); + DataStructure dataStructure = UnitTest::LoadDataStructure(fs::path(fmt::format("{}/DBSCAN_tests/default/6_5_DBSCAN_Data.dream3d", unit_test::k_TestFilesDir))); + + { + // Instantiate the filter and an Arguments Object + DBSCANFilter filter; + Arguments args; + + // Create default Parameters for the filter. + args.insertOrAssign(DBSCANFilter::k_InitTypeIndex_Key, std::make_any(to_underlying(AlgType::Iterative))); + args.insertOrAssign(DBSCANFilter::k_UsePrecaching_Key, std::make_any(true)); + args.insertOrAssign(DBSCANFilter::k_Epsilon_Key, std::make_any(0.01)); + args.insertOrAssign(DBSCANFilter::k_MinPoints_Key, std::make_any(50)); + args.insertOrAssign(DBSCANFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_SelectedArrayPath_Key, std::make_any(k_TargetArrayPath)); + args.insertOrAssign(DBSCANFilter::k_FeatureIdsArrayName_Key, std::make_any(k_ClusterIdsNameNX)); + args.insertOrAssign(DBSCANFilter::k_FeatureAMPath_Key, std::make_any(k_ClusterDataPathNX)); + + // Preflight the filter and check result + auto preflightResult = filter.preflight(dataStructure, args); + REQUIRE(preflightResult.outputActions.valid()); + + // Execute the filter and check the result + auto executeResult = filter.execute(dataStructure, args); + REQUIRE(executeResult.result.valid()); + } + + UnitTest::CompareDataArrays(dataStructure.getDataRefAs(k_ClusterIdsPath), dataStructure.getDataRefAs(k_ClusterIdsPathNX)); + + // Write the DataStructure out to the file system +#ifdef SIMPLNX_WRITE_TEST_OUTPUT + WriteTestDataStructure(dataStructure, fs::path(fmt::format("{}/7_0_DBSCAN_precached_iterative_test.dream3d", 
unit_test::k_BinaryTestOutputDir))); +#endif +} + +TEST_CASE("SimplnxCore::DBSCAN: Valid Filter Execution (uncached, Iterative)", "[SimplnxCore][DBSCAN]") +{ + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_CMakeExecutable, nx::core::unit_test::k_TestFilesDir, "DBSCAN_tests.tar.gz", "DBSCAN_tests"); + DataStructure dataStructure = UnitTest::LoadDataStructure(fs::path(fmt::format("{}/DBSCAN_tests/default/6_5_DBSCAN_Data.dream3d", unit_test::k_TestFilesDir))); + + { + // Instantiate the filter and an Arguments Object + DBSCANFilter filter; + Arguments args; + + // Create default Parameters for the filter. + args.insertOrAssign(DBSCANFilter::k_InitTypeIndex_Key, std::make_any(to_underlying(AlgType::Iterative))); + args.insertOrAssign(DBSCANFilter::k_UsePrecaching_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_Epsilon_Key, std::make_any(0.01)); + args.insertOrAssign(DBSCANFilter::k_MinPoints_Key, std::make_any(50)); + args.insertOrAssign(DBSCANFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_SelectedArrayPath_Key, std::make_any(k_TargetArrayPath)); + args.insertOrAssign(DBSCANFilter::k_FeatureIdsArrayName_Key, std::make_any(k_ClusterIdsNameNX)); + args.insertOrAssign(DBSCANFilter::k_FeatureAMPath_Key, std::make_any(k_ClusterDataPathNX)); + + // Preflight the filter and check result + auto preflightResult = filter.preflight(dataStructure, args); + REQUIRE(preflightResult.outputActions.valid()); + + // Execute the filter and check the result + auto executeResult = filter.execute(dataStructure, args); + REQUIRE(executeResult.result.valid()); + } + + UnitTest::CompareDataArrays(dataStructure.getDataRefAs(k_ClusterIdsPath), dataStructure.getDataRefAs(k_ClusterIdsPathNX)); + + // Write the DataStructure out to the file system +#ifdef SIMPLNX_WRITE_TEST_OUTPUT + WriteTestDataStructure(dataStructure, fs::path(fmt::format("{}/7_0_DBSCAN_uncached_iterative_test.dream3d", 
unit_test::k_BinaryTestOutputDir))); +#endif +} + +TEST_CASE("SimplnxCore::DBSCAN: Valid Filter Execution (precached, Random)", "[SimplnxCore][DBSCAN]") +{ + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_CMakeExecutable, nx::core::unit_test::k_TestFilesDir, "DBSCAN_tests.tar.gz", "DBSCAN_tests"); + DataStructure dataStructure = UnitTest::LoadDataStructure(fs::path(fmt::format("{}/DBSCAN_tests/default/6_5_DBSCAN_Data.dream3d", unit_test::k_TestFilesDir))); + + { + // Instantiate the filter and an Arguments Object + DBSCANFilter filter; + Arguments args; + + // Create default Parameters for the filter. + args.insertOrAssign(DBSCANFilter::k_InitTypeIndex_Key, std::make_any(to_underlying(AlgType::SeededRandom))); + args.insertOrAssign(DBSCANFilter::k_SeedValue_Key, std::make_any(5489)); + args.insertOrAssign(DBSCANFilter::k_UsePrecaching_Key, std::make_any(true)); + args.insertOrAssign(DBSCANFilter::k_Epsilon_Key, std::make_any(0.01)); + args.insertOrAssign(DBSCANFilter::k_MinPoints_Key, std::make_any(50)); + args.insertOrAssign(DBSCANFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_SelectedArrayPath_Key, std::make_any(k_TargetArrayPath)); + args.insertOrAssign(DBSCANFilter::k_FeatureIdsArrayName_Key, std::make_any(k_ClusterIdsNameNX)); + args.insertOrAssign(DBSCANFilter::k_FeatureAMPath_Key, std::make_any(k_ClusterDataPathNX)); + + // Preflight the filter and check result + auto preflightResult = filter.preflight(dataStructure, args); + REQUIRE(preflightResult.outputActions.valid()); + + // Execute the filter and check the result + auto executeResult = filter.execute(dataStructure, args); + REQUIRE(executeResult.result.valid()); + } + + const auto& clusterIdsDataStore = dataStructure.getDataRefAs(k_ClusterIdsPathNX).getDataStoreRef(); + REQUIRE(*std::max_element(clusterIdsDataStore.cbegin(), clusterIdsDataStore.cend()) == 1); + + // Write the DataStructure out to the file system +#ifdef 
SIMPLNX_WRITE_TEST_OUTPUT + WriteTestDataStructure(dataStructure, fs::path(fmt::format("{}/7_0_DBSCAN_precached_random_test.dream3d", unit_test::k_BinaryTestOutputDir))); +#endif +} + +TEST_CASE("SimplnxCore::DBSCAN: Valid Filter Execution (uncached, Random)", "[SimplnxCore][DBSCAN]") +{ + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_CMakeExecutable, nx::core::unit_test::k_TestFilesDir, "DBSCAN_tests.tar.gz", "DBSCAN_tests"); + DataStructure dataStructure = UnitTest::LoadDataStructure(fs::path(fmt::format("{}/DBSCAN_tests/default/6_5_DBSCAN_Data.dream3d", unit_test::k_TestFilesDir))); + + { + // Instantiate the filter and an Arguments Object + DBSCANFilter filter; + Arguments args; + + // Create default Parameters for the filter. + args.insertOrAssign(DBSCANFilter::k_InitTypeIndex_Key, std::make_any(to_underlying(AlgType::SeededRandom))); + args.insertOrAssign(DBSCANFilter::k_SeedValue_Key, std::make_any(5489)); + args.insertOrAssign(DBSCANFilter::k_UsePrecaching_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_Epsilon_Key, std::make_any(0.01)); + args.insertOrAssign(DBSCANFilter::k_MinPoints_Key, std::make_any(50)); + args.insertOrAssign(DBSCANFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_SelectedArrayPath_Key, std::make_any(k_TargetArrayPath)); + args.insertOrAssign(DBSCANFilter::k_FeatureIdsArrayName_Key, std::make_any(k_ClusterIdsNameNX)); + args.insertOrAssign(DBSCANFilter::k_FeatureAMPath_Key, std::make_any(k_ClusterDataPathNX)); + + // Preflight the filter and check result + auto preflightResult = filter.preflight(dataStructure, args); + REQUIRE(preflightResult.outputActions.valid()); + + // Execute the filter and check the result + auto executeResult = filter.execute(dataStructure, args); + REQUIRE(executeResult.result.valid()); + } + + const auto& clusterIdsDataStore = dataStructure.getDataRefAs(k_ClusterIdsPathNX).getDataStoreRef(); + 
REQUIRE(*std::max_element(clusterIdsDataStore.cbegin(), clusterIdsDataStore.cend()) == 1); + + // Write the DataStructure out to the file system +#ifdef SIMPLNX_WRITE_TEST_OUTPUT + WriteTestDataStructure(dataStructure, fs::path(fmt::format("{}/7_0_DBSCAN_uncached_random_test.dream3d", unit_test::k_BinaryTestOutputDir))); +#endif +} + +TEST_CASE("SimplnxCore::DBSCAN: Valid Detailed Filter Execution (cached, Random)", "[SimplnxCore][DBSCAN]") +{ + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_CMakeExecutable, nx::core::unit_test::k_TestFilesDir, "DBSCAN_tests.tar.gz", "DBSCAN_tests"); + DataStructure dataStructure = UnitTest::LoadDataStructure(fs::path(fmt::format("{}/DBSCAN_tests/default/6_5_DBSCAN_Data.dream3d", unit_test::k_TestFilesDir))); + + { + // Instantiate the filter and an Arguments Object + DBSCANFilter filter; + Arguments args; + + // Create default Parameters for the filter. + args.insertOrAssign(DBSCANFilter::k_InitTypeIndex_Key, std::make_any(to_underlying(AlgType::SeededRandom))); + args.insertOrAssign(DBSCANFilter::k_SeedValue_Key, std::make_any(5489)); + args.insertOrAssign(DBSCANFilter::k_UsePrecaching_Key, std::make_any(true)); + args.insertOrAssign(DBSCANFilter::k_Epsilon_Key, std::make_any(0.06)); + args.insertOrAssign(DBSCANFilter::k_MinPoints_Key, std::make_any(100)); + args.insertOrAssign(DBSCANFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(DBSCANFilter::k_SelectedArrayPath_Key, std::make_any(k_TargetArrayPath)); + args.insertOrAssign(DBSCANFilter::k_FeatureIdsArrayName_Key, std::make_any(k_ClusterIdsNameNX)); + args.insertOrAssign(DBSCANFilter::k_FeatureAMPath_Key, std::make_any(k_ClusterDataPathNX)); + + // Preflight the filter and check result + auto preflightResult = filter.preflight(dataStructure, args); + REQUIRE(preflightResult.outputActions.valid()); + + // Execute the filter and check the result + auto executeResult = filter.execute(dataStructure, args); + 
REQUIRE(executeResult.result.valid()); + } + + const auto& clusterIdsDataStore = dataStructure.getDataRefAs(k_ClusterIdsPathNX).getDataStoreRef(); + REQUIRE(*std::max_element(clusterIdsDataStore.cbegin(), clusterIdsDataStore.cend()) == 2); + + // Write the DataStructure out to the file system +#ifdef SIMPLNX_WRITE_TEST_OUTPUT + WriteTestDataStructure(dataStructure, fs::path(fmt::format("{}/7_0_DBSCAN_detailed_test.dream3d", unit_test::k_BinaryTestOutputDir))); +#endif +} diff --git a/src/simplnx/Utilities/KUtilities.hpp b/src/simplnx/Utilities/ClusteringUtilities.hpp similarity index 97% rename from src/simplnx/Utilities/KUtilities.hpp rename to src/simplnx/Utilities/ClusteringUtilities.hpp index 08d3dbfa51..42245f5964 100644 --- a/src/simplnx/Utilities/KUtilities.hpp +++ b/src/simplnx/Utilities/ClusteringUtilities.hpp @@ -5,7 +5,7 @@ #include -namespace nx::core::KUtilities +namespace nx::core::ClusterUtilities { enum SIMPLNX_EXPORT DistanceMetric { @@ -134,4 +134,4 @@ float64 GetDistance(const leftDataType& leftVector, usize leftOffset, const righ // Return the correct primitive type for distance return dist; } -} // namespace nx::core::KUtilities +} // namespace nx::core::ClusterUtilities diff --git a/src/simplnx/Utilities/FilterUtilities.hpp b/src/simplnx/Utilities/FilterUtilities.hpp index cbea57c2e7..b99110de1d 100644 --- a/src/simplnx/Utilities/FilterUtilities.hpp +++ b/src/simplnx/Utilities/FilterUtilities.hpp @@ -179,7 +179,7 @@ auto ExecuteNeighborFunction(FuncT&& func, DataType dataType, ArgsT&&... args) throw std::runtime_error("Cannot create a NeighborList of booleans."); } default: { - throw std::runtime_error("nx::core::ExecuteDataFunction<...>(FuncT&& func, DataType dataType, ArgsT&&... args). Error: Invalid DataType"); + throw std::runtime_error("nx::core::ExecuteNeighborFunction<...>(FuncT&& func, DataType dataType, ArgsT&&... args). Error: Invalid DataType"); } } }