Skip to content

Commit

Permalink
extraneous code removal and AbstractDataStore integration for Compute…
Browse files Browse the repository at this point in the history
…FeatureCentroids and ComputeFeatureClustering
  • Loading branch information
nyoungbq committed Jul 23, 2024
1 parent 8613142 commit 25f550a
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,20 @@ using namespace nx::core;

namespace
{

class ComputeFeatureCentroidsImpl1
{
public:
ComputeFeatureCentroidsImpl1(ComputeFeatureCentroids* filter, double* sum, double* center, size_t* count, std::array<size_t, 3> dims, const nx::core::ImageGeom& imageGeom,
const Int32Array& featureIds)
: m_Filter(filter)
, m_Sum(sum)
ComputeFeatureCentroidsImpl1(double* sum, double* center, size_t* count, std::array<size_t, 3> dims, const nx::core::ImageGeom& imageGeom, const Int32AbstractDataStore& featureIds)
: m_Sum(sum)
, m_Center(center)
, m_Count(count)
, m_Dims(dims)
, m_ImageGeom(imageGeom)
, m_FeatureIds(featureIds.getDataStoreRef())
, m_FeatureIds(featureIds)
{
}
~ComputeFeatureCentroidsImpl1() = default;
void compute(int64_t minFeatureId, int64_t maxFeatureId) const
void compute(usize minFeatureId, usize maxFeatureId) const
{
for(size_t i = 0; i < m_Dims[2]; i++)
{
Expand All @@ -35,7 +32,7 @@ class ComputeFeatureCentroidsImpl1
size_t yStride = j * m_Dims[0];
for(size_t k = 0; k < m_Dims[0]; k++)
{
int32_t featureId = m_FeatureIds[zStride + yStride + k]; // Get the current FeatureId
int32 featureId = m_FeatureIds[zStride + yStride + k]; // Get the current FeatureId
if(featureId < minFeatureId || featureId >= maxFeatureId)
{
continue;
Expand Down Expand Up @@ -76,11 +73,9 @@ class ComputeFeatureCentroidsImpl1
}

private:
ComputeFeatureCentroids* m_Filter = nullptr;
double* m_Sum = nullptr;
double* m_Center = nullptr;
size_t* m_Count = nullptr;
size_t m_TotalFeatures = 0;
std::array<size_t, 3> m_Dims = {0, 0, 0};
const nx::core::ImageGeom& m_ImageGeom;
const Int32AbstractDataStore& m_FeatureIds;
Expand Down Expand Up @@ -111,16 +106,15 @@ const std::atomic_bool& ComputeFeatureCentroids::getCancel()
Result<> ComputeFeatureCentroids::operator()()
{
// Input Cell Data
const auto& featureIds = m_DataStructure.getDataRefAs<Int32Array>(m_InputValues->FeatureIdsArrayPath);
const auto& featureIds = m_DataStructure.getDataAs<Int32Array>(m_InputValues->FeatureIdsArrayPath)->getDataStoreRef();

// Output Feature Data
auto& centroidsArray = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->CentroidsArrayPath);
auto& centroids = centroidsArray.getDataStoreRef();
auto& centroids = m_DataStructure.getDataAs<Float32Array>(m_InputValues->CentroidsArrayPath)->getDataStoreRef();

// Required Geometry
const auto& imageGeom = m_DataStructure.getDataRefAs<ImageGeom>(m_InputValues->ImageGeometryPath);

size_t totalFeatures = centroidsArray.getNumberOfTuples();
size_t totalFeatures = centroids.getNumberOfTuples();

size_t xPoints = imageGeom.getNumXCells();
size_t yPoints = imageGeom.getNumYCells();
Expand All @@ -138,7 +132,7 @@ Result<> ComputeFeatureCentroids::operator()()
// by the total number of cores/threads and do a ParallelTask Algorithm instead
// we might see some speedup.
dataAlg.setParallelizationEnabled(false);
dataAlg.execute(ComputeFeatureCentroidsImpl1(this, sum.data(), center.data(), count.data(), {xPoints, yPoints, zPoints}, imageGeom, featureIds));
dataAlg.execute(ComputeFeatureCentroidsImpl1(sum.data(), center.data(), count.data(), {xPoints, yPoints, zPoints}, imageGeom, featureIds));

// Here we are only looping over the number of features so let this just go in serial mode.
for(size_t featureId = 0; featureId < totalFeatures; featureId++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "simplnx/DataStructure/NeighborList.hpp"

#include <random>
#include <simplnx/Utilities/DataArrayUtilities.hpp>

using namespace nx::core;

Expand Down Expand Up @@ -34,14 +35,12 @@ std::vector<float32> GenerateRandomDistribution(float32 minDistance, float32 max

freq.resize(static_cast<size_t>(currentNumBins + 1));

std::random_device randomDevice; // Will be used to obtain a seed for the random number engine
std::mt19937_64 generator(randomDevice()); // Standard mersenne_twister_engine seeded with rd()
generator.seed(userSeedValue);
std::mt19937_64 generator(userSeedValue); // Standard mersenne_twister_engine seeded
std::uniform_real_distribution<double> distribution(0.0, 1.0);

randomCentroids.resize(largeNumber * 3);

// Generating all of the random points and storing their coordinates in randomCentroids
// Generating all the random points and storing their coordinates in randomCentroids
for(usize i = 0; i < largeNumber; i++)
{
const auto featureOwnerIdx = static_cast<usize>(distribution(generator) * totalPoints);
Expand All @@ -61,7 +60,7 @@ std::vector<float32> GenerateRandomDistribution(float32 minDistance, float32 max

distanceList.resize(largeNumber);

// Calculating all of the distances and storing them in the distance list
// Calculating all the distances and storing them in the distance list
for(size_t i = 1; i < largeNumber; i++)
{
const float32 x = randomCentroids[3 * i];
Expand Down Expand Up @@ -132,17 +131,25 @@ const std::atomic_bool& ComputeFeatureClustering::getCancel()
Result<> ComputeFeatureClustering::operator()()
{
const auto& imageGeometry = m_DataStructure.getDataRefAs<ImageGeom>(m_InputValues->ImageGeometryPath);
const auto& equivalentDiameters = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->EquivalentDiametersArrayPath);
const auto& featurePhases = m_DataStructure.getDataRefAs<Int32Array>(m_InputValues->FeaturePhasesArrayPath);
const auto& centroids = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->CentroidsArrayPath);
const auto& featurePhasesStore = m_DataStructure.getDataAs<Int32Array>(m_InputValues->FeaturePhasesArrayPath)->getDataStoreRef();
const auto& centroidsStore = m_DataStructure.getDataAs<Float32Array>(m_InputValues->CentroidsArrayPath)->getDataStoreRef();

auto& clusteringList = m_DataStructure.getDataRefAs<NeighborList<float32>>(m_InputValues->ClusteringListArrayName);
auto& rdf = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->RDFArrayName);
auto& minMaxDistances = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->MaxMinArrayName);
BoolArray* biasedFeatures = nullptr;
auto& rdfStore = m_DataStructure.getDataAs<Float32Array>(m_InputValues->RDFArrayName)->getDataStoreRef();
auto& minMaxDistancesStore = m_DataStructure.getDataAs<Float32Array>(m_InputValues->MaxMinArrayName)->getDataStoreRef();
std::unique_ptr<MaskCompare> maskCompare;
if(m_InputValues->RemoveBiasedFeatures)
{
biasedFeatures = m_DataStructure.getDataAs<BoolArray>(m_InputValues->BiasedFeaturesArrayPath);
try
{
maskCompare = InstantiateMaskCompare(m_DataStructure, m_InputValues->BiasedFeaturesArrayPath);
} catch(const std::out_of_range& exception)
{
// This really should NOT be happening as the path was verified during preflight BUT we may be calling this from
// somewhere else that is NOT going through the normal nx::core::IFilter API of Preflight and Execute
std::string message = fmt::format("Mask Array DataPath does not exist or is not of the correct type (Bool | UInt8) {}", m_InputValues->BiasedFeaturesArrayPath.toString());
return MakeErrorResult(-54070, message);
}
}

float32 x = 0.0f, y = 0.0f, z = 0.0f;
Expand All @@ -154,13 +161,12 @@ Result<> ComputeFeatureClustering::operator()()
int32 totalPptFeatures = 0;
float32 min = std::numeric_limits<float32>::max();
float32 max = 0.0f;
float32 value = 0.0f;

std::vector<std::vector<float32>> clusters;
std::vector<float32> oldCount(m_InputValues->NumberOfBins);
std::vector<float32> randomRDF;

const usize totalFeatures = featurePhases.getNumberOfTuples();
const usize totalFeatures = featurePhasesStore.getNumberOfTuples();

SizeVec3 dims = imageGeometry.getDimensions();
FloatVec3 spacing = imageGeometry.getSpacing();
Expand All @@ -175,7 +181,7 @@ Result<> ComputeFeatureClustering::operator()()

for(usize i = 1; i < totalFeatures; i++)
{
if(featurePhases[i] == m_InputValues->PhaseNumber)
if(featurePhasesStore[i] == m_InputValues->PhaseNumber)
{
totalPptFeatures++;
}
Expand All @@ -185,24 +191,24 @@ Result<> ComputeFeatureClustering::operator()()

for(usize i = 1; i < totalFeatures; i++)
{
if(featurePhases[i] == m_InputValues->PhaseNumber)
if(featurePhasesStore[i] == m_InputValues->PhaseNumber)
{
if(i % 1000 == 0)
{
m_MessageHandler(IFilter::Message::Type::Info, fmt::format("Working on Feature {} of {}", i, totalPptFeatures));
}

x = centroids[3 * i];
y = centroids[3 * i + 1];
z = centroids[3 * i + 2];
x = centroidsStore[3 * i];
y = centroidsStore[3 * i + 1];
z = centroidsStore[3 * i + 2];

for(usize j = i + 1; j < totalFeatures; j++)
{
if(featurePhases[i] == featurePhases[j])
if(featurePhasesStore[i] == featurePhasesStore[j])
{
xn = centroids[3 * j];
yn = centroids[3 * j + 1];
zn = centroids[3 * j + 2];
xn = centroidsStore[3 * j];
yn = centroidsStore[3 * j + 1];
zn = centroidsStore[3 * j + 2];

r = sqrtf((x - xn) * (x - xn) + (y - yn) * (y - yn) + (z - zn) * (z - zn));

Expand All @@ -215,11 +221,10 @@ Result<> ComputeFeatureClustering::operator()()

for(usize i = 1; i < totalFeatures; i++)
{
if(featurePhases[i] == m_InputValues->PhaseNumber)
if(featurePhasesStore[i] == m_InputValues->PhaseNumber)
{
for(usize j = 0; j < clusters[i].size(); j++)
for(auto value : clusters[i])
{
value = clusters[i][j];
if(value > max)
{
max = value;
Expand All @@ -234,27 +239,49 @@ Result<> ComputeFeatureClustering::operator()()

const float32 stepSize = (max - min) / m_InputValues->NumberOfBins;

minMaxDistances[(m_InputValues->PhaseNumber * 2)] = max;
minMaxDistances[(m_InputValues->PhaseNumber * 2) + 1] = min;
minMaxDistancesStore[(m_InputValues->PhaseNumber * 2)] = max;
minMaxDistancesStore[(m_InputValues->PhaseNumber * 2) + 1] = min;

for(usize i = 1; i < totalFeatures; i++)
if(m_InputValues->RemoveBiasedFeatures && maskCompare != nullptr)
{
if(featurePhases[i] == m_InputValues->PhaseNumber)
for(usize i = 1; i < totalFeatures; i++)
{
if(m_InputValues->RemoveBiasedFeatures && (*biasedFeatures)[i])
if(featurePhasesStore[i] == m_InputValues->PhaseNumber)
{
continue;
}
if(maskCompare->isTrue(i))
{
continue;
}

for(usize j = 0; j < clusters[i].size(); j++)
for(usize j = 0; j < clusters[i].size(); j++)
{
ensemble = featurePhasesStore[i];
bin = (clusters[i][j] - min) / stepSize;
if(bin >= m_InputValues->NumberOfBins)
{
bin = m_InputValues->NumberOfBins - 1;
}
rdfStore[(m_InputValues->NumberOfBins * ensemble) + bin]++;
}
}
}
}
else
{
for(usize i = 1; i < totalFeatures; i++)
{
if(featurePhasesStore[i] == m_InputValues->PhaseNumber)
{
ensemble = featurePhases[i];
bin = (clusters[i][j] - min) / stepSize;
if(bin >= m_InputValues->NumberOfBins)
for(usize j = 0; j < clusters[i].size(); j++)
{
bin = m_InputValues->NumberOfBins - 1;
ensemble = featurePhasesStore[i];
bin = (clusters[i][j] - min) / stepSize;
if(bin >= m_InputValues->NumberOfBins)
{
bin = m_InputValues->NumberOfBins - 1;
}
rdfStore[(m_InputValues->NumberOfBins * ensemble) + bin]++;
}
rdf[(m_InputValues->NumberOfBins * ensemble) + bin]++;
}
}
}
Expand All @@ -269,15 +296,12 @@ Result<> ComputeFeatureClustering::operator()()

// Scale the random distribution by the number of distances in this particular instance
const float32 normFactor = totalPptFeatures * (totalPptFeatures - 1);
for(usize i = 0; i < randomRDF.size(); i++)
{
randomRDF[i] *= normFactor;
}
std::transform(randomRDF.begin(), randomRDF.end(), randomRDF.begin(), [normFactor](float32 value) { return value * normFactor; });

for(usize i = 0; i < m_InputValues->NumberOfBins; i++)
{
oldCount[i] = rdf[(m_InputValues->NumberOfBins * m_InputValues->PhaseNumber) + i];
rdf[(m_InputValues->NumberOfBins * m_InputValues->PhaseNumber) + i] = oldCount[i] / randomRDF[i + 1];
oldCount[i] = rdfStore[(m_InputValues->NumberOfBins * m_InputValues->PhaseNumber) + i];
rdfStore[(m_InputValues->NumberOfBins * m_InputValues->PhaseNumber) + i] = oldCount[i] / randomRDF[i + 1];
}

for(usize i = 1; i < totalFeatures; i++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ struct SIMPLNXCORE_EXPORT ComputeFeatureClusteringInputValues
int32 PhaseNumber;
bool RemoveBiasedFeatures;
uint64 SeedValue;
DataPath EquivalentDiametersArrayPath;
DataPath FeaturePhasesArrayPath;
DataPath CentroidsArrayPath;
DataPath BiasedFeaturesArrayPath;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,13 @@ Parameters ComputeFeatureClusteringFilter::parameters() const
params.insert(std::make_unique<DataObjectNameParameter>(k_SeedArrayName_Key, "Stored Seed Value Array Name", "Name of array holding the seed value", "ComputeFeatureClustering SeedValue"));

params.insertSeparator(Parameters::Separator{"Input Feature Data"});
params.insert(std::make_unique<ArraySelectionParameter>(k_EquivalentDiametersArrayPath_Key, "Equivalent Diameters", "Diameter of a sphere with the same volume as the Feature", DataPath{},
ArraySelectionParameter::AllowedTypes{DataType::float32}, ArraySelectionParameter::AllowedComponentShapes{{1}}));
params.insert(std::make_unique<ArraySelectionParameter>(k_FeaturePhasesArrayPath_Key, "Phases", "Specifies to which Ensemble each Feature belongs", DataPath{},
ArraySelectionParameter::AllowedTypes{DataType::int32}, ArraySelectionParameter::AllowedComponentShapes{{1}}));
params.insert(std::make_unique<ArraySelectionParameter>(k_CentroidsArrayPath_Key, "Centroids", "X, Y, Z coordinates of Feature center of mass", DataPath{},
ArraySelectionParameter::AllowedTypes{DataType::float32}, ArraySelectionParameter::AllowedComponentShapes{{3}}));
params.insert(std::make_unique<ArraySelectionParameter>(k_BiasedFeaturesArrayPath_Key, "Biased Features",
"Specifies which features are biased and therefor should be removed if the Remove Biased Features option is on", DataPath{},
ArraySelectionParameter::AllowedTypes{DataType::boolean}, ArraySelectionParameter::AllowedComponentShapes{{1}}));
"Specifies which features are biased and therefor should be removed if the Remove Biased Features option is on; True values removed",
DataPath{}, ArraySelectionParameter::AllowedTypes{DataType::boolean, DataType::uint8}, ArraySelectionParameter::AllowedComponentShapes{{1}}));
params.insertSeparator(Parameters::Separator{"Input Ensemble Data"});
params.insert(std::make_unique<AttributeMatrixSelectionParameter>(k_CellEnsembleAttributeMatrixPath_Key, "Cell Ensemble Attribute Matrix",
"The path to the cell ensemble attribute matrix where the RDF and RDF min and max distance arrays will be stored", DataPath{}));
Expand Down Expand Up @@ -108,7 +106,6 @@ IFilter::PreflightResult ComputeFeatureClusteringFilter::preflightImpl(const Dat
{
auto pNumberOfBinsValue = filterArgs.value<int32>(k_NumberOfBins_Key);
auto pRemoveBiasedFeaturesValue = filterArgs.value<bool>(k_RemoveBiasedFeatures_Key);
auto pEquivalentDiametersArrayPathValue = filterArgs.value<DataPath>(k_EquivalentDiametersArrayPath_Key);
auto pFeaturePhasesArrayPathValue = filterArgs.value<DataPath>(k_FeaturePhasesArrayPath_Key);
auto pCentroidsArrayPathValue = filterArgs.value<DataPath>(k_CentroidsArrayPath_Key);
auto pCellEnsembleAttributeMatrixNameValue = filterArgs.value<DataPath>(k_CellEnsembleAttributeMatrixPath_Key);
Expand All @@ -123,7 +120,7 @@ IFilter::PreflightResult ComputeFeatureClusteringFilter::preflightImpl(const Dat
nx::core::Result<OutputActions> resultOutputActions;
std::vector<PreflightValue> preflightUpdatedValues;

std::vector<DataPath> featureDataArrays = {pEquivalentDiametersArrayPathValue, pFeaturePhasesArrayPathValue, pCentroidsArrayPathValue};
std::vector<DataPath> featureDataArrays = {pFeaturePhasesArrayPathValue, pCentroidsArrayPathValue};
if(pRemoveBiasedFeaturesValue)
{
featureDataArrays.push_back(filterArgs.value<DataPath>(k_BiasedFeaturesArrayPath_Key));
Expand Down Expand Up @@ -181,7 +178,6 @@ Result<> ComputeFeatureClusteringFilter::executeImpl(DataStructure& dataStructur
inputValues.PhaseNumber = filterArgs.value<int32>(k_PhaseNumber_Key);
inputValues.RemoveBiasedFeatures = filterArgs.value<bool>(k_RemoveBiasedFeatures_Key);
inputValues.SeedValue = seed;
inputValues.EquivalentDiametersArrayPath = filterArgs.value<DataPath>(k_EquivalentDiametersArrayPath_Key);
inputValues.FeaturePhasesArrayPath = filterArgs.value<DataPath>(k_FeaturePhasesArrayPath_Key);
inputValues.CentroidsArrayPath = filterArgs.value<DataPath>(k_CentroidsArrayPath_Key);
inputValues.BiasedFeaturesArrayPath = filterArgs.value<DataPath>(k_BiasedFeaturesArrayPath_Key);
Expand Down Expand Up @@ -226,8 +222,6 @@ Result<Arguments> ComputeFeatureClusteringFilter::FromSIMPLJson(const nlohmann::
results.push_back(SIMPLConversion::ConvertParameter<SIMPLConversion::UInt64FilterParameterConverter>(args, json, SIMPL::k_RandomSeedValueKey, k_SeedValue_Key));
results.push_back(
SIMPLConversion::ConvertParameter<SIMPLConversion::DataContainerSelectionFilterParameterConverter>(args, json, SIMPL::k_EquivalentDiametersArrayPathKey, k_SelectedImageGeometryPath_Key));
results.push_back(
SIMPLConversion::ConvertParameter<SIMPLConversion::DataArraySelectionFilterParameterConverter>(args, json, SIMPL::k_EquivalentDiametersArrayPathKey, k_EquivalentDiametersArrayPath_Key));
results.push_back(SIMPLConversion::ConvertParameter<SIMPLConversion::DataArraySelectionFilterParameterConverter>(args, json, SIMPL::k_FeaturePhasesArrayPathKey, k_FeaturePhasesArrayPath_Key));
results.push_back(SIMPLConversion::ConvertParameter<SIMPLConversion::DataArraySelectionFilterParameterConverter>(args, json, SIMPL::k_CentroidsArrayPathKey, k_CentroidsArrayPath_Key));
results.push_back(SIMPLConversion::ConvertParameter<SIMPLConversion::DataArraySelectionFilterParameterConverter>(args, json, SIMPL::k_BiasedFeaturesArrayPathKey, k_BiasedFeaturesArrayPath_Key));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class SIMPLNXCORE_EXPORT ComputeFeatureClusteringFilter : public IFilter
static inline constexpr StringLiteral k_SetRandomSeed_Key = "set_random_seed";
static inline constexpr StringLiteral k_SeedValue_Key = "seed_value";
static inline constexpr StringLiteral k_SeedArrayName_Key = "seed_array_name";
static inline constexpr StringLiteral k_EquivalentDiametersArrayPath_Key = "equivalent_diameters_array_path";
static inline constexpr StringLiteral k_FeaturePhasesArrayPath_Key = "feature_phases_array_path";
static inline constexpr StringLiteral k_CentroidsArrayPath_Key = "centroids_array_path";
static inline constexpr StringLiteral k_BiasedFeaturesArrayPath_Key = "biased_features_array_path";
Expand Down

0 comments on commit 25f550a

Please sign in to comment.