From 7b73128476ab19ac6394d3d12bf9d399b6d8c486 Mon Sep 17 00:00:00 2001 From: Nathan Young Date: Thu, 26 Sep 2024 07:05:14 -0400 Subject: [PATCH] BUG/API: Histogram Sync (#1073) * Add HistogramUtilities File * Integrated Histogram API into Compute Array Statistics and Compute Array Histogram * complete extensive documentation for histogram utilities * Update test case to reflect new histogram that produces results inline with plot.ly histogram * repair broken pipelines after key updates * Added a size() function to IDataStore for std::vector compatibility * Update CalculateBin for special bool case --------- Co-authored-by: Jared Duffey --- CMakeLists.txt | 1 + .../pipelines/CI_Histogram.d3dpipeline | 11 +- .../pipelines/aptr12_Analysis.d3dpipeline | 11 +- .../pipelines/avtr12_Analysis.d3dpipeline | 11 +- .../Algorithms/ComputeArrayHistogram.cpp | 143 ++----- .../Algorithms/ComputeArrayHistogram.hpp | 18 +- .../Algorithms/ComputeArrayStatistics.cpp | 173 +++++---- .../Algorithms/ComputeArrayStatistics.hpp | 3 +- .../Filters/ComputeArrayHistogramFilter.cpp | 64 ++-- .../Filters/ComputeArrayHistogramFilter.hpp | 3 +- .../Filters/ComputeArrayStatisticsFilter.cpp | 21 +- .../Filters/ComputeArrayStatisticsFilter.hpp | 3 +- .../test/ComputeArrayHistogramTest.cpp | 67 ++-- .../test/ComputeArrayStatisticsTest.cpp | 32 +- src/simplnx/DataStructure/IDataStore.hpp | 9 + src/simplnx/Utilities/HistogramUtilities.hpp | 354 ++++++++++++++++++ .../Utilities/Math/StatisticsCalculations.hpp | 89 ++--- .../Utilities/ParallelAlgorithmUtilities.hpp | 91 +++++ .../OrientationAnalysis/APTR12_Analysis.py | 3 +- .../OrientationAnalysis/AVTR12_Analysis.py | 3 +- .../OrientationAnalysis/CI_Histogram.py | 3 +- 21 files changed, 747 insertions(+), 366 deletions(-) create mode 100644 src/simplnx/Utilities/HistogramUtilities.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 48fb70d25f..62002e771a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -499,6 +499,7 @@ set(SIMPLNX_HDRS 
${SIMPLNX_SOURCE_DIR}/Utilities/FilterUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/GeometryUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/GeometryHelpers.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/HistogramUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/MemoryUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/StringUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/IParallelAlgorithm.hpp diff --git a/src/Plugins/OrientationAnalysis/pipelines/CI_Histogram.d3dpipeline b/src/Plugins/OrientationAnalysis/pipelines/CI_Histogram.d3dpipeline index 8873b62eaa..47bb27ec36 100644 --- a/src/Plugins/OrientationAnalysis/pipelines/CI_Histogram.d3dpipeline +++ b/src/Plugins/OrientationAnalysis/pipelines/CI_Histogram.d3dpipeline @@ -117,7 +117,8 @@ { "args": { "new_data_group_path": "DataContainer/Statistics", - "histogram_suffix": " Histogram", + "histogram_bin_count_suffix": " Histogram Counts", + "histogram_bin_range_suffix": " Histogram Bin Ranges", "max_range": 1.0, "min_range": 0.0, "create_new_data_group": true, @@ -141,11 +142,11 @@ "file_extension": ".csv", "header_option_index": 1, "max_val_per_line": 0, - "output_dir": "Data/Output/OrientationAnalysis/", - "output_path": "Data/Output/OrientationAnalysis/Test/CI_Histogram.csv", - "output_style_index": 1, + "output_dir": "Data/Output/OrientationAnalysis/Test", + "output_style_index": 0, "input_data_array_paths": [ - "DataContainer/Statistics/Confidence Index Histogram" + "DataContainer/Statistics/Confidence Index Histogram Counts", + "DataContainer/Statistics/Confidence Index Histogram Bin Ranges" ] }, "comments": "", diff --git a/src/Plugins/OrientationAnalysis/pipelines/aptr12_Analysis.d3dpipeline b/src/Plugins/OrientationAnalysis/pipelines/aptr12_Analysis.d3dpipeline index b526795d5c..29a75c7f7a 100644 --- a/src/Plugins/OrientationAnalysis/pipelines/aptr12_Analysis.d3dpipeline +++ b/src/Plugins/OrientationAnalysis/pipelines/aptr12_Analysis.d3dpipeline @@ -448,7 +448,8 @@ { "args": { "create_new_data_group": true, - "histogram_suffix": " 
Histogram", + "histogram_bin_count_suffix": " Histogram Counts", + "histogram_bin_range_suffix": " Histogram Bin Ranges", "max_range": 1.0, "min_range": 0.0, "new_data_group_path": "fw-ar-IF1-aptr12-corr/Histograms", @@ -472,12 +473,12 @@ "file_extension": ".csv", "header_option_index": 1, "input_data_array_paths": [ - "fw-ar-IF1-aptr12-corr/Histograms/EquivalentDiameters Histogram" + "fw-ar-IF1-aptr12-corr/Histograms/EquivalentDiameters Histogram Counts", + "fw-ar-IF1-aptr12-corr/Histograms/EquivalentDiameters Histogram Bin Ranges" ], "max_val_per_line": 0, - "output_dir": "", - "output_path": "Data/Output/fw-ar-IF1-aptr12-corr/EqDiamHistogram.csv", - "output_style_index": 1 + "output_dir": "Data/Output/fw-ar-IF1-aptr12-corr", + "output_style_index": 0 }, "comments": "", "filter": { diff --git a/src/Plugins/OrientationAnalysis/pipelines/avtr12_Analysis.d3dpipeline b/src/Plugins/OrientationAnalysis/pipelines/avtr12_Analysis.d3dpipeline index df912c6f3d..0022ca5a01 100644 --- a/src/Plugins/OrientationAnalysis/pipelines/avtr12_Analysis.d3dpipeline +++ b/src/Plugins/OrientationAnalysis/pipelines/avtr12_Analysis.d3dpipeline @@ -448,7 +448,8 @@ { "args": { "create_new_data_group": true, - "histogram_suffix": "Histogram", + "histogram_bin_count_suffix": " Histogram Counts", + "histogram_bin_range_suffix": " Histogram Bin Ranges", "max_range": 1.0, "min_range": 0.0, "new_data_group_path": "fw-ar-IF1-avtr12-corr/Histograms", @@ -472,12 +473,12 @@ "file_extension": ".csv", "header_option_index": 1, "input_data_array_paths": [ - "fw-ar-IF1-avtr12-corr/Histograms/EquivalentDiametersHistogram" + "fw-ar-IF1-avtr12-corr/Histograms/EquivalentDiameters Histogram Counts", + "fw-ar-IF1-avtr12-corr/Histograms/EquivalentDiameters Histogram Bin Ranges" ], "max_val_per_line": 0, - "output_dir": "", - "output_path": "Data/Output/fw-ar-IF1-avtr12-corr/EqDiamHistogram.csv", - "output_style_index": 1 + "output_dir": "Data/Output/fw-ar-IF1-avtr12-corr", + "output_style_index": 0 }, 
"comments": "", "filter": { diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.cpp index 94588211b3..3cc8916832 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.cpp @@ -1,106 +1,16 @@ #include "ComputeArrayHistogram.hpp" #include "SimplnxCore/Filters/ComputeArrayHistogramFilter.hpp" - #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataGroup.hpp" +#include "simplnx/Utilities/HistogramUtilities.hpp" #include "simplnx/Utilities/ParallelAlgorithmUtilities.hpp" #include "simplnx/Utilities/ParallelTaskAlgorithm.hpp" -#include #include using namespace nx::core; -namespace -{ -template -class GenerateHistogramFromData -{ -public: - GenerateHistogramFromData(ComputeArrayHistogram& filter, const int32 numBins, const IDataArray& inputArray, AbstractDataStore& histogram, std::atomic& overflow, - std::tuple& range, size_t progressIncrement) - : m_Filter(filter) - , m_NumBins(numBins) - , m_InputArray(inputArray) - , m_Histogram(histogram) - , m_Overflow(overflow) - , m_Range(range) - , m_ProgressIncrement(progressIncrement) - { - } - ~GenerateHistogramFromData() = default; - - void operator()() const - { - const auto& inputStore = m_InputArray.template getIDataStoreRefAs>(); - auto end = inputStore.getSize(); - - // tuple visualization: Histogram = {(bin maximum, count), (bin maximum, count), ... 
} - float64 min = 0.0; - float64 max = 0.0; - if(std::get<0>(m_Range)) - { - min = std::get<1>(m_Range); - max = std::get<2>(m_Range); - } - else - { - auto minMax = std::minmax_element(inputStore.begin(), inputStore.end()); - min = (static_cast(*minMax.first) - 1); // ensure upper limit encapsulates max value - max = (static_cast(*minMax.second) + 1); // ensure lower limit encapsulates min value - } - - const float64 increment = (max - min) / static_cast(m_NumBins); - if(m_NumBins == 1) // if one bin, just set the first element to total number of points - { - m_Histogram[0] = max; - m_Histogram[1] = end; - } - else - { - size_t progressCounter = 0; - for(usize i = 0; i < end; i++) - { - if(progressCounter > m_ProgressIncrement) - { - m_Filter.updateThreadSafeProgress(progressCounter); - progressCounter = 0; - } - if(m_Filter.getCancel()) - { - return; - } - const auto bin = std::floor((inputStore[i] - min) / increment); - if((bin >= 0) && (bin < m_NumBins)) - { - m_Histogram[bin * 2 + 1]++; - } - else - { - m_Overflow++; - } - progressCounter++; - } - } - - for(int64 i = 0; i < m_NumBins; i++) - { - m_Histogram[(i * 2)] = static_cast(min + (increment * (static_cast(i) + 1.0))); // load bin maximum into respective position {(x, ), (x , ), ...} - } - } - -private: - ComputeArrayHistogram& m_Filter; - const int32 m_NumBins = 1; - std::tuple& m_Range; - const IDataArray& m_InputArray; - AbstractDataStore& m_Histogram; - std::atomic& m_Overflow; - size_t m_ProgressIncrement = 100; -}; -} // namespace - // ----------------------------------------------------------------------------- ComputeArrayHistogram::ComputeArrayHistogram(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, ComputeArrayHistogramInputValues* inputValues) @@ -119,22 +29,6 @@ void ComputeArrayHistogram::updateProgress(const std::string& progressMessage) { m_MessageHandler({IFilter::Message::Type::Info, progressMessage}); } -// 
----------------------------------------------------------------------------- -void ComputeArrayHistogram::updateThreadSafeProgress(size_t counter) -{ - std::lock_guard guard(m_ProgressMessage_Mutex); - - m_ProgressCounter += counter; - - auto now = std::chrono::steady_clock::now(); - if(std::chrono::duration_cast(now - m_InitialTime).count() > 1000) // every second update - { - auto progressInt = static_cast((static_cast(m_ProgressCounter) / static_cast(m_TotalElements)) * 100.0); - std::string progressMessage = "Calculating... "; - m_MessageHandler(IFilter::ProgressMessage{IFilter::Message::Type::Progress, progressMessage, static_cast(progressInt)}); - m_InitialTime = std::chrono::steady_clock::now(); - } -} // ----------------------------------------------------------------------------- const std::atomic_bool& ComputeArrayHistogram::getCancel() @@ -145,16 +39,9 @@ const std::atomic_bool& ComputeArrayHistogram::getCancel() // ----------------------------------------------------------------------------- Result<> ComputeArrayHistogram::operator()() { - const auto numBins = m_InputValues->NumberOfBins; - const auto selectedArrayPaths = m_InputValues->SelectedArrayPaths; - - for(const auto& arrayPath : selectedArrayPaths) - { - m_TotalElements += m_DataStructure.getDataAs(arrayPath)->getSize(); - } - auto progressIncrement = m_TotalElements / 100; + const int32 numBins = m_InputValues->NumberOfBins; + const std::vector selectedArrayPaths = m_InputValues->SelectedArrayPaths; - std::tuple range = std::make_tuple(m_InputValues->UserDefinedRange, m_InputValues->MinRange, m_InputValues->MaxRange); // Custom bool, min, max ParallelTaskAlgorithm taskRunner; std::atomic overflow = 0; @@ -165,13 +52,29 @@ Result<> ComputeArrayHistogram::operator()() { return {}; } - const auto& inputData = m_DataStructure.getDataRefAs(selectedArrayPaths[i]); - auto& histogram = m_DataStructure.getDataAs>(m_InputValues->CreatedHistogramDataPaths.at(i))->getDataStoreRef(); - 
ExecuteParallelFunction(inputData.getDataType(), taskRunner, *this, numBins, inputData, histogram, overflow, range, progressIncrement); + + const auto* inputData = m_DataStructure.getDataAs(selectedArrayPaths[i]); + auto* binRanges = m_DataStructure.getDataAs(m_InputValues->CreatedBinRangeDataPaths.at(i)); + auto& counts = m_DataStructure.getDataAs>(m_InputValues->CreatedHistogramCountsDataPaths.at(i))->getDataStoreRef(); + Result<> result = {}; + if(m_InputValues->UserDefinedRange) + { + ExecuteParallelFunctor(HistogramUtilities::concurrent::InstantiateHistogramImplFunctor{}, inputData->getDataType(), taskRunner, inputData, binRanges, + std::make_pair(m_InputValues->MinRange, m_InputValues->MaxRange), m_ShouldCancel, numBins, counts, overflow); + } + else + { + ExecuteParallelFunctor(HistogramUtilities::concurrent::InstantiateHistogramImplFunctor{}, inputData->getDataType(), taskRunner, inputData, binRanges, m_ShouldCancel, numBins, counts, overflow); + } + + if(result.invalid()) + { + return result; + } if(overflow > 0) { - const std::string arrayName = inputData.getName(); + const std::string arrayName = inputData->getName(); ComputeArrayHistogram::updateProgress(fmt::format("{} values not categorized into bin for array {}", overflow.load(), arrayName)); } } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.hpp index c1a2890643..40ae1f9ddc 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayHistogram.hpp @@ -7,27 +7,23 @@ #include "simplnx/Filter/IFilter.hpp" #include "simplnx/Parameters/MultiArraySelectionParameter.hpp" -#include -#include - namespace nx::core { - struct SIMPLNXCORE_EXPORT ComputeArrayHistogramInputValues { - int32 NumberOfBins = 0; bool UserDefinedRange = false; + int32 NumberOfBins = 0; 
float64 MinRange = 0.0; float64 MaxRange = 0.0; MultiArraySelectionParameter::ValueType SelectedArrayPaths = {}; - MultiArraySelectionParameter::ValueType CreatedHistogramDataPaths = {}; + MultiArraySelectionParameter::ValueType CreatedBinRangeDataPaths = {}; + MultiArraySelectionParameter::ValueType CreatedHistogramCountsDataPaths = {}; }; /** * @class ComputeArrayHistogram * @brief This filter calculates a Histogram according to user specification and stores it accordingly */ - class SIMPLNXCORE_EXPORT ComputeArrayHistogram { public: @@ -42,7 +38,6 @@ class SIMPLNXCORE_EXPORT ComputeArrayHistogram Result<> operator()(); void updateProgress(const std::string& progMessage); - void updateThreadSafeProgress(size_t counter); const std::atomic_bool& getCancel(); private: @@ -50,12 +45,5 @@ class SIMPLNXCORE_EXPORT ComputeArrayHistogram const ComputeArrayHistogramInputValues* m_InputValues = nullptr; const std::atomic_bool& m_ShouldCancel; const IFilter::MessageHandler& m_MessageHandler; - - // Threadsafe Progress Message - mutable std::mutex m_ProgressMessage_Mutex; - size_t m_TotalElements = 0; - size_t m_ProgressCounter = 0; - std::chrono::steady_clock::time_point m_InitialTime = std::chrono::steady_clock::now(); }; - } // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp index 09875f9fea..2258cef499 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp @@ -3,6 +3,7 @@ #include "simplnx/DataStructure/AttributeMatrix.hpp" #include "simplnx/Utilities/DataArrayUtilities.hpp" #include "simplnx/Utilities/FilterUtilities.hpp" +#include "simplnx/Utilities/HistogramUtilities.hpp" #include "simplnx/Utilities/Math/StatisticsCalculations.hpp" #include 
"simplnx/Utilities/ParallelDataAlgorithm.hpp" @@ -43,8 +44,8 @@ class ComputeArrayStatisticsByIndexImpl ComputeArrayStatisticsByIndexImpl(bool length, bool min, bool max, bool mean, bool mode, bool stdDeviation, bool summation, bool hist, float64 histmin, float64 histmax, bool histfullrange, int32 numBins, bool modalBinRanges, const std::unique_ptr& mask, const Int32Array* featureIds, const DataArray& source, BoolArray* featureHasDataArray, UInt64Array* lengthArray, DataArray* minArray, DataArray* maxArray, Float32Array* meanArray, NeighborList* modeArray, - Float32Array* stdDevArray, Float32Array* summationArray, UInt64Array* histArray, UInt64Array* mostPopulatedBinArray, NeighborList* modalBinRangesArray, - ComputeArrayStatistics* filter) + Float32Array* stdDevArray, Float32Array* summationArray, UInt64Array* histBinCountsArray, DataArray* histBinRangesArray, UInt64Array* mostPopulatedBinArray, + NeighborList* modalBinRangesArray, ComputeArrayStatistics* filter) : m_Length(length) , m_Min(min) , m_Max(max) @@ -69,7 +70,8 @@ class ComputeArrayStatisticsByIndexImpl , m_ModeArray(modeArray) , m_StdDevArray(stdDevArray) , m_SummationArray(summationArray) - , m_HistArray(histArray) + , m_HistBinCountsArray(histBinCountsArray) + , m_HistBinRangesArray(histBinRangesArray) , m_MostPopulatedBinArray(mostPopulatedBinArray) , m_ModalBinRangesArray(modalBinRangesArray) , m_Filter(filter) @@ -214,10 +216,16 @@ class ComputeArrayStatisticsByIndexImpl } } - AbstractDataStore* histDataStorePtr = nullptr; - if(m_HistArray != nullptr) + AbstractDataStore* binCountsStorePtr = nullptr; + if(m_HistBinCountsArray != nullptr) { - histDataStorePtr = m_HistArray->getDataStore(); + binCountsStorePtr = m_HistBinCountsArray->getDataStore(); + } + + AbstractDataStore* binRangesStorePtr = nullptr; + if(m_HistBinRangesArray != nullptr) + { + binRangesStorePtr = m_HistBinRangesArray->getDataStore(); } AbstractDataStore* mostPopulatedBinDataStorePtr = nullptr; @@ -226,20 +234,24 @@ class 
ComputeArrayStatisticsByIndexImpl mostPopulatedBinDataStorePtr = m_MostPopulatedBinArray->getDataStore(); } - if(m_Histogram && histDataStorePtr != nullptr) + if(m_Histogram && binCountsStorePtr != nullptr && binRangesStorePtr != nullptr) { + std::vector ranges(m_NumBins + 1); std::vector histogram(m_NumBins, 0); if(length[localFeatureIndex] > 0) { - float32 histMin = m_HistMin; - float32 histMax = m_HistMax; + T histMin = static_cast(m_HistMin); + T histMax = static_cast(m_HistMax); + if(m_HistFullRange) { - histMin = static_cast(min[localFeatureIndex]); - histMax = static_cast(max[localFeatureIndex]); + histMin = min[localFeatureIndex]; + histMax = max[localFeatureIndex] + static_cast(1.0); } - const float32 increment = (histMax - histMin) / (m_NumBins); + HistogramUtilities::serial::FillBinRanges(ranges, std::make_pair(histMin, histMax), m_NumBins); + + const float32 increment = HistogramUtilities::serial::CalculateIncrement(histMin, histMax, m_NumBins); if(std::fabs(increment) < 1E-10) { histogram[0] = length[localFeatureIndex]; @@ -260,15 +272,11 @@ class ComputeArrayStatisticsByIndexImpl { continue; } - const auto value = static_cast(m_Source[i]); - const auto bin = static_cast((value - histMin) / increment); // find bin for this input array value - if((bin >= 0) && (bin < m_NumBins)) // make certain bin is in range - { - ++histogram[bin]; // increment histogram element corresponding to this input array value - } - else if(value == histMax) + const T value = m_Source[i]; + const auto bin = static_cast(HistogramUtilities::serial::CalculateBin(value, histMin, increment)); // find bin for this input array value + if((bin >= 0) && (bin < m_NumBins)) // make certain bin is in range { - histogram[m_NumBins - 1]++; + histogram[bin]++; // increment histogram element corresponding to this input array value } } // end of numTuples loop } // end of increment else @@ -285,28 +293,20 @@ class ComputeArrayStatisticsByIndexImpl auto modeList = m_ModeArray->getList(j); 
for(int i = 0; i < modeList->size(); i++) { - const float32 mode = modeList->at(i); - const auto modalBin = static_cast((mode - histMin) / increment); - float32 minBinValue = 0.0f; - float32 maxBinValue = 0.0f; + const T mode = modeList->at(i); + const auto modalBin = HistogramUtilities::serial::CalculateBin(mode, histMin, increment); if((modalBin >= 0) && (modalBin < m_NumBins)) // make certain bin is in range { - minBinValue = static_cast(histMin + (modalBin * increment)); - maxBinValue = static_cast(histMin + ((modalBin + 1) * increment)); + m_ModalBinRangesArray->addEntry(j, ranges[modalBin]); + m_ModalBinRangesArray->addEntry(j, ranges[modalBin + 1]); } - else if(mode == histMax) - { - minBinValue = static_cast(histMin + ((modalBin - 1) * increment)); - maxBinValue = static_cast(histMin + (modalBin * increment)); - } - m_ModalBinRangesArray->addEntry(j, minBinValue); - m_ModalBinRangesArray->addEntry(j, maxBinValue); } } } } // end of length if - histDataStorePtr->setTuple(j, histogram); + binCountsStorePtr->setTuple(j, histogram); + binRangesStorePtr->setTuple(j, ranges); auto maxElementIt = std::max_element(histogram.begin(), histogram.end()); uint64 index = std::distance(histogram.begin(), maxElementIt); @@ -403,9 +403,10 @@ class ComputeArrayStatisticsByIndexImpl NeighborList* m_ModeArray = nullptr; Float32Array* m_StdDevArray = nullptr; Float32Array* m_SummationArray = nullptr; - UInt64Array* m_HistArray = nullptr; + UInt64Array* m_HistBinCountsArray = nullptr; + DataArray* m_HistBinRangesArray = nullptr; UInt64Array* m_MostPopulatedBinArray = nullptr; - NeighborList* m_ModalBinRangesArray = nullptr; + NeighborList* m_ModalBinRangesArray = nullptr; ComputeArrayStatistics* m_Filter = nullptr; }; @@ -600,7 +601,13 @@ void FindStatisticsImpl(const ContainerType& data, std::vector& arrays, auto* array7Ptr = dynamic_cast(arrays[8]); if(array7Ptr == nullptr) { - throw std::invalid_argument("findStatisticsImpl() could not dynamic_cast 'Histogram' array to needed 
type. Check input array selection."); + throw std::invalid_argument("findStatisticsImpl() could not dynamic_cast 'Histogram Bin Counts' array to needed type. Check input array selection."); + } + + auto* array12Ptr = dynamic_cast*>(arrays[12]); + if(array12Ptr == nullptr) + { + throw std::invalid_argument("findStatisticsImpl() could not dynamic_cast 'Histogram Bin Ranges' array to needed type. Check input array selection."); } auto* array10Ptr = dynamic_cast(arrays[10]); @@ -609,17 +616,39 @@ void FindStatisticsImpl(const ContainerType& data, std::vector& arrays, throw std::invalid_argument("findStatisticsImpl() could not dynamic_cast 'Most Populated Bin' array to needed type. Check input array selection."); } - auto* arr10DataStorePtr = array10Ptr->getDataStore(); - if(auto* arr7DataStorePtr = array7Ptr->getDataStore(); arr7DataStorePtr != nullptr) + auto& binCountsStore = array7Ptr->getDataStoreRef(); + auto& binRangesStore = array12Ptr->getDataStoreRef(); + auto& mostPopBinStore = array10Ptr->getDataStoreRef(); + + auto range = StatisticsCalculations::findHistogramRange(data, static_cast(inputValues->MinRange), static_cast(inputValues->MaxRange), inputValues->UseFullRange); + + if(inputValues->UseFullRange) { - std::vector values = StatisticsCalculations::findHistogram(data, inputValues->MinRange, inputValues->MaxRange, inputValues->UseFullRange, inputValues->NumBins); - arr7DataStorePtr->setTuple(0, values); + range.second++; // Upper bound must be exclusive + } + + std::atomic_bool neverCancel{false}; + std::atomic overflow{0}; + std::vector binCounts(inputValues->NumBins, 0); + std::vector binRanges(inputValues->NumBins + 1); - auto maxElementIt = std::max_element(values.begin(), values.end()); - uint64 index = std::distance(values.begin(), maxElementIt); - arr10DataStorePtr->setComponent(0, 0, index); - arr10DataStorePtr->setComponent(0, 1, values[index]); + Result<> result = {}; + if constexpr(std::is_same_v, ContainerType>) + { + result = 
HistogramUtilities::serial::GenerateHistogram(data.getDataStoreRef(), binRanges, range, neverCancel, inputValues->NumBins, binCounts, overflow); } + else + { + result = HistogramUtilities::serial::GenerateHistogram(data, binRanges, range, neverCancel, inputValues->NumBins, binCounts, overflow); + } + + binCountsStore.setTuple(0, binCounts); + binRangesStore.setTuple(0, binRanges); + + auto maxElementIt = std::max_element(binCounts.begin(), binCounts.end()); + uint64 index = std::distance(binCounts.begin(), maxElementIt); + mostPopBinStore.setComponent(0, 0, index); + mostPopBinStore.setComponent(0, 1, binCounts[index]); if(inputValues->FindModalBinRanges) { @@ -629,7 +658,7 @@ void FindStatisticsImpl(const ContainerType& data, std::vector& arrays, throw std::invalid_argument("findStatisticsImpl() could not dynamic_cast 'Mode' array to needed type. Check input array selection."); } - auto* array11Ptr = dynamic_cast*>(arrays[11]); + auto* array11Ptr = dynamic_cast*>(arrays[11]); if(array11Ptr == nullptr) { throw std::invalid_argument("findStatisticsImpl() could not dynamic_cast 'Modal Bin Ranges' array to needed type. 
Check input array selection."); @@ -637,9 +666,9 @@ void FindStatisticsImpl(const ContainerType& data, std::vector& arrays, for(const T& mode : array5Ptr->at(0)) { - std::pair range = StatisticsCalculations::findModalBinRange(data, inputValues->MinRange, inputValues->MaxRange, inputValues->UseFullRange, inputValues->NumBins, mode); - array11Ptr->addEntry(0, range.first); - array11Ptr->addEntry(0, range.second); + std::pair modalRange = StatisticsCalculations::findModalBinRange(data, binRanges, mode); + array11Ptr->addEntry(0, modalRange.first); + array11Ptr->addEntry(0, modalRange.second); } } } @@ -670,10 +699,11 @@ void FindStatistics(const DataArray& source, const Int32Array* featureIds, co auto* modeArrayPtr = dynamic_cast*>(arrays[5]); auto* stdDevArrayPtr = dynamic_cast(arrays[6]); auto* summationArrayPtr = dynamic_cast(arrays[7]); - auto* histArrayPtr = dynamic_cast(arrays[8]); + auto* histBinCountsArrayPtr = dynamic_cast(arrays[8]); auto* mostPopulatedBinPtr = dynamic_cast(arrays[10]); - auto* modalBinsArrayPtr = dynamic_cast*>(arrays[11]); - auto* featureHasDataPtr = dynamic_cast(arrays[12]); + auto* modalBinsArrayPtr = dynamic_cast*>(arrays[11]); + auto* histBinRangesArrayPtr = dynamic_cast*>(arrays[12]); + auto* featureHasDataPtr = dynamic_cast(arrays[13]); IParallelAlgorithm::AlgorithmArrays indexAlgArrays; indexAlgArrays.push_back(&source); @@ -684,7 +714,8 @@ void FindStatistics(const DataArray& source, const Int32Array* featureIds, co indexAlgArrays.push_back(meanArrayPtr); indexAlgArrays.push_back(stdDevArrayPtr); indexAlgArrays.push_back(summationArrayPtr); - indexAlgArrays.push_back(histArrayPtr); + indexAlgArrays.push_back(histBinCountsArrayPtr); + indexAlgArrays.push_back(histBinRangesArrayPtr); indexAlgArrays.push_back(mostPopulatedBinPtr); #ifdef SIMPLNX_ENABLE_MULTICORE @@ -697,8 +728,8 @@ void FindStatistics(const DataArray& source, const Int32Array* featureIds, co ComputeArrayStatisticsByIndexImpl(inputValues->FindLength, 
inputValues->FindMin, inputValues->FindMax, inputValues->FindMean, inputValues->FindMode, inputValues->FindStdDeviation, inputValues->FindSummation, inputValues->FindHistogram, inputValues->MinRange, inputValues->MaxRange, inputValues->UseFullRange, inputValues->NumBins, inputValues->FindModalBinRanges, mask, featureIds, source, featureHasDataPtr, - lengthArrayPtr, minArrayPtr, maxArrayPtr, meanArrayPtr, modeArrayPtr, stdDevArrayPtr, summationArrayPtr, histArrayPtr, mostPopulatedBinPtr, - modalBinsArrayPtr, filter), + lengthArrayPtr, minArrayPtr, maxArrayPtr, meanArrayPtr, modeArrayPtr, stdDevArrayPtr, summationArrayPtr, histBinCountsArrayPtr, + histBinRangesArrayPtr, mostPopulatedBinPtr, modalBinsArrayPtr, filter), simplePartitioner); } else @@ -706,10 +737,11 @@ void FindStatistics(const DataArray& source, const Int32Array* featureIds, co ParallelDataAlgorithm indexAlg; indexAlg.setRange(0, numFeatures); indexAlg.requireArraysInMemory(indexAlgArrays); - indexAlg.execute(ComputeArrayStatisticsByIndexImpl( - inputValues->FindLength, inputValues->FindMin, inputValues->FindMax, inputValues->FindMean, inputValues->FindMode, inputValues->FindStdDeviation, inputValues->FindSummation, - inputValues->FindHistogram, inputValues->MinRange, inputValues->MaxRange, inputValues->UseFullRange, inputValues->NumBins, inputValues->FindModalBinRanges, mask, featureIds, source, - featureHasDataPtr, lengthArrayPtr, minArrayPtr, maxArrayPtr, meanArrayPtr, modeArrayPtr, stdDevArrayPtr, summationArrayPtr, histArrayPtr, mostPopulatedBinPtr, modalBinsArrayPtr, filter)); + indexAlg.execute(ComputeArrayStatisticsByIndexImpl(inputValues->FindLength, inputValues->FindMin, inputValues->FindMax, inputValues->FindMean, inputValues->FindMode, + inputValues->FindStdDeviation, inputValues->FindSummation, inputValues->FindHistogram, inputValues->MinRange, inputValues->MaxRange, + inputValues->UseFullRange, inputValues->NumBins, inputValues->FindModalBinRanges, mask, featureIds, source, 
featureHasDataPtr, + lengthArrayPtr, minArrayPtr, maxArrayPtr, meanArrayPtr, modeArrayPtr, stdDevArrayPtr, summationArrayPtr, histBinCountsArrayPtr, + histBinRangesArrayPtr, mostPopulatedBinPtr, modalBinsArrayPtr, filter)); } #endif @@ -855,7 +887,7 @@ struct ComputeArrayStatisticsFunctor { return MakeErrorResult(-563502, "ComputeArrayStatisticsFunctor could not dynamic_cast 'Feature-Has-Data' array to needed type. Check input array selection."); } - arrayPtr->fill(0); + arrayPtr->fill(false); } if(inputValues->FindLength) { @@ -923,13 +955,21 @@ struct ComputeArrayStatisticsFunctor if(inputValues->FindHistogram) { { - auto* arrayPtr = dataStructure.getDataAs(inputValues->HistogramArrayName); + auto* arrayPtr = dataStructure.getDataAs(inputValues->BinCountsArrayName); if(arrayPtr == nullptr) { - return MakeErrorResult(-563511, "ComputeArrayStatisticsFunctor could not dynamic_cast 'Histogram' array to needed type. Check input array selection."); + return MakeErrorResult(-563511, "ComputeArrayStatisticsFunctor could not dynamic_cast 'Histogram Bin Counts' array to needed type. Check input array selection."); } arrayPtr->fill(0); } + { + auto* arrayPtr = dataStructure.getDataAs(inputValues->BinRangesArrayName); + if(arrayPtr == nullptr) + { + return MakeErrorResult(-563514, "ComputeArrayStatisticsFunctor could not dynamic_cast 'Histogram Bin Ranges' array to needed type. 
Check input array selection."); + } + arrayPtr->fill(static_cast(0.0)); + } { auto* arrayPtr = dataStructure.getDataAs(inputValues->MostPopulatedBinArrayName); if(arrayPtr == nullptr) @@ -996,7 +1036,7 @@ Result<> ComputeArrayStatistics::operator()() return {}; } - std::vector arrays(12, nullptr); + std::vector arrays(13, nullptr); if(m_InputValues->FindLength) { @@ -1032,7 +1072,8 @@ Result<> ComputeArrayStatistics::operator()() } if(m_InputValues->FindHistogram) { - arrays[8] = m_DataStructure.getDataAs(m_InputValues->HistogramArrayName); + arrays[8] = m_DataStructure.getDataAs(m_InputValues->BinCountsArrayName); + arrays[12] = m_DataStructure.getDataAs(m_InputValues->BinRangesArrayName); arrays[10] = m_DataStructure.getDataAs(m_InputValues->MostPopulatedBinArrayName); } if(m_InputValues->FindModalBinRanges) @@ -1047,8 +1088,8 @@ Result<> ComputeArrayStatistics::operator()() usize numFeatures = 0; if(m_InputValues->ComputeByIndex) { - arrays.resize(13); - arrays[12] = m_DataStructure.getDataAs(m_InputValues->FeatureHasDataArrayName); + arrays.resize(14); + arrays[13] = m_DataStructure.getDataAs(m_InputValues->FeatureHasDataArrayName); const auto& featureIds = m_DataStructure.getDataRefAs(m_InputValues->FeatureIdsArrayPath); numFeatures = findNumFeatures(featureIds); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.hpp index 06265f8692..3802632fe7 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.hpp @@ -38,7 +38,8 @@ struct SIMPLNXCORE_EXPORT ComputeArrayStatisticsInputValues DataPath FeatureIdsArrayPath; DataPath MaskArrayPath; DataPath DestinationAttributeMatrix; - DataPath HistogramArrayName; + DataPath BinCountsArrayName; + DataPath BinRangesArrayName; DataPath MostPopulatedBinArrayName; 
DataPath ModalBinArrayName; DataPath FeatureHasDataArrayName; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.cpp index aeccf287f1..ccb2ce08d3 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.cpp @@ -53,24 +53,30 @@ std::vector ComputeArrayHistogramFilter::defaultTags() const Parameters ComputeArrayHistogramFilter::parameters() const { Parameters params; + // Create the parameter descriptors that are needed for this filter params.insertSeparator(Parameters::Separator{"Input Parameter(s)"}); params.insert(std::make_unique(k_NumberOfBins_Key, "Number of Bins", "Specifies number of histogram bins (greater than zero)", 1)); params.insertLinkableParameter( std::make_unique(k_UserDefinedRange_Key, "Use Custom Min & Max Range", "Whether the user can set the min and max values to consider for the histogram", false)); - params.insert(std::make_unique(k_MinRange_Key, "Min Value", "Specifies the lower bound of the histogram.", 0.0)); - params.insert(std::make_unique(k_MaxRange_Key, "Max Value", "Specifies the upper bound of the histogram.", 1.0)); + params.insert(std::make_unique(k_MinRange_Key, "Min Value", "Specifies the inclusive lower bound of the histogram.", 0.0)); + params.insert(std::make_unique(k_MaxRange_Key, "Max Value", "Specifies the exclusive upper bound of the histogram.", 1.0)); + params.insertSeparator(Parameters::Separator{"Input Data"}); params.insert(std::make_unique(k_SelectedArrayPaths_Key, "Input Data Arrays", "The list of arrays to calculate histogram(s) for", MultiArraySelectionParameter::ValueType{}, MultiArraySelectionParameter::AllowedTypes{IArray::ArrayType::DataArray}, nx::core::GetAllNumericTypes())); + params.insertSeparator(Parameters::Separator{"Output parameters"}); params.insertLinkableParameter( 
std::make_unique(k_CreateNewDataGroup_Key, "Create New DataGroup for Histograms", "Whether or not to store the calculated histogram(s) in a new DataGroup", true)); params.insert(std::make_unique(k_NewDataGroupPath_Key, "New DataGroup Path", "The path to the new DataGroup in which to store the calculated histogram(s)", DataPath{})); params.insert(std::make_unique(k_DataGroupPath_Key, "Output DataGroup Path", "The complete path to the DataGroup in which to store the calculated histogram(s)", DataPath{}, DataGroupSelectionParameter::AllowedTypes{BaseGroup::GroupType::AttributeMatrix, BaseGroup::GroupType::DataGroup})); - params.insert(std::make_unique(k_HistoName_Key, "Suffix for created Histograms", "String appended to the end of the histogram array names", " Histogram")); + params.insert(std::make_unique(k_HistoBinCountName_Key, "Suffix for created Histogram Bin Counts", "String appended to the end of the histogram array names", " Histogram Counts")); + params.insert( + std::make_unique(k_HistoBinRangeName_Key, "Suffix for created Histogram Bin Ranges", "String appended to the end of the histogram array names", " Histogram Bin Ranges")); + // Associate the Linkable Parameter(s) to the children parameters that they control params.linkParameters(k_UserDefinedRange_Key, k_MinRange_Key, true); params.linkParameters(k_UserDefinedRange_Key, k_MaxRange_Key, true); @@ -95,36 +101,43 @@ IFilter::PreflightResult ComputeArrayHistogramFilter::preflightImpl(const DataSt auto pDataGroupNameValue = filterArgs.value(k_DataGroupPath_Key); auto pSelectedArrayPathsValue = filterArgs.value(k_SelectedArrayPaths_Key); auto pNewDataGroupNameValue = filterArgs.value(k_NewDataGroupPath_Key); // sanity check if is Attribute matrix after impending simplnx update - auto pHistogramSuffix = filterArgs.value(k_HistoName_Key); - - PreflightResult preflightResult; + auto pBinCountSuffix = filterArgs.value(k_HistoBinCountName_Key); + auto pBinRangeSuffix = filterArgs.value(k_HistoBinRangeName_Key); 
nx::core::Result resultOutputActions; - - std::vector preflightUpdatedValues; + ; if(pNewDataGroupValue) { auto createDataGroupAction = std::make_unique(pNewDataGroupNameValue); resultOutputActions.value().appendAction(std::move(createDataGroupAction)); } + DataPath parentPath = {}; + if(pNewDataGroupValue) + { + parentPath = pNewDataGroupNameValue; + } + else + { + parentPath = pDataGroupNameValue; + } for(auto& selectedArrayPath : pSelectedArrayPathsValue) { - const auto& dataArray = dataStructure.getDataAs(selectedArrayPath); - DataPath childPath; - if(pNewDataGroupValue) + const auto* dataArray = dataStructure.getDataAs(selectedArrayPath); { - childPath = pNewDataGroupNameValue.createChildPath((dataArray->getName() + pHistogramSuffix)); + auto createArrayAction = std::make_unique(nx::core::DataType::uint64, std::vector{static_cast(pNumberOfBinsValue)}, std::vector{1}, + parentPath.createChildPath((dataArray->getName() + pBinCountSuffix))); + resultOutputActions.value().appendAction(std::move(createArrayAction)); } - else + { - childPath = pDataGroupNameValue.createChildPath((dataArray->getName() + pHistogramSuffix)); + auto createArrayAction = std::make_unique(dataArray->getDataType(), std::vector{static_cast(pNumberOfBinsValue + 1)}, std::vector{1}, + parentPath.createChildPath((dataArray->getName() + pBinRangeSuffix))); + resultOutputActions.value().appendAction(std::move(createArrayAction)); } - auto createArrayAction = std::make_unique(nx::core::DataType::float64, std::vector{static_cast(pNumberOfBinsValue)}, std::vector{2}, childPath); - resultOutputActions.value().appendAction(std::move(createArrayAction)); } - return {std::move(resultOutputActions), std::move(preflightUpdatedValues)}; + return {std::move(resultOutputActions)}; } //------------------------------------------------------------------------------ @@ -139,7 +152,8 @@ Result<> ComputeArrayHistogramFilter::executeImpl(DataStructure& dataStructure, inputValues.MaxRange = 
filterArgs.value(k_MaxRange_Key); inputValues.SelectedArrayPaths = filterArgs.value(k_SelectedArrayPaths_Key); - auto histogramSuffix = filterArgs.value(k_HistoName_Key); + auto binCountSuffix = filterArgs.value(k_HistoBinCountName_Key); + auto binRangeSuffix = filterArgs.value(k_HistoBinRangeName_Key); DataPath dataGroupPath; if(filterArgs.value(k_CreateNewDataGroup_Key)) @@ -150,15 +164,19 @@ Result<> ComputeArrayHistogramFilter::executeImpl(DataStructure& dataStructure, { dataGroupPath = filterArgs.value(k_DataGroupPath_Key); } - std::vector createdDataPaths; + std::vector createdCountsDataPaths; + std::vector createdRangesDataPaths; for(auto& selectedArrayPath : inputValues.SelectedArrayPaths) // regenerate based on preflight { const auto& dataArray = dataStructure.getDataAs(selectedArrayPath); - auto childPath = dataGroupPath.createChildPath((dataArray->getName() + histogramSuffix)); - createdDataPaths.push_back(childPath); + auto countsPath = dataGroupPath.createChildPath((dataArray->getName() + binCountSuffix)); + createdCountsDataPaths.push_back(countsPath); + auto rangesPath = dataGroupPath.createChildPath((dataArray->getName() + binRangeSuffix)); + createdRangesDataPaths.push_back(rangesPath); } - inputValues.CreatedHistogramDataPaths = createdDataPaths; + inputValues.CreatedHistogramCountsDataPaths = createdCountsDataPaths; + inputValues.CreatedBinRangeDataPaths = createdRangesDataPaths; return ComputeArrayHistogram(dataStructure, messageHandler, shouldCancel, &inputValues)(); } @@ -194,7 +212,7 @@ Result ComputeArrayHistogramFilter::FromSIMPLJson(const nlohmann::jso results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_NewDataContainerNameKey, k_NewDataGroupPath_Key)); results.push_back(SIMPLConversion::Convert2Parameters(args, json, SIMPL::k_NewDataContainerNameKey, SIMPL::k_NewAttributeMatrixNameKey, k_DataGroupPath_Key)); - results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_NewDataArrayNameKey, 
k_HistoName_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_NewDataArrayNameKey, k_HistoBinCountName_Key)); Result<> conversionResult = MergeResults(std::move(results)); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.hpp index b19f8f3fdd..36d33c604a 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayHistogramFilter.hpp @@ -32,7 +32,8 @@ class SIMPLNXCORE_EXPORT ComputeArrayHistogramFilter : public IFilter static inline constexpr StringLiteral k_SelectedArrayPaths_Key = "selected_array_paths"; static inline constexpr StringLiteral k_NewDataGroupPath_Key = "new_data_group_path"; static inline constexpr StringLiteral k_DataGroupPath_Key = "output_data_group_path"; - static inline constexpr StringLiteral k_HistoName_Key = "histogram_suffix"; + static inline constexpr StringLiteral k_HistoBinCountName_Key = "histogram_bin_count_suffix"; + static inline constexpr StringLiteral k_HistoBinRangeName_Key = "histogram_bin_range_suffix"; /** * @brief Reads SIMPL json and converts it simplnx Arguments. 
diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.cpp index f601c8bbb7..f6df1dd093 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.cpp @@ -118,10 +118,15 @@ OutputActions CreateCompatibleArrays(const DataStructure& dataStructure, const A if(findHistogramValue) { { - auto arrayPath = args.value(ComputeArrayStatisticsFilter::k_HistogramArrayName_Key); + auto arrayPath = args.value(ComputeArrayStatisticsFilter::k_HistoBinCountName_Key); auto action = std::make_unique(DataType::uint64, tupleDims, std::vector{numBins}, destinationAttributeMatrixValue.createChildPath(arrayPath)); actions.appendAction(std::move(action)); } + { + auto arrayPath = args.value(ComputeArrayStatisticsFilter::k_HistoBinRangeName_Key); + auto action = std::make_unique(dataType, tupleDims, std::vector{numBins + 1}, destinationAttributeMatrixValue.createChildPath(arrayPath)); + actions.appendAction(std::move(action)); + } { auto arrayPath = args.value(ComputeArrayStatisticsFilter::k_MostPopulatedBinArrayName_Key); auto action = std::make_unique(DataType::uint64, tupleDims, std::vector{2}, destinationAttributeMatrixValue.createChildPath(arrayPath)); @@ -130,7 +135,7 @@ OutputActions CreateCompatibleArrays(const DataStructure& dataStructure, const A if(findModalBinRanges) { auto arrayPath = args.value(ComputeArrayStatisticsFilter::k_ModalBinArrayName_Key); - auto action = std::make_unique(DataType::float32, tupleSize, destinationAttributeMatrixValue.createChildPath(arrayPath)); + auto action = std::make_unique(dataType, tupleSize, destinationAttributeMatrixValue.createChildPath(arrayPath)); actions.appendAction(std::move(action)); } } @@ -203,7 +208,8 @@ Parameters ComputeArrayStatisticsFilter::parameters() const params.insert( 
std::make_unique(k_UseFullRange_Key, "Use Full Range for Histogram", "If true, ignore min and max and use min and max from array upon which histogram is computed", false)); params.insert(std::make_unique(k_NumBins_Key, "Number of Bins", "Number of bins in histogram", 1)); - params.insert(std::make_unique(k_HistogramArrayName_Key, "Histogram Array Name", "The name of the histogram array", "Histogram")); + params.insert(std::make_unique(k_HistoBinCountName_Key, "Histogram Bin Counts Array Name", "The name of the histogram bin counts array", "Histogram Bin Counts")); + params.insert(std::make_unique(k_HistoBinRangeName_Key, "Histogram Bin Ranges Array Name", "The name of the histogram bin ranges array", "Histogram Bin Ranges")); params.insert(std::make_unique(k_MostPopulatedBinArrayName_Key, "Most Populated Bin Array Name", "The name of the Most Populated Bin array", "Most Populated Bin")); params.insert(std::make_unique(k_FindModalBinRanges_Key, "Find Modal Histogram Bin Ranges", "Whether to compute the histogram bin ranges that contain the mode values. 
This option requires that \" Find Mode \" is turned on.", false)); @@ -258,7 +264,8 @@ Parameters ComputeArrayStatisticsFilter::parameters() const "NumUniqueValues")); // Associate the Linkable Parameter(s) to the children parameters that they control - params.linkParameters(k_FindHistogram_Key, k_HistogramArrayName_Key, true); + params.linkParameters(k_FindHistogram_Key, k_HistoBinCountName_Key, true); + params.linkParameters(k_FindHistogram_Key, k_HistoBinRangeName_Key, true); params.linkParameters(k_FindHistogram_Key, k_UseFullRange_Key, true); params.linkParameters(k_FindHistogram_Key, k_NumBins_Key, true); params.linkParameters(k_FindHistogram_Key, k_MinRange_Key, true); @@ -312,7 +319,6 @@ IFilter::PreflightResult ComputeArrayStatisticsFilter::preflightImpl(const DataS auto pMaskArrayPathValue = filterArgs.value(k_MaskArrayPath_Key); auto pDestinationAttributeMatrixValue = filterArgs.value(k_DestinationAttributeMatrixPath_Key); - PreflightResult preflightResult; Result resultOutputActions; std::vector preflightUpdatedValues; @@ -442,7 +448,8 @@ Result<> ComputeArrayStatisticsFilter::executeImpl(DataStructure& dataStructure, inputValues.MaskArrayPath = filterArgs.value(k_MaskArrayPath_Key); inputValues.DestinationAttributeMatrix = filterArgs.value(k_DestinationAttributeMatrixPath_Key); inputValues.FeatureHasDataArrayName = inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_FeatureHasDataArrayName_Key)); - inputValues.HistogramArrayName = inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_HistogramArrayName_Key)); + inputValues.BinCountsArrayName = inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_HistoBinCountName_Key)); + inputValues.BinRangesArrayName = inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_HistoBinRangeName_Key)); inputValues.MostPopulatedBinArrayName = 
inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_MostPopulatedBinArrayName_Key)); inputValues.ModalBinArrayName = inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_ModalBinArrayName_Key)); inputValues.LengthArrayName = inputValues.DestinationAttributeMatrix.createChildPath(filterArgs.value(k_LengthArrayName_Key)); @@ -520,7 +527,7 @@ Result ComputeArrayStatisticsFilter::FromSIMPLJson(const nlohmann::js results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_MaskArrayPathKey, k_MaskArrayPath_Key)); results.push_back( SIMPLConversion::ConvertParameter(args, json, SIMPL::k_DestinationAttributeMatrixKey, k_DestinationAttributeMatrixPath_Key)); - results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_HistogramArrayNameKey, k_HistogramArrayName_Key)); + results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_HistogramArrayNameKey, k_HistoBinCountName_Key)); results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_LengthArrayNameKey, k_LengthArrayName_Key)); results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_MinimumArrayNameKey, k_MinimumArrayName_Key)); results.push_back(SIMPLConversion::ConvertParameter(args, json, SIMPL::k_MaximumArrayNameKey, k_MaximumArrayName_Key)); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.hpp index 3392ccdd27..ede8ca2280 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeArrayStatisticsFilter.hpp @@ -48,7 +48,8 @@ class SIMPLNXCORE_EXPORT ComputeArrayStatisticsFilter : public IFilter static inline constexpr StringLiteral k_FeatureHasDataArrayName_Key = "feature_has_data_array_name"; static inline constexpr StringLiteral k_MaskArrayPath_Key = "mask_array_path"; static inline 
constexpr StringLiteral k_DestinationAttributeMatrixPath_Key = "destination_attribute_matrix_path"; - static inline constexpr StringLiteral k_HistogramArrayName_Key = "histogram_array_name"; + static inline constexpr StringLiteral k_HistoBinCountName_Key = "histogram_bin_count_name"; + static inline constexpr StringLiteral k_HistoBinRangeName_Key = "histogram_bin_range_name"; static inline constexpr StringLiteral k_MostPopulatedBinArrayName_Key = "most_populated_bin_array_name"; static inline constexpr StringLiteral k_ModalBinArrayName_Key = "modal_bin_array_name"; static inline constexpr StringLiteral k_LengthArrayName_Key = "length_array_name"; diff --git a/src/Plugins/SimplnxCore/test/ComputeArrayHistogramTest.cpp b/src/Plugins/SimplnxCore/test/ComputeArrayHistogramTest.cpp index 877686132c..7e2f600c66 100644 --- a/src/Plugins/SimplnxCore/test/ComputeArrayHistogramTest.cpp +++ b/src/Plugins/SimplnxCore/test/ComputeArrayHistogramTest.cpp @@ -14,12 +14,22 @@ using namespace nx::core; namespace { constexpr float64 k_max_difference = 0.0001; +constexpr StringLiteral k_BinRangesSuffix = " Ranges"; +constexpr StringLiteral k_BinCountsSuffix = " Counts"; +constexpr StringLiteral k_Array0Name = "array0"; +constexpr StringLiteral k_Array1Name = "array1"; +constexpr StringLiteral k_Array2Name = "array2"; -void compareHistograms(const DataArray& calulated, const std::array& actual) +template +void compareHistograms(const AbstractDataStore& calculated, const std::array& actual) { - for(int32 i = 0; i < actual.size(); i++) + if(calculated.getSize() != actual.size()) { - float64 diff = std::fabs(calulated[i] - actual[i]); + throw std::runtime_error("Improper sizing of DataStore"); + } + for(int32 i = 0; i < N; i++) + { + T diff = std::fabs(calculated[i] - actual[i]); REQUIRE(diff < ::k_max_difference); } } @@ -44,10 +54,10 @@ TEST_CASE("SimplnxCore::ComputeArrayHistogram: Valid Filter Execution", "[Simpln Arguments args; // load vector with data paths for test - 
::fillArray(*DataArray::CreateWithStore>(dataStruct, "array0", {static_cast(4)}, {static_cast(3)}), + ::fillArray(*DataArray::CreateWithStore>(dataStruct, k_Array0Name, {static_cast(4)}, {static_cast(3)}), {0.0, 5.5, 8.5, 9.2, 16.7, 907.3, 5.0, 6.9, 83.7387483, -56.8, 3.7, -4.9}); - ::fillArray(*DataArray::CreateWithStore>(dataStruct, "array1", {static_cast(4)}, {static_cast(3)}), {56, 82, 46, 93, 73, 57, 24, 32, -90, -35, 74, -19}); - ::fillArray(*DataArray::CreateWithStore>(dataStruct, "array2", {static_cast(4)}, {static_cast(3)}), {83, 93, 75, 67, 8977, 56, 48, 92, 57, 34, 34, 34}); + ::fillArray(*DataArray::CreateWithStore>(dataStruct, k_Array1Name, {static_cast(4)}, {static_cast(3)}), {56, 82, 46, 93, 73, 57, 24, 32, -90, -35, 74, -19}); + ::fillArray(*DataArray::CreateWithStore>(dataStruct, k_Array2Name, {static_cast(4)}, {static_cast(3)}), {83, 93, 75, 67, 8977, 56, 48, 92, 57, 34, 34, 34}); std::vector dataPaths = dataStruct.getAllDataPaths(); auto parentPath = dataPaths[0].getParent(); @@ -59,7 +69,8 @@ TEST_CASE("SimplnxCore::ComputeArrayHistogram: Valid Filter Execution", "[Simpln args.insertOrAssign(ComputeArrayHistogramFilter::k_CreateNewDataGroup_Key, std::make_any(true)); args.insertOrAssign(ComputeArrayHistogramFilter::k_SelectedArrayPaths_Key, std::make_any(dataPaths)); args.insertOrAssign(ComputeArrayHistogramFilter::k_NewDataGroupPath_Key, std::make_any(dataGPath)); - args.insertOrAssign(ComputeArrayHistogramFilter::k_HistoName_Key, std::make_any("Histogram")); + args.insertOrAssign(ComputeArrayHistogramFilter::k_HistoBinRangeName_Key, std::make_any(std::string{::k_BinRangesSuffix})); + args.insertOrAssign(ComputeArrayHistogramFilter::k_HistoBinCountName_Key, std::make_any(std::string{::k_BinCountsSuffix})); // Preflight the filter and check result auto preflightResult = filter.preflight(dataStruct, args); @@ -69,32 +80,28 @@ TEST_CASE("SimplnxCore::ComputeArrayHistogram: Valid Filter Execution", "[Simpln auto executeResult = 
filter.execute(dataStruct, args); SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - // load vector with child paths from filter - std::vector createdDataPaths; - for(auto& selectedArrayPath : dataPaths) // regenerate based on preflight { - const auto& dataArray = dataStruct.getDataAs(selectedArrayPath); - auto childPath = dataGPath.createChildPath((dataArray->getName() + "Histogram")); - createdDataPaths.push_back(childPath); + std::array binRangesSet = {-56.8, 184.475, 425.75, 667.025, 908.3}; + std::array binCountsSet = {11, 0, 0, 1}; + const std::string name = k_Array0Name; + + compareHistograms(dataStruct.getDataAs(dataGPath.createChildPath((name + std::string{k_BinRangesSuffix})))->getDataStoreRef(), binRangesSet); + compareHistograms(dataStruct.getDataAs(dataGPath.createChildPath((name + std::string{k_BinCountsSuffix})))->getDataStoreRef(), binCountsSet); } + { + std::array binRangesSet = {-90, -44, 2, 48, 94}; + std::array binCountsSet = {1, 2, 3, 6}; + const std::string name = k_Array1Name; - std::array array0HistogramSet = {183.725, 11, 425.25, 0, 666.775, 0, 908.3, 1}; - std::array array1HistogramSet = {-44.75, 1, 1.5, 2, 47.75, 3, 94, 6}; - std::array array2HistogramSet = {2269.25, 11, 4505.5, 0, 6741.75, 0, 8978, 1}; - for(const auto& child : createdDataPaths) + compareHistograms(dataStruct.getDataAs(dataGPath.createChildPath((name + std::string{k_BinRangesSuffix})))->getDataStoreRef(), binRangesSet); + compareHistograms(dataStruct.getDataAs(dataGPath.createChildPath((name + std::string{k_BinCountsSuffix})))->getDataStoreRef(), binCountsSet); + } { - auto& dataArray = dataStruct.getDataRefAs>(child); - if(dataArray.getName().find("array0") != std::string::npos) - { - compareHistograms(dataArray, array0HistogramSet); - } - else if(dataArray.getName().find("array1") != std::string::npos) - { - compareHistograms(dataArray, array1HistogramSet); - } - else if(dataArray.getName().find("array2") != std::string::npos) - { - compareHistograms(dataArray, 
array2HistogramSet); - } + std::array binRangesSet = {34, 2270, 4506, 6742, 8978}; + std::array binCountsSet = {11, 0, 0, 1}; + const std::string name = k_Array2Name; + + compareHistograms(dataStruct.getDataAs(dataGPath.createChildPath((name + std::string{k_BinRangesSuffix})))->getDataStoreRef(), binRangesSet); + compareHistograms(dataStruct.getDataAs(dataGPath.createChildPath((name + std::string{k_BinCountsSuffix})))->getDataStoreRef(), binCountsSet); } } diff --git a/src/Plugins/SimplnxCore/test/ComputeArrayStatisticsTest.cpp b/src/Plugins/SimplnxCore/test/ComputeArrayStatisticsTest.cpp index 1de661452e..33f7eec76d 100644 --- a/src/Plugins/SimplnxCore/test/ComputeArrayStatisticsTest.cpp +++ b/src/Plugins/SimplnxCore/test/ComputeArrayStatisticsTest.cpp @@ -94,7 +94,7 @@ TEST_CASE("SimplnxCore::ComputeArrayStatisticsFilter: Test Algorithm", "[Simplnx args.insertOrAssign(ComputeArrayStatisticsFilter::k_CellFeatureIdsArrayPath_Key, std::make_any()); args.insertOrAssign(ComputeArrayStatisticsFilter::k_MaskArrayPath_Key, std::make_any(DataPath({"TestData", "Mask"}))); args.insertOrAssign(ComputeArrayStatisticsFilter::k_DestinationAttributeMatrixPath_Key, std::make_any(statsDataPath)); - args.insertOrAssign(ComputeArrayStatisticsFilter::k_HistogramArrayName_Key, std::make_any(histogram)); + args.insertOrAssign(ComputeArrayStatisticsFilter::k_HistoBinCountName_Key, std::make_any(histogram)); args.insertOrAssign(ComputeArrayStatisticsFilter::k_MostPopulatedBinArrayName_Key, std::make_any(mostPopulatedBin)); args.insertOrAssign(ComputeArrayStatisticsFilter::k_ModalBinArrayName_Key, std::make_any(modalBinRanges)); args.insertOrAssign(ComputeArrayStatisticsFilter::k_LengthArrayName_Key, std::make_any(length)); @@ -146,7 +146,7 @@ TEST_CASE("SimplnxCore::ComputeArrayStatisticsFilter: Test Algorithm", "[Simplnx REQUIRE(histArray != nullptr); auto* mostPopulatedBinArray = dataStructure.getDataAs(statsDataPath.createChildPath(mostPopulatedBin)); REQUIRE(mostPopulatedBinArray != 
nullptr); - auto* modalBinRangesArray = dataStructure.getDataAs>(statsDataPath.createChildPath(modalBinRanges)); + auto* modalBinRangesArray = dataStructure.getDataAs>(statsDataPath.createChildPath(modalBinRanges)); REQUIRE(modalBinRangesArray != nullptr); auto* numUniqueValuesArray = dataStructure.getDataAs(statsDataPath.createChildPath(numUniqueValues)); REQUIRE(numUniqueValuesArray != nullptr); @@ -169,8 +169,8 @@ TEST_CASE("SimplnxCore::ComputeArrayStatisticsFilter: Test Algorithm", "[Simplnx REQUIRE(modeVals.size() == 1); REQUIRE(modeVals[0] == 1); REQUIRE(modalBinRangesVals.size() == 2); - REQUIRE(std::fabs(modalBinRangesVals[0] - 1.0f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRangesVals[1] - 9.8f) < UnitTest::EPSILON); + REQUIRE(modalBinRangesVals[0] == 1); + REQUIRE(modalBinRangesVals[1] == 10); REQUIRE(std::fabs(meanVal - 14.3333f) < UnitTest::EPSILON); REQUIRE(std::fabs(medianVal - 10.0f) < UnitTest::EPSILON); REQUIRE(std::fabs(stdVal - 13.02f) < UnitTest::EPSILON); @@ -299,7 +299,7 @@ TEST_CASE("SimplnxCore::ComputeArrayStatisticsFilter: Test Algorithm By Index", args.insertOrAssign(ComputeArrayStatisticsFilter::k_CellFeatureIdsArrayPath_Key, std::make_any(DataPath({"TestData", "FeatureIds"}))); args.insertOrAssign(ComputeArrayStatisticsFilter::k_MaskArrayPath_Key, std::make_any(DataPath({"TestData", "Mask"}))); args.insertOrAssign(ComputeArrayStatisticsFilter::k_DestinationAttributeMatrixPath_Key, std::make_any(statsDataPath)); - args.insertOrAssign(ComputeArrayStatisticsFilter::k_HistogramArrayName_Key, std::make_any(histogram)); + args.insertOrAssign(ComputeArrayStatisticsFilter::k_HistoBinCountName_Key, std::make_any(histogram)); args.insertOrAssign(ComputeArrayStatisticsFilter::k_MostPopulatedBinArrayName_Key, std::make_any(mostPopulatedBin)); args.insertOrAssign(ComputeArrayStatisticsFilter::k_ModalBinArrayName_Key, std::make_any(modalBinRanges)); args.insertOrAssign(ComputeArrayStatisticsFilter::k_LengthArrayName_Key, 
std::make_any(length)); @@ -362,7 +362,7 @@ TEST_CASE("SimplnxCore::ComputeArrayStatisticsFilter: Test Algorithm By Index", auto* mostPopulatedBinArray = dataStructure.getDataAs(statsDataPath.createChildPath(mostPopulatedBin)); REQUIRE(mostPopulatedBinArray != nullptr); REQUIRE(mostPopulatedBinArray->getNumberOfTuples() == 3); - auto* modalBinRangesArray = dataStructure.getDataAs>(statsDataPath.createChildPath(modalBinRanges)); + auto* modalBinRangesArray = dataStructure.getDataAs>(statsDataPath.createChildPath(modalBinRanges)); REQUIRE(modalBinRangesArray != nullptr); REQUIRE(modalBinRangesArray->getNumberOfTuples() == 3); auto* numUniqueValuesArray = dataStructure.getDataAs(statsDataPath.createChildPath(numUniqueValues)); @@ -491,15 +491,15 @@ TEST_CASE("SimplnxCore::ComputeArrayStatisticsFilter: Test Algorithm By Index", REQUIRE((*mostPopulatedBinArray)[3] == 2); REQUIRE((*mostPopulatedBinArray)[4] == 0); REQUIRE((*mostPopulatedBinArray)[5] == 2); - REQUIRE(std::fabs(modalBinRange0[0] - 1.0f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange0[1] - 15.4f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange0[2] - 58.6f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange0[3] - 73.0f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange1[0] - 17.0f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange1[1] - 20.0f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange2[0] - 10.0f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange2[1] - 12.4f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange2[2] - 19.6f) < UnitTest::EPSILON); - REQUIRE(std::fabs(modalBinRange2[3] - 22.0f) < UnitTest::EPSILON); + REQUIRE(modalBinRange0[0] == 1); + REQUIRE(modalBinRange0[1] == 15); + REQUIRE(modalBinRange0[2] == 59); + REQUIRE(modalBinRange0[3] == 74); + REQUIRE(modalBinRange1[0] == 17); + REQUIRE(modalBinRange1[1] == 21); + REQUIRE(modalBinRange2[0] == 10); + REQUIRE(modalBinRange2[1] == 12); + REQUIRE(modalBinRange2[2] == 20); + 
REQUIRE(modalBinRange2[3] == 23); } } diff --git a/src/simplnx/DataStructure/IDataStore.hpp b/src/simplnx/DataStructure/IDataStore.hpp index 7f40eb193f..0085fef87c 100644 --- a/src/simplnx/DataStructure/IDataStore.hpp +++ b/src/simplnx/DataStructure/IDataStore.hpp @@ -73,6 +73,15 @@ class SIMPLNX_EXPORT IDataStore return getNumberOfTuples() * getNumberOfComponents(); } + /** + * @brief Returns the number of values stored within the DataStore. + * @return usize + */ + usize size() const + { + return getSize(); + } + /** * @brief Resizes the DataStore to handle the specified number of tuples. * @param numTuples diff --git a/src/simplnx/Utilities/HistogramUtilities.hpp b/src/simplnx/Utilities/HistogramUtilities.hpp new file mode 100644 index 0000000000..0316045d8f --- /dev/null +++ b/src/simplnx/Utilities/HistogramUtilities.hpp @@ -0,0 +1,354 @@ +#pragma once + +#include "simplnx/simplnx_export.hpp" + +#include "simplnx/Common/Result.hpp" +#include "simplnx/DataStructure/IDataArray.hpp" + +namespace nx::core::HistogramUtilities +{ +namespace serial +{ +template +float32 CalculateIncrement(Type min, Type max, int32 numBins) +{ + return static_cast(max - min) / static_cast(numBins); +} + +/** + * @function FillBinRanges + * @brief This function fills a container that is STL compatible and has a bracket operator defined with the bin ranges in the following pattern: + * bin_ranges = {minimum, maximum, next maximum, ...} with the format being that the bin's range is defined by bin_ranges[bin_index] <= X < bin_ranges[bin_index + 1] + * @tparam Type this is the end type of the function in that the container and data values are of this type + * @tparam Container this is the type of object the ranges are loaded into, !!! It is expected that this class is STL compatible and has a defined `[]` operator !!! + * @param outputContainer this is the object that the ranges will be loaded into. ASSUMPTION: size is >= numBins + 1 !!! NO Bounds Check!!! 
+ * @param rangeMinMax this is assumed to be the inclusive minimum value and exclusive maximum value for the overall histogram bins. FORMAT: [minimum, maximum) + * @param numBins this is the total number of bin ranges being calculated and by extension the indexing value for the ranges + * @param increment this is the uniform size of the bins + */ +template +void FillBinRanges(Container& outputContainer, const std::pair& rangeMinMax, const int32 numBins, const float32 increment) +{ + // WARNING: No bounds checking for type compatibility, it is expected to be done higher up where the type is not abstracted + // EXPECTED CONTAINER SIZE: numBins + 1 + + if(numBins == 1) // if one bin, just set the range to the inputs + { + outputContainer[0] = rangeMinMax.first; + outputContainer[1] = rangeMinMax.second; + return; + } + + // iterate through loading the middle values of the sequence considering `lower bound inclusive, upper bound exclusive` + for(int32 i = 0; i < numBins; i++) + { + outputContainer[i] = rangeMinMax.first + static_cast(increment * i); + } + + outputContainer[numBins] = rangeMinMax.second; +} + +/** + * @function FillBinRanges + * @brief This overload is provided in the case the bin size is not provided and therefore must be calculated - see above overload for more detail on functionality + * @tparam Type this is the end type of the function in that the container and data values are of this type + * @tparam Container this is the type of object the ranges are loaded into, !!! It is expected that this class is STL compatible and has a defined `[]` operator !!! + * @param outputContainer this is the object that the ranges will be loaded into. ASSUMPTION: size is >= numBins + 1 !!! NO Bounds Check!!! + * @param rangeMinMax this is assumed to be the inclusive minimum value and exclusive maximum value for the overall histogram bins. 
FORMAT: [minimum, maximum) + * @param numBins this is the total number of bin ranges being calculated and by extension the indexing value for the ranges + */ +template +void FillBinRanges(Container& outputContainer, const std::pair& rangeMinMax, const int32 numBins) +{ + // DEV NOTE: this function also serves to act as a jumping off point for implementing logarithmic histograms down the line + + // Uniform Bin Sizes + const float32 increment = CalculateIncrement(rangeMinMax.first, rangeMinMax.second, numBins); + + FillBinRanges(outputContainer, rangeMinMax, numBins, increment); +} + +template +auto CalculateBin(Type value, Type min, float32 increment) +{ + if constexpr(std::is_same_v) + { + return static_cast(std::floor(static_cast(static_cast(value) - static_cast(min)) / increment)); + } + else + { + return static_cast(std::floor(static_cast(value - min) / increment)); + } +} + +/** + * @function GenerateHistogram + * @brief This function creates a uniform histogram (logarithmic possible, but not currently implemented) it fills two arrays one with the ranges for each bin and one for bin counts + * See FillBinRanges function for details on the high level structuring of the bin ranges array + * @tparam Type this the end type of the function in that it is the scalar type of the input and by extension range data + * @tparam SizeType this is the scalar type of the bin counts container + * @tparam InputContainer this is the type of object the values are read from: + * !!! In current implementation it is expected that this class is either AbstractDataStore or std::vector !!! + * @tparam RangesContainer this is the type of object the ranges are stored/written to: + * !!! In current implementation it is expected that this class is either AbstractDataStore or std::vector and whose scalar type matches Type !!! + * @tparam CountsContainer this is the type of object the counts are stored/written to: + * !!! 
In current implementation it is expected that this class is either AbstractDataStore or std::vector !!!
+ * @param inputStore this is the container holding the data that will be binned
+ * @param binRangesStore this is the object that the ranges will be loaded into.
+ * @param rangeMinMax this is assumed to be the inclusive minimum value and exclusive maximum value for the overall histogram bins. FORMAT: [minimum, maximum)
+ * @param shouldCancel this is an atomic value that will determine whether execution ends early; `true` cancels algorithm
+ * @param numBins this is the total number of bin ranges being calculated and by extension the indexing value for the ranges
+ * @param histogramCountsStore this is the container that will hold the counts for each bin (variable type sizing)
+ * @param overflow this is an atomic counter for the number of values that fall outside the bin range
+ * @return an empty Result on success; error -23761 if binRangesStore holds fewer than numBins + 1 values, error -23762 if histogramCountsStore holds fewer than numBins values,
+ * error -23763 if shouldCancel was set during the pass, or warning -23764 if overflow is non-zero once the pass has finished
+ */
+template
+Result<> GenerateHistogram(const InputContainer& inputStore, RangesContainer& binRangesStore, const std::pair& rangeMinMax, const std::atomic_bool& shouldCancel, const int32 numBins,
+                           CountsContainer& histogramCountsStore, std::atomic& overflow)
+{
+  static_assert(std::is_same_v,
+                "HistogramUtilities::GenerateHistogram: inputStore and binRangesStore must be of the same type. HistogramUtilities:99");
+
+  // The ranges container needs one more slot than there are bins
+  if(binRangesStore.size() < numBins + 1)
+  {
+    return MakeErrorResult(-23761, fmt::format("HistogramUtilities::{}: binRangesStore is too small to hold ranges. Needed: {} | Current Size: {}. {}:{}", __func__, numBins + 1, binRangesStore.size(),
+                                               __FILE__, __LINE__));
+  }
+  if(histogramCountsStore.size() < numBins)
+  {
+    return MakeErrorResult(-23762, fmt::format("HistogramUtilities::{}: histogramCountsStore is too small to hold counts. Needed: {} | Current Size: {}. {}:{}", __func__, numBins,
+                                               histogramCountsStore.size(), __FILE__, __LINE__));
+  }
+
+  const float32 increment = CalculateIncrement(rangeMinMax.first, rangeMinMax.second, numBins);
+
+  // Fill Bins
+  FillBinRanges(binRangesStore, rangeMinMax, numBins, increment);
+
+  for(usize i = 0; i < inputStore.size(); i++)
+  {
+    // The cancel flag is polled once per input value
+    if(shouldCancel)
+    {
+      return MakeErrorResult(-23763, fmt::format("HistogramUtilities::{}: Signal Interrupt Received. {}:{}", __func__, __FILE__, __LINE__));
+    }
+    const auto bin = CalculateBin(inputStore[i], rangeMinMax.first, increment);
+    if((bin >= 0) && (bin < numBins))
+    {
+      // Counts are accumulated in place; histogramCountsStore is not zeroed by this function
+      histogramCountsStore[bin]++;
+    }
+    else
+    {
+      overflow++;
+    }
+  }
+
+  // overflow is not reset in this function, so a value that was already non-zero on entry also produces the warning
+  if(overflow > 0)
+  {
+    return MakeWarningVoidResult(-23764, fmt::format("HistogramUtilities::{}: Overflow detected: overflow count {}. {}:{}", __func__, overflow.load(), __FILE__, __LINE__));
+  }
+
+  return {};
+}
+
+/**
+ * @function GenerateHistogramAtComponent
+ * @brief [Runs over specific component] This function creates a uniform histogram (logarithmic possible, but not currently implemented) it fills two arrays,
+ * one with the ranges for each bin and one for bin counts
+ * See FillBinRanges function for details on the high level structuring of the bin ranges array
+ * @tparam Type this is the end type of the function in that it is the scalar type of the input and by extension range data
+ * @tparam RangesContainer this is the type of object the ranges are stored/written to:
+ * !!! In current implementation it is expected that this class is either AbstractDataStore or std::vector and whose scalar type matches Type !!!
+ * @tparam CountsContainer this is the type of object the counts are stored/written to:
+ * !!! In current implementation it is expected that this class is either AbstractDataStore or std::vector !!!
+ * @param inputStore this is the container holding the data that will be binned
+ * @param binRangesStore this is the object that the ranges will be loaded into.
+ * @param rangeMinMax this is assumed to be the inclusive minimum value and exclusive maximum value for the overall histogram bins. FORMAT: [minimum, maximum) + * @param shouldCancel this is an atomic value that will determine whether execution ends early; `true` cancels algorithm + * @param numBins this is the total number of bin ranges being calculated and by extension the indexing value for the ranges + * @param histogramCountsStore this is the container that will hold the counts for each bin (variable type sizing) + * @param overflow this is an atomic counter for the number of values that fall outside the bin range + */ +template +Result<> GenerateHistogramAtComponent(const AbstractDataStore& inputStore, RangesContainer& binRangesStore, const std::pair& rangeMinMax, const std::atomic_bool& shouldCancel, + const int32 numBins, CountsContainer& histogramCountsStore, std::atomic& overflow, usize componentIndex) +{ + static_assert(std::is_same_v, + "HistogramUtilities::GenerateHistogramAtComponent: inputStore and binRangesStore must be of the same type. HistogramUtilities:163"); + + usize numComp = inputStore.getNumberOfComponents(); + if(componentIndex > numComp) + { + return MakeErrorResult(-23765, fmt::format("HistogramUtilities::{}: supplied component index is larger than component size of input array. Needed: x < {} | Currently: {}. {}:{}", __func__, + numComp, componentIndex, __FILE__, __LINE__)); + } + + if(binRangesStore.size() < numBins + 1) + { + return MakeErrorResult(-23761, fmt::format("HistogramUtilities::{}: binRangesStore is too small to hold ranges. Needed: {} | Current Size: {}. {}:{}", __func__, numBins + 1, binRangesStore.size(), + __FILE__, __LINE__)); + } + if(histogramCountsStore.size() < numBins) + { + return MakeErrorResult(-23762, fmt::format("HistogramUtilities::{}: histogramCountsStore is too small to hold counts. Needed: {} | Current Size: {}. 
{}:{}", __func__, numBins, + histogramCountsStore.size(), __FILE__, __LINE__)); + } + + const float32 increment = CalculateIncrement(rangeMinMax.first, rangeMinMax.second, numBins); + + // Fill Bins + FillBinRanges(binRangesStore, rangeMinMax, numBins, increment); + + for(usize i = 0; i < inputStore.getNumberOfTuples(); i++) + { + if(shouldCancel) + { + return MakeErrorResult(-23763, fmt::format("HistogramUtilities::{}: Signal Interrupt Received. {}:{}", __func__, __FILE__, __LINE__)); + } + const auto bin = CalculateBin(inputStore[i * numComp + componentIndex], rangeMinMax.first, increment); + if((bin >= 0) && (bin < numBins)) + { + histogramCountsStore[bin]++; + } + else + { + overflow++; + } + } + + if(overflow > 0) + { + return MakeWarningVoidResult(-23764, fmt::format("HistogramUtilities::{}: Overflow detected: overflow count {}. {}:{}", __func__, overflow.load(), __FILE__, __LINE__)); + } + + return {}; +} + +/** + * @class GenerateHistogramFunctor + * @brief This is a compatibility functor that leverages existing typecasting functions to execute GenerateHistogram() cleanly. In it there are two + * definitions for the `()` operator that allows for implicit calculation of range, predicated whether a range is passed in or not + */ +struct GenerateHistogramFunctor +{ + template + Result<> operator()(const IDataArray* inputArray, IDataArray* binRangesArray, ArgsT&&... args) const + { + const auto& inputStore = inputArray->template getIDataStoreRefAs>(); + + auto minMax = std::minmax_element(inputStore.begin(), inputStore.end()); + + return GenerateHistogram(inputStore, binRangesArray->template getIDataStoreRefAs>(), std::make_pair(*minMax.first, *minMax.second + static_cast(1.0)), + std::forward(args)...); + } + + template + Result<> operator()(const IDataArray* inputArray, IDataArray* binRangesArray, std::pair&& rangeMinMax, ArgsT&&... 
args) const + { + const auto& inputStore = inputArray->template getIDataStoreRefAs>(); + + // check range ordering : should be min, max + if(rangeMinMax.first > rangeMinMax.second) + { + return MakeErrorResult(-23760, fmt::format("GenerateHistogramFunctor::{}: The range min value is larger than the max value. Min value: {} | Max Value: {}. {}:{}", __func__, rangeMinMax.first, + rangeMinMax.second, __FILE__, __LINE__)); + } + + return GenerateHistogram(inputStore, binRangesArray->template getIDataStoreRefAs>(), + std::make_pair(static_cast(rangeMinMax.first), static_cast(rangeMinMax.second)), std::forward(args)...); + } +}; +} // namespace serial + +namespace concurrent +{ +/** + * @class GenerateHistogramImpl + * @brief This class is a pseudo-wrapper for the serial::GenerateHistogram, the reason for this class' existence is to hold/define ownership of objects in each thread + * @tparam Type this the end type of the function in that the container and data values are of this type + * @tparam SizeType this is the scalar type of the bin counts container + */ +template +class GenerateHistogramImpl +{ +public: + /** + * @function constructor + * @brief This constructor requires a defined range and creates the object + * @param inputStore this is the AbstractDataStore holding the data that will be binned + * @param binRangesStore this is the AbstractDataStore that the ranges will be loaded into. + * @param rangeMinMax this is assumed to be the inclusive minimum value and exclusive maximum value for the overall histogram bins. 
FORMAT: [minimum, maximum) + * @param shouldCancel this is an atomic value that will determine whether execution ends early + * @param numBins this is the total number of bin ranges being calculated and by extension the indexing value for the ranges + * @param histogramStore this is the AbstractDataStore that will hold the counts for each bin (variable type sizing) + * @param overflow this is an atomic counter for the number of values that fall outside the bin range + */ + GenerateHistogramImpl(const AbstractDataStore& inputStore, AbstractDataStore& binRangesStore, std::pair&& rangeMinMax, const std::atomic_bool& shouldCancel, + const int32 numBins, AbstractDataStore& histogramStore, std::atomic& overflow) + : m_InputStore(inputStore) + , m_ShouldCancel(shouldCancel) + , m_NumBins(numBins) + , m_BinRangesStore(binRangesStore) + , m_HistogramStore(histogramStore) + , m_Overflow(overflow) + { + m_Range = std::make_pair(static_cast(rangeMinMax.first), static_cast(rangeMinMax.second)); + } + + /** + * @function constructor + * @brief This constructor constructs the object then calculates and stores the range implicitly + * @param inputStore this is the AbstractDataStore holding the data that will be binned + * @param binRangesStore this is the AbstractDataStore that the ranges will be loaded into. 
+ * @param shouldCancel this is an atomic value that will determine whether execution ends early + * @param numBins this is the total number of bin ranges being calculated and by extension the indexing value for the ranges + * @param histogramStore this is the AbstractDataStore that will hold the counts for each bin (variable type sizing) + * @param overflow this is an atomic counter for the number of values that fall outside the bin range + */ + GenerateHistogramImpl(const AbstractDataStore& inputStore, AbstractDataStore& binRangesStore, const std::atomic_bool& shouldCancel, const int32 numBins, + AbstractDataStore& histogramStore, std::atomic& overflow) + : m_InputStore(inputStore) + , m_ShouldCancel(shouldCancel) + , m_NumBins(numBins) + , m_BinRangesStore(binRangesStore) + , m_HistogramStore(histogramStore) + , m_Overflow(overflow) + { + auto minMax = std::minmax_element(m_InputStore.begin(), m_InputStore.end()); + m_Range = std::make_pair(*minMax.first, *minMax.second + static_cast(1.0)); + } + + ~GenerateHistogramImpl() = default; + + /** + * @function operator() + * @brief This function serves as the execute method + */ + void operator()() const + { + serial::GenerateHistogram(m_InputStore, m_BinRangesStore, m_Range, m_ShouldCancel, m_NumBins, m_HistogramStore, m_Overflow); + } + +private: + const std::atomic_bool& m_ShouldCancel; + const int32 m_NumBins = 1; + std::pair m_Range = {static_cast(0.0), static_cast(0.0)}; + const AbstractDataStore& m_InputStore; + AbstractDataStore& m_BinRangesStore; + AbstractDataStore& m_HistogramStore; + std::atomic& m_Overflow; +}; + +/** + * @class InstantiateHistogramImplFunctor + * @brief This is a compatibility functor that leverages existing typecasting functions to create the appropriately typed GenerateHistogramImpl() cleanly. + * Designed for compatibility with the existing parallel execution classes. 
+ */
+struct InstantiateHistogramImplFunctor
+{
+  /**
+   * @brief Builds a GenerateHistogramImpl from the data stores of the two arrays
+   * @param inputArray this is the array whose data store is handed over as the input store
+   * @param binRangesArray this is the array whose data store is handed over as the bin ranges store
+   * @param args these are forwarded unchanged to the GenerateHistogramImpl constructor after the two stores
+   * @return the constructed GenerateHistogramImpl
+   */
+  template
+  auto operator()(const IDataArray* inputArray, IDataArray* binRangesArray, ArgsT&&... args)
+  {
+    return GenerateHistogramImpl(inputArray->template getIDataStoreRefAs>(), binRangesArray->template getIDataStoreRefAs>(), std::forward(args)...);
+  }
+};
+} // namespace concurrent
+} // namespace nx::core::HistogramUtilities
diff --git a/src/simplnx/Utilities/Math/StatisticsCalculations.hpp b/src/simplnx/Utilities/Math/StatisticsCalculations.hpp
index 29a306fdab..c65b8b5593 100644
--- a/src/simplnx/Utilities/Math/StatisticsCalculations.hpp
+++ b/src/simplnx/Utilities/Math/StatisticsCalculations.hpp
@@ -236,104 +236,57 @@ size_t findNumUniqueValues(const C& source)
 // -----------------------------------------------------------------------------
 template