From a8950e0aeff4a320c437f9a16d3825de0a015618 Mon Sep 17 00:00:00 2001 From: Cris Luengo Date: Thu, 30 Jan 2025 14:33:24 -0700 Subject: [PATCH] Added overloads to dip::MinimumVariancePartitioning() and dip::KMeansClustering(). --- changelogs/diplib_next.md | 7 +++- dipimage/cluster.m | 2 +- dipimage/private/dip_segmentation.cpp | 12 +++---- examples/cpp/color_quantization.cpp | 26 +++++++------- examples/python/color_quantization.py | 24 +++++++++++++ include/diplib/histogram.h | 48 +++++++++++++++++++++++--- include/diplib/library/stringparams.h | 13 ++++++- include/diplib/segmentation.h | 19 +++++----- pydip/src/documentation_strings.h | 16 +++++---- pydip/src/documentation_urls.py | 6 ++-- pydip/src/histogram.cpp | 18 ++++++++-- src/histogram/threshold_algorithms.cpp | 42 +++++++++++++++++----- src/segmentation/kmeans_clustering.cpp | 1 - 13 files changed, 177 insertions(+), 57 deletions(-) create mode 100644 examples/python/color_quantization.py diff --git a/changelogs/diplib_next.md b/changelogs/diplib_next.md index 7951631b..5108a7a8 100644 --- a/changelogs/diplib_next.md +++ b/changelogs/diplib_next.md @@ -25,6 +25,11 @@ date: 2020-00-00 - `dip::ChainCode::Polygon()` has a new, optional argument to exclude border pixels from the polygon. +- The overloads for `dip::MinimumVariancePartitioning()` and `dip::KMeansClustering()` that take a histogram + as input now also have a version that take the output histogram as an argument, and return the cluster centers. + This version of `dip::KMeansClustering()` additionally has overloads that take a `dip::Random` object as input, + instead of using a default-initialized one. + ### Bug fixes - Fixed `dip::IsotropicErosion()` to use the same structuring element size as `dip::IsotropicDilation()`. @@ -43,7 +48,7 @@ date: 2020-00-00 - `dip::GetImageChainCodes()` and `dip::GetSingleChainCode()` marked a chain code as being on the image border if the end pixel was on the image border, instead of only if the step represented by the code is along the - image border (i.e. both the start and end pixels for the step are along the border). + image border. ### Updated dependencies diff --git a/dipimage/cluster.m b/dipimage/cluster.m index d69719f3..7706813f 100644 --- a/dipimage/cluster.m +++ b/dipimage/cluster.m @@ -5,7 +5,7 @@ % initialization, meaning the result can be different on subsequent runs. % Minimum variance clustering splits the image along one dimension % iteratively, creating a k-d-tree--like partitioning. The chosen split -% at each iteration is the one that most reduces the sum of variances +% at each iteration is the one that most reduces the weighted sum of variances % of the partitions. % % SYNOPSIS: diff --git a/dipimage/private/dip_segmentation.cpp b/dipimage/private/dip_segmentation.cpp index b00deda9..aa89cabe 100644 --- a/dipimage/private/dip_segmentation.cpp +++ b/dipimage/private/dip_segmentation.cpp @@ -210,7 +210,7 @@ void threshold( int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { DML_MAX_ARGS( index + 1 ); dml::MatlabInterface mi; dip::Image out = mi.NewImage(); - if(( method == "double" ) || ( method == "hysteresis" )) { + if(( method == "double" ) || ( method == dip::S::HYSTERESIS )) { dip::dfloat param1{}; dip::dfloat param2{}; if( nrhs > index ) { @@ -232,7 +232,7 @@ void threshold( int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { if( nlhs > 1 ) { plhs[ 1 ] = dml::CreateDouble2Vector( param1, param2 ); } - } else if(( method == "isodata" ) || ( method == "kmeans" ) || ( method == "gmm" )) { + } else if(( method == dip::S::ISODATA ) || ( method == dip::S::KMEANS ) || ( method == dip::S::GMM )) { dip::uint nThresholds = 1; if( nrhs > index ) { dip::dfloat parameter = dml::GetFloat( prhs[ index ] ); @@ -240,8 +240,8 @@ void threshold( int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { nThresholds = static_cast< dip::uint >( parameter ); } } - dip::FloatArray thresholds = ( method == "gmm" ) ? GaussianMixtureModelThreshold( in, mask, out, nThresholds ) - : IsodataThreshold( in, mask, out, nThresholds ); + dip::FloatArray thresholds = ( method == dip::S::GMM ) ? GaussianMixtureModelThreshold( in, mask, out, nThresholds ) + : IsodataThreshold( in, mask, out, nThresholds ); if( nlhs > 1 ) { plhs[ 1 ] = dml::GetArray( thresholds ); } @@ -274,7 +274,7 @@ void canny( mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { void cornerdetector( mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { DML_MAX_ARGS( 4 ); dip::Image const in = dml::GetImage( prhs[ 0 ] ); - dip::String method = ( nrhs > 1 ) ? dml::GetString( prhs[ 1 ] ) : "ShiTomasi"; + dip::String method = ( nrhs > 1 ) ? dml::GetString( prhs[ 1 ] ) : "shitomasi"; dip::ToLowerCase( method ); dip::FloatArray sigmas = ( nrhs > 2 ) ? dml::GetFloatArray( prhs[ 2 ] ) : dip::FloatArray{ 2.0 }; dml::MatlabInterface mi; @@ -298,7 +298,7 @@ void cornerdetector( mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { void linedetector( mxArray* plhs[], int nrhs, const mxArray* prhs[] ) { DML_MAX_ARGS( 5 ); dip::Image const in = dml::GetImage( prhs[ 0 ] ); - dip::String method = ( nrhs > 1 ) ? dml::GetString( prhs[ 1 ] ) : "Frangi"; + dip::String method = ( nrhs > 1 ) ? dml::GetString( prhs[ 1 ] ) : "frangi"; dip::ToLowerCase( method ); dip::String polarity = ( nrhs > 4 ) ? dml::GetString( prhs[ 4 ] ) : dip::S::WHITE; dml::MatlabInterface mi; diff --git a/examples/cpp/color_quantization.cpp b/examples/cpp/color_quantization.cpp index 7c9e205b..f4b1abbd 100644 --- a/examples/cpp/color_quantization.cpp +++ b/examples/cpp/color_quantization.cpp @@ -3,6 +3,8 @@ * It displays the result using dip::viewer::ShowSimple. */ +#include + #include "diplib.h" #include "dipviewer.h" #include "diplib/display.h" @@ -11,38 +13,36 @@ #include "diplib/segmentation.h" #include "diplib/simple_file_io.h" + int main() { dip::Image input = dip::ImageRead( DIP_EXAMPLES_DIR "/DIP.tif" ); dip::viewer::ShowSimple( input, "input image" ); + constexpr dip::uint nClusters = 3; - constexpr dip::uint subsample = 4; // Compute the color histogram. - dip::Histogram hist( input, {}, { dip::Histogram::Configuration( 0.0, 255.0, 256 / subsample ) } ); + dip::Histogram hist( input, {}, { dip::Histogram::Configuration( 0.0, 255.0, 64 ) } ); // Cluster the histogram, the output histogram has a label assigned to each bin. // Each label corresponds to one of the clusters. - dip::Image histImage = hist.GetImage(); // Copy with shared data - dip::Image tmp; - dip::CoordinateArray centers = dip::MinimumVariancePartitioning( histImage, tmp, nClusters ); - std::cout << nClusters << " clusters requested, " << centers.size() << " clusters found.\n"; - histImage.Copy( tmp ); // Copy 32-bit label image into 64-bit histogram image. + dip::FloatCoordinateArray centers = dip::MinimumVariancePartitioning( hist, hist, nClusters ); // Find the cluster label for each pixel in the input image. dip::Image labels = hist.ReverseLookup( input ); dip::viewer::ShowSimple( dip::ApplyColorMap( labels, "label" ), "clusters" ); - // The `centers` array contains histogram coordinates for each of the centers. - // We need to convert these coordinates to RGB values by multiplying by 8 (=256/32). + std::cout << nClusters << " clusters requested, " << centers.size() << " clusters found:\n"; + for( dip::uint ii = 0; ii < centers.size(); ++ii ) { + std::cout << " cluster " << ii << ": " << centers[ ii ] << "\n"; + } + + // Create a lookup table with the colors and apply it to create an image with reduced number of colors. // `centers[ii]` corresponds to label `ii+1`. dip::Image lutImage( { centers.size() + 1 }, 3, dip::DT_UINT8 ); lutImage.At( 0 ) = 0; // label 0 doesn't exist for( dip::uint ii = 0; ii < centers.size(); ++ii ) { - lutImage.At( ii + 1 ) = { centers[ ii ][ 0 ] * subsample, centers[ ii ][ 1 ] * subsample, centers[ ii ][ 2 ] * subsample }; + lutImage.At( ii + 1 ) = { centers[ ii ][ 0 ], centers[ ii ][ 1 ], centers[ ii ][ 2 ] }; } - - // Finally, we apply our look-up table mapping, painting each label in the - // image with its corresponding RGB color. dip::LookupTable lut( lutImage ); dip::Image output = lut.Apply( labels ); output.SetColorSpace( "sRGB" ); diff --git a/examples/python/color_quantization.py b/examples/python/color_quantization.py new file mode 100644 index 00000000..8ed2dda3 --- /dev/null +++ b/examples/python/color_quantization.py @@ -0,0 +1,24 @@ +import diplib as dip +import numpy as np + + +input = dip.ImageRead('examples/DIP.tif') +dip.viewer.Show(input) + +# Compute histogram and cluster it +hist = dip.Histogram(input, configuration=dip.Histogram.Configuration(0.0, 255.0, 64)) +centers = dip.MinimumVariancePartitioning(hist, out=hist, nClusters=3) + +# Reverse lookup to create the segmented image +labels = hist.ReverseLookup(input) +dip.viewer.Show(labels, mapping="8bit", lut="labels") + +# A color lookup table to paint each label with the cluster center RGB values +centers.insert(0, [0.0, 0.0, 0.0]) # label #0 doesn't have an entry in centers, this aligns the labels to the indices +lut = dip.LookupTable(dip.Image(np.array(centers), tensor_axis=1)) +output = lut.Apply(labels) + +output.SetColorSpace(input.ColorSpace()) +dip.viewer.Show(output) +dip.viewer.Spin() + diff --git a/include/diplib/histogram.h b/include/diplib/histogram.h index 44d0733c..30dc6dd2 100644 --- a/include/diplib/histogram.h +++ b/include/diplib/histogram.h @@ -26,6 +26,7 @@ #include "diplib/iterators.h" #include "diplib/lookup_table.h" #include "diplib/measurement.h" +#include "diplib/random.h" /// \file @@ -965,24 +966,63 @@ DIP_EXPORT dfloat BackgroundThreshold( /// /// K-means clustering partitions the histogram into compact, similarly-weighted segments. The algorithm /// uses a random initialization, so multiple runs might yield different results. +/// See \ref KMeansClustering(Image const&, Image&, Random&, dip::uint). /// /// For 1D histograms, \ref dip::IsodataThreshold( Histogram const&, dip::uint ) is more efficient, and deterministic. -DIP_EXPORT Histogram KMeansClustering( +DIP_EXPORT FloatCoordinateArray KMeansClustering( Histogram const& in, - dip::uint nClusters = 2 + Histogram& out, + Random& random, + dip::uint nClusters ); +inline Histogram KMeansClustering( + Histogram const& in, + Random& random, + dip::uint nClusters = 2 +) { + Histogram out; + KMeansClustering( in, out, random, nClusters ); + return out; +} +/// \brief Like above, using a default-initialized \ref dip::Random object. +inline FloatCoordinateArray KMeansClustering( + Histogram const& in, + Histogram& out, + dip::uint nClusters = 2 +) { + Random random; + return KMeansClustering( in, out, random, nClusters ); +} +DIP_NODISCARD inline Histogram KMeansClustering( + Histogram const& in, + dip::uint nClusters = 2 +) { + Histogram out; + KMeansClustering( in, out, nClusters ); + return out; +} /// \brief Partitions a (multi-dimensional) histogram into `nClusters` partitions iteratively using Otsu /// thresholding along individual dimensions. /// /// Minimum variance partitioning builds a k-d tree of the histogram, where, for each node, the marginal histogram /// with the largest variance is split using Otsu thresholding. +/// See \ref MinimumVariancePartitioning(Image const&, Image&, dip::uint). /// /// For two clusters in a 1D histogram, use \ref dip::OtsuThreshold( Histogram const& ). -DIP_EXPORT Histogram MinimumVariancePartitioning( +DIP_EXPORT FloatCoordinateArray MinimumVariancePartitioning( Histogram const& in, - dip::uint nClusters = 2 + Histogram& out, + dip::uint nClusters ); +inline Histogram MinimumVariancePartitioning( + Histogram const& in, + dip::uint nClusters = 2 +) { + Histogram out; + MinimumVariancePartitioning( in, out, nClusters ); + return out; +} // diff --git a/include/diplib/library/stringparams.h b/include/diplib/library/stringparams.h index e704fbb1..62548383 100644 --- a/include/diplib/library/stringparams.h +++ b/include/diplib/library/stringparams.h @@ -41,7 +41,6 @@ constexpr char const* FIRST = "first"; constexpr char const* LAST = "last"; constexpr char const* STABLE = "stable"; constexpr char const* DIRECTIONAL = "directional"; -constexpr char const* OTSU = "otsu"; constexpr char const* INCLUDE = "include"; constexpr char const* EXCLUDE = "exclude"; constexpr char const* INTERPOLATE = "interpolate"; @@ -68,6 +67,18 @@ constexpr char const* INVERSE = "inverse"; constexpr char const* OTF = "OTF"; constexpr char const* PAD = "pad"; +// Thresholding/clustering +constexpr char const* ISODATA = "isodata"; +constexpr char const* OTSU = "otsu"; +constexpr char const* MINERROR = "minerror"; +constexpr char const* GMM = "gmm"; +constexpr char const* TRIANGLE = "triangle"; +//constexpr char const* BACKGROUND = "background"; +constexpr char const* VOLUME = "volume"; +constexpr char const* FIXED = "fixed"; +constexpr char const* HYSTERESIS = "hysteresis"; +constexpr char const* KMEANS = "kmeans"; + // Binary processing constexpr char const* BACKGROUND = "background"; constexpr char const* OBJECT = "object"; diff --git a/include/diplib/segmentation.h b/include/diplib/segmentation.h index 1668274f..d60b100e 100644 --- a/include/diplib/segmentation.h +++ b/include/diplib/segmentation.h @@ -46,7 +46,8 @@ namespace dip { /// /// K-means clustering is an iterative process with a random initialization. It is likely to get /// stuck in local minima. Repeating the clustering several times and picking the best result -/// (e.g. determined by times each cluster center is found) can be necessary. +/// (e.g. determined by times each cluster center is found) can be necessary. You can leave out +/// the \ref dip::Random` object in this function call, a default-initialized object will be used. /// /// The returned \ref dip::CoordinateArray contains the cluster centers. /// Element `i` in this array corresponds to label `i+1`. @@ -440,33 +441,33 @@ inline dfloat Threshold( String const& method = S::OTSU, dfloat parameter = infinity ) { - if( method == "isodata" ) { + if( method == S::ISODATA ) { FloatArray values = IsodataThreshold( in, mask, out, 1 ); return values[ 0 ]; } if( method == S::OTSU ) { return OtsuThreshold( in, mask, out ); } - if( method == "minerror" ) { + if( method == S::MINERROR ) { return MinimumErrorThreshold( in, mask, out ); } - if( method == "gmm" ) { + if( method == S::GMM ) { FloatArray values = GaussianMixtureModelThreshold( in, mask, out, 1 ); return values[ 0 ]; } - if( method == "triangle" ) { + if( method == S::TRIANGLE ) { return ( parameter == infinity ) ? TriangleThreshold( in, mask, out ) : TriangleThreshold( in, mask, out, parameter ); } - if( method == "background" ) { + if( method == S::BACKGROUND ) { return ( parameter == infinity ) ? BackgroundThreshold( in, mask, out ) : BackgroundThreshold( in, mask, out, parameter ); } - if( method == "volume" ) { + if( method == S::VOLUME ) { return ( parameter == infinity ) ? VolumeThreshold( in, mask, out ) : VolumeThreshold( in, mask, out, parameter ); } - if( method == "fixed" ) { + if( method == S::FIXED ) { if( parameter == infinity ) { parameter = 128.0; } @@ -521,12 +522,10 @@ DIP_EXPORT void PerObjectEllipsoidFit( Image const& in, Image& out, PerObjectEllipsoidFitParameters const& parameters - ); DIP_NODISCARD inline Image PerObjectEllipsoidFit( Image const& in, PerObjectEllipsoidFitParameters const& parameters - ) { Image out; PerObjectEllipsoidFit( in, out, parameters ); diff --git a/pydip/src/documentation_strings.h b/pydip/src/documentation_strings.h index d8a2c606..8ae43b95 100644 --- a/pydip/src/documentation_strings.h +++ b/pydip/src/documentation_strings.h @@ -898,6 +898,7 @@ constexpr char const* dip·GaussianParameters·position = "The location of the o constexpr char const* dip·GaussianParameters·amplitude = "The amplitude (value at the origin)"; constexpr char const* dip·GaussianParameters·sigma = "The sigma (width)"; constexpr char const* dip·GaussianMixtureModel·ConstSampleIteratorgtdfloatlt··SampleIteratorgtdfloatlt··dip·uint··dip·uint··dip·uint··Option·Periodicity· = "Determines the parameters for a Gaussian Mixture Model."; +constexpr char const* dip·RankFromPercentile·dfloat··dip·uint· = "Computes the rank (index into array) for a given percentile and an array of\nlength `n`."; constexpr char const* dip·Add·Image·CL·Image·CL·Image·L·DataType· = "Adds two images, sample-wise, with singleton expansion, and using saturated\narithmetic."; constexpr char const* dip·Subtract·Image·CL·Image·CL·Image·L·DataType· = "Subtracts two images, sample-wise, with singleton expansion, and using\nsaturated arithmetic."; constexpr char const* dip·Multiply·Image·CL·Image·CL·Image·L·DataType· = "Multiplies two images, pixel-wise, with singleton expansion, and using\nsaturated arithmetic."; @@ -1281,11 +1282,11 @@ constexpr char const* dip·RegressionParameters = "Represents the result of a 2D constexpr char const* dip·RegressionParameters·intercept = "intercept, $a$."; constexpr char const* dip·RegressionParameters·slope = "slope, $b$."; constexpr char const* dip·QuartilesResult = "Represents the quartiles, see `dip::Quartiles`."; -constexpr char const* dip·QuartilesResult·minimum = "Minimum (0th percentile)."; -constexpr char const* dip·QuartilesResult·lowerQuartile = "Lower or first quartile (25th percentile)."; -constexpr char const* dip·QuartilesResult·median = "Median or second quartile (50th percentile)."; -constexpr char const* dip·QuartilesResult·upperQuartile = "Upper or third quartile (75th percentile)."; -constexpr char const* dip·QuartilesResult·maximum = "Maximum (100th percentile)."; +constexpr char const* dip·QuartilesResult·minimum = "Minimum (0^th^ percentile)."; +constexpr char const* dip·QuartilesResult·lowerQuartile = "Lower or first quartile (25^th^ percentile)."; +constexpr char const* dip·QuartilesResult·median = "Median or second quartile (50^th^ percentile)."; +constexpr char const* dip·QuartilesResult·upperQuartile = "Upper or third quartile (75^th^ percentile)."; +constexpr char const* dip·QuartilesResult·maximum = "Maximum (100^th^ percentile)."; constexpr char const* dip·AlignedBuffer = "A container used to allocate 32-byte aligned buffers."; constexpr char const* dip·AlignedBuffer·AlignedBuffer = "A default-initialized buffer is empty."; constexpr char const* dip·AlignedBuffer·AlignedBuffer·dip·uint· = "A buffer of size `size`, uninitialized."; @@ -2484,8 +2485,9 @@ constexpr char const* dip·MinimumErrorThreshold·Histogram·CL = "Determines a constexpr char const* dip·GaussianMixtureModelThreshold·Histogram·CL·dip·uint· = "Determines a set of `nThresholds` thresholds by modeling the histogram with a\nGaussian Mixture Model, and choosing the optimal Bayes thresholds."; constexpr char const* dip·TriangleThreshold·Histogram·CL·dfloat· = "Determines a threshold using the using the chord method (a.k.a. skewed bi-\nmodality, maximum distance to triangle), and the image's histogram `in`."; constexpr char const* dip·BackgroundThreshold·Histogram·CL·dfloat··dfloat· = "Determines a threshold using the unimodal background-symmetry method, and the\nimage's histogram `in`."; -constexpr char const* dip·KMeansClustering·Histogram·CL·dip·uint· = "Partitions a (multi-dimensional) histogram into `nClusters` partitions using\nk-means clustering."; -constexpr char const* dip·MinimumVariancePartitioning·Histogram·CL·dip·uint· = "Partitions a (multi-dimensional) histogram into `nClusters` partitions\niteratively using Otsu thresholding along individual dimensions."; +constexpr char const* dip·KMeansClustering·Histogram·CL·Histogram·L·Random·L·dip·uint· = "Partitions a (multi-dimensional) histogram into `nClusters` partitions using\nk-means clustering."; +constexpr char const* dip·KMeansClustering·Histogram·CL·Histogram·L·dip·uint· = "Like above, using a default-initialized `dip::Random` object."; +constexpr char const* dip·MinimumVariancePartitioning·Histogram·CL·Histogram·L·dip·uint· = "Partitions a (multi-dimensional) histogram into `nClusters` partitions\niteratively using Otsu thresholding along individual dimensions."; constexpr char const* dip·EqualizationLookupTable·Histogram·CL = "Computes a lookup table that, when applied to an image with the histogram\n`in`, yields an image with a flat histogram (or rather a histogram that is as\nflat as possible)."; constexpr char const* dip·MatchingLookupTable·Histogram·CL·Histogram·CL = "Computes a lookup table that, when applied to an image with the histogram\n`in`, yields an image with a histogram as similar as possible to `example`."; constexpr char const* dip·PerObjectHistogram·Image·CL·Image·CL·Image·CL·Histogram·Configuration··String·CL·String·CL = "Computes a histogram of grey values in `grey` for each object in `label`."; diff --git a/pydip/src/documentation_urls.py b/pydip/src/documentation_urls.py index b3fb8fc5..6d098246 100644 --- a/pydip/src/documentation_urls.py +++ b/pydip/src/documentation_urls.py @@ -940,6 +940,7 @@ ('dip.GaussianParameters.amplitude', 'numeric.html#dip-GaussianParameters-amplitude'), ('dip.GaussianParameters.sigma', 'numeric.html#dip-GaussianParameters-sigma'), ('dip.GaussianMixtureModel', 'numeric.html#dip-GaussianMixtureModel-ConstSampleIterator%3Cdfloat%3E--SampleIterator%3Cdfloat%3E--dip-uint--dip-uint--dip-uint--Option-Periodicity-'), + ('dip.RankFromPercentile', 'numeric.html#dip-RankFromPercentile-dfloat--dip-uint-'), ('dip.Add', 'math_arithmetic.html#dip-Add-Image-CL-Image-CL-Image-L-DataType-'), ('dip.Subtract', 'math_arithmetic.html#dip-Subtract-Image-CL-Image-CL-Image-L-DataType-'), ('dip.Multiply', 'math_arithmetic.html#dip-Multiply-Image-CL-Image-CL-Image-L-DataType-'), @@ -2526,8 +2527,9 @@ ('dip.GaussianMixtureModelThreshold', 'histograms.html#dip-GaussianMixtureModelThreshold-Histogram-CL-dip-uint-'), ('dip.TriangleThreshold', 'histograms.html#dip-TriangleThreshold-Histogram-CL-dfloat-'), ('dip.BackgroundThreshold', 'histograms.html#dip-BackgroundThreshold-Histogram-CL-dfloat--dfloat-'), - ('dip.KMeansClustering', 'histograms.html#dip-KMeansClustering-Histogram-CL-dip-uint-'), - ('dip.MinimumVariancePartitioning', 'histograms.html#dip-MinimumVariancePartitioning-Histogram-CL-dip-uint-'), + ('dip.KMeansClustering', 'histograms.html#dip-KMeansClustering-Histogram-CL-Histogram-L-Random-L-dip-uint-'), + ('dip.KMeansClustering', 'histograms.html#dip-KMeansClustering-Histogram-CL-Histogram-L-dip-uint-'), + ('dip.MinimumVariancePartitioning', 'histograms.html#dip-MinimumVariancePartitioning-Histogram-CL-Histogram-L-dip-uint-'), ('dip.EqualizationLookupTable', 'histograms.html#dip-EqualizationLookupTable-Histogram-CL'), ('dip.MatchingLookupTable', 'histograms.html#dip-MatchingLookupTable-Histogram-CL-Histogram-CL'), ('dip.PerObjectHistogram', 'histograms.html#dip-PerObjectHistogram-Image-CL-Image-CL-Image-CL-Histogram-Configuration--String-CL-String-CL'), diff --git a/pydip/src/histogram.cpp b/pydip/src/histogram.cpp index c0252f7f..abc8d85b 100644 --- a/pydip/src/histogram.cpp +++ b/pydip/src/histogram.cpp @@ -268,10 +268,22 @@ void init_histogram( py::module& m ) { "in"_a, "sigma"_a = 4.0, doc_strings::dip·TriangleThreshold·Histogram·CL·dfloat· ); m.def( "BackgroundThreshold", py::overload_cast< dip::Histogram const&, dip::dfloat, dip::dfloat >( &dip::BackgroundThreshold ), "in"_a, "distance"_a = 2.0, "sigma"_a = 4.0, doc_strings::dip·BackgroundThreshold·Histogram·CL·dfloat··dfloat· ); - m.def( "KMeansClustering", py::overload_cast< dip::Histogram const&, dip::uint >( &dip::KMeansClustering ), - "in"_a, "nClusters"_a = 2, doc_strings::dip·KMeansClustering·Histogram·CL·dip·uint· ); + m.def( "KMeansClustering", []( dip::Histogram const& in, dip::uint nClusters ) { + return KMeansClustering( in, RandomNumberGenerator(), nClusters ); + }, + "in"_a, "nClusters"_a = 2, + "Partitions a (multi-dimensional) histogram into `nClusters` partitions using\nk-means clustering.\n" + "Like the C++ function, but using an internal `dip::Random` object." ); + m.def( "KMeansClustering", []( dip::Histogram const& in, dip::Histogram& out, dip::uint nClusters ) { + KMeansClustering( in, out, RandomNumberGenerator(), nClusters ); + }, + "in"_a, py::kw_only(), "out"_a, "nClusters"_a = 2, + "Partitions a (multi-dimensional) histogram into `nClusters` partitions using\nk-means clustering.\n" + "Like the C++ function, but using an internal `dip::Random` object." ); m.def( "MinimumVariancePartitioning", py::overload_cast< dip::Histogram const&, dip::uint >( &dip::MinimumVariancePartitioning ), - "in"_a, "nClusters"_a = 2, doc_strings::dip·MinimumVariancePartitioning·Histogram·CL·dip·uint· ); + "in"_a, "nClusters"_a = 2, doc_strings::dip·MinimumVariancePartitioning·Histogram·CL·Histogram·L·dip·uint· ); + m.def( "MinimumVariancePartitioning", py::overload_cast< dip::Histogram const&, dip::Histogram&, dip::uint >( &dip::MinimumVariancePartitioning ), + "in"_a, py::kw_only(), "out"_a, "nClusters"_a = 2, doc_strings::dip·MinimumVariancePartitioning·Histogram·CL·Histogram·L·dip·uint· ); m.def( "EqualizationLookupTable", &dip::EqualizationLookupTable, "in"_a, doc_strings::dip·EqualizationLookupTable·Histogram·CL ); m.def( "MatchingLookupTable", &dip::MatchingLookupTable, diff --git a/src/histogram/threshold_algorithms.cpp b/src/histogram/threshold_algorithms.cpp index 437fe87b..f4a60ed0 100644 --- a/src/histogram/threshold_algorithms.cpp +++ b/src/histogram/threshold_algorithms.cpp @@ -25,6 +25,7 @@ #include "diplib.h" #include "diplib/analysis.h" #include "diplib/chain_code.h" +#include "diplib/random.h" #include "diplib/segmentation.h" #include "diplib/statistics.h" @@ -35,28 +36,53 @@ namespace dip { using CountType = Histogram::CountType; -Histogram KMeansClustering( +namespace { + +// Compute nD bin centers from bin indices +FloatCoordinateArray ComputeCoordinates( Histogram const& hist, CoordinateArray const& bins ) { + dip::uint nDims = hist.Dimensionality(); + FloatCoordinateArray out( bins.size(), FloatArray( nDims, 0 ) ); + for( dip::uint ii = 0; ii < bins.size(); ++ii ) { + DIP_ASSERT( bins[ ii ].size() == nDims ); + for( dip::uint jj = 0; jj < nDims; ++jj ) { + out[ ii ][ jj ] = hist.BinCenter( bins[ ii ][ jj ], jj ); + } + } + return out; +} + +} // namespace + + +FloatCoordinateArray KMeansClustering( Histogram const& in, + Histogram& out, + Random& random, dip::uint nClusters ) { Image labs; - KMeansClustering( in.GetImage(), labs, nClusters ); - Histogram out = in.Copy(); + auto centers = KMeansClustering( in.GetImage(), labs, random, nClusters ); + if( &out != &in ) { + out = in.Copy(); + } Image tmp = out.GetImage(); // Copy with shared data tmp.Copy( labs ); - return out; + return ComputeCoordinates( out, centers ); } -Histogram MinimumVariancePartitioning( +FloatCoordinateArray MinimumVariancePartitioning( Histogram const& in, + Histogram& out, dip::uint nClusters ) { Image labs; - MinimumVariancePartitioning( in.GetImage(), labs, nClusters ); - Histogram out = in.Copy(); + auto centers = MinimumVariancePartitioning( in.GetImage(), labs, nClusters ); + if( &out != &in ) { + out = in.Copy(); + } Image tmp = out.GetImage(); // Copy with shared data tmp.Copy( labs ); - return out; + return ComputeCoordinates( out, centers ); } FloatArray IsodataThreshold( diff --git a/src/segmentation/kmeans_clustering.cpp b/src/segmentation/kmeans_clustering.cpp index 8eadaa3a..9a4307b3 100644 --- a/src/segmentation/kmeans_clustering.cpp +++ b/src/segmentation/kmeans_clustering.cpp @@ -220,7 +220,6 @@ CoordinateArray KMeansClustering( Clustering( in, out, clusters, true ); // Copy over cluster centers to output array - // This is a small amount of work, and will often be unused. But compared to the cost of the clustering, it's negligible. CoordinateArray coords( clusters.size() ); for( auto& cluster : clusters ) { coords[ cluster.label - 1 ] = UnsignedArray( cluster.mean );