JuliaStats · davidbp · Oct 28, 2022 · Oct 28, 2022 · Oct 28, 2022 · Oct 28, 2022
diff --git a/src/Clustering.jl b/src/Clustering.jl
@@ -65,7 +65,10 @@ module Clustering
     Hclust, hclust, cutree,
 
     # MCL
-    mcl, MCLResult
+    mcl, MCLResult,
+
+    # utils
+    assign_clusters
 
     ## source files
 

diff --git a/src/utils.jl b/src/utils.jl
@@ -70,3 +70,37 @@ function updatemin!(r::AbstractArray, x::AbstractArray)
     end
     return r
 end
+
+
+"""
+    assign_clusters(X, R::ClusteringResult, distance=SqEuclidean()) -> Vector{Int}
+
+Assign each column of `X` to its nearest center in `R` (ties go to the lowest index).
+Throws a `DimensionMismatch` when `X` and `R.centers` differ in their number of rows.
+
+# Arguments
+- `X`: data matrix; each column is one sample to be assigned.
+- `R`: fitted clustering result exposing a `centers` matrix (one center per column).
+- `distance`: semi-metric used to compare a sample with a center.
+"""
+function assign_clusters(
+    X::AbstractMatrix,
+    R::ClusteringResult,
+    distance::SemiMetric = SqEuclidean())
+
+    centers = R.centers  # read once, so a result without `centers` fails before threading
+    size(X, 1) == size(centers, 1) || throw(DimensionMismatch(
+        "X has $(size(X, 1)) rows, but the centers have $(size(centers, 1))"))
+    cluster_assignments = zeros(Int, size(X, 2))
+    Threads.@threads for n in axes(X, 2)
+        min_dist, nearest = Inf, 0  # `Inf` needs no `typemax` method for eltype(X)
+        for k in axes(centers, 2)
+            dist = distance(view(X, :, n), view(centers, :, k))
+            if dist < min_dist
+                min_dist, nearest = dist, k
+            end
+        end
+        cluster_assignments[n] = nearest  # stays 0 when no distance is below `Inf`
+    end
+    return cluster_assignments
+end
diff --git a/test/kmeans.jl b/test/kmeans.jl
@@ -204,4 +204,11 @@ end
     end
 end
 
+@testset "get cluster assignments" begin
+    X = rand(5, 100)
+    R = kmeans(X, 10; maxiter=200)
+    @test assign_clusters(X, R) isa Vector{Int}
+    @test assign_clusters(X, R) == R.assignments  # training data keeps its fitted labels
+end
+
 end