From d48f0f14a51f4d404645660c7c64135c4577d9ea Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Mon, 8 Jan 2024 14:21:21 -0500
Subject: [PATCH 01/23] Fixes #36 (with simple MTTKRP implementation)

---
 src/gcp-opt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl
index 4716326..6b3c2cf 100644
--- a/src/gcp-opt.jl
+++ b/src/gcp-opt.jl
@@ -144,7 +144,7 @@ function gcp_grad_U!(
 end
 
 function _gcp(
-    X::Array{TX,N},
+    X::AbstractArray{TX,N},
     r,
     loss::LeastSquaresLoss,
     constraints::Tuple{},

From 4596484762bb704d8f0a32ad95ddab19b9d4b1f0 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Mon, 8 Jan 2024 14:40:31 -0500
Subject: [PATCH 02/23] Change Arrays to AbstractArrays

---
 src/gcp-opt.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl
index 6b3c2cf..6fd70f5 100644
--- a/src/gcp-opt.jl
+++ b/src/gcp-opt.jl
@@ -2,7 +2,7 @@
 # Main fitting function
 
 """
-    gcp(X::Array, r, loss = LeastSquaresLoss();
+    gcp(X::AbstractArray, r, loss = LeastSquaresLoss();
         constraints = default_constraints(loss),
         algorithm = default_algorithm(X, r, loss, constraints)) -> CPD
 
@@ -21,7 +21,7 @@ to see what losses are supported.
 See also: `CPD`, `AbstractLoss`.
 """
 gcp(
-    X::Array,
+    X::AbstractArray,
     r,
     loss = LeastSquaresLoss();
     constraints = default_constraints(loss),
@@ -43,13 +43,13 @@ function default_constraints(loss)
 end
 
 # Choose default algorithm
-default_algorithm(X::Array{<:Real}, r, loss::LeastSquaresLoss, constraints::Tuple{}) =
+default_algorithm(X::AbstractArray{<:Real}, r, loss::LeastSquaresLoss, constraints::Tuple{}) =
     GCPAlgorithms.ALS()
 default_algorithm(X, r, loss, constraints) = GCPAlgorithms.LBFGSB()
 
 # TODO: remove this `func, grad, lower` signature
 # will require reworking how we do testing
-_gcp(X::Array{TX,N}, r, func, grad, lower, lbfgsopts) where {TX,N} = _gcp(
+_gcp(X::AbstractArray{TX,N}, r, func, grad, lower, lbfgsopts) where {TX,N} = _gcp(
     X,
     r,
     UserDefinedLoss(func; deriv = grad, domain = Interval(lower, +Inf)),
@@ -57,7 +57,7 @@ _gcp(X::Array{TX,N}, r, func, grad, lower, lbfgsopts) where {TX,N} = _gcp(
     GCPAlgorithms.LBFGSB(; lbfgsopts...),
 )
 function _gcp(
-    X::Array{TX,N},
+    X::AbstractArray{TX,N},
     r,
     loss,
     constraints::Tuple{Vararg{GCPConstraints.LowerBound}},
@@ -116,14 +116,14 @@ function _gcp(
 end
 
 # Objective function and gradient (w.r.t. `M.U`)
-function gcp_func(M::CPD{T,N}, X::Array{TX,N}, loss) where {T,TX,N}
+function gcp_func(M::CPD{T,N}, X::AbstractArray{TX,N}, loss) where {T,TX,N}
     return sum(value(loss, X[I], M[I]) for I in CartesianIndices(X) if !ismissing(X[I]))
 end
 
 function gcp_grad_U!(
     GU::NTuple{N,TGU},
     M::CPD{T,N},
-    X::Array{TX,N},
+    X::AbstractArray{TX,N},
     loss,
 ) where {T,TX,N,TGU<:AbstractMatrix{T}}
     Y = [

From 6e2a476d766e7fa3e8c879d7cccbd8538d39e287 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Wed, 10 Jan 2024 11:00:05 -0500
Subject: [PATCH 03/23] Replace skipmissing

---
 src/gcp-opt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl
index 6fd70f5..ce444e2 100644
--- a/src/gcp-opt.jl
+++ b/src/gcp-opt.jl
@@ -155,7 +155,7 @@ function _gcp(
     # Random initialization
     M0 = CPD(ones(T, r), rand.(T, size(X), r))
     M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0))))
-    Xnorm = sqrt(sum(abs2, skipmissing(X)))
+    Xnorm = sqrt(sum(x -> isnan(x) ? 
0 : abs2(x), X))
     for k in Base.OneTo(N)
         M0.U[k] .*= (Xnorm / M0norm)^(1 / N)
     end

From 01e2ab94f26f23272aa3d6463bb477f9d7249e42 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Wed, 10 Jan 2024 11:50:04 -0500
Subject: [PATCH 04/23] Use ismissing in mapreduce instead of skipmissing

---
 src/gcp-opt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl
index ce444e2..bac2c0e 100644
--- a/src/gcp-opt.jl
+++ b/src/gcp-opt.jl
@@ -155,7 +155,7 @@ function _gcp(
     # Random initialization
     M0 = CPD(ones(T, r), rand.(T, size(X), r))
     M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0))))
-    Xnorm = sqrt(sum(x -> isnan(x) ? 0 : abs2(x), X))
+    Xnorm = sqrt(mapreduce(x -> ismissing(x) ? 
0 : abs2(x), +, X)) + Xnorm = sqrt(sum(abs2, skipmissing(X))) + for k in Base.OneTo(N) + M0.U[k] .*= (Xnorm / M0norm)^(1 / N) + end + λ, U = M0.λ, collect(M0.U) + + # Inefficient but simple implementation + for _ in 1:algorithm.maxiters + for n in 1:N + V = reduce(.*, U[i]'U[i] for i in setdiff(1:N, n)) + U[n] = mttkrp(X, U, n) / V + λ = norm.(eachcol(U[n])) + U[n] = U[n] ./ permutedims(λ) + end + end + + return CPD(λ, Tuple(U)) +end + +# GPU implementation +function _gcp( + X::CuArray{TX,N}, + r, + loss::LeastSquaresLoss, + constraints::Tuple{}, + algorithm::GCPAlgorithms.ALS, +) where {TX<:Real,N} + T = promote_type(TX, Float64) + + # Random initialization + M0 = CPD(CUDA.ones(T, r), CUDA.rand.(T, size(X), r)) + M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) + Xnorm = sqrt(mapreduce(x -> isnan(x) ? 0 : abs2(x), +, X)) for k in Base.OneTo(N) M0.U[k] .*= (Xnorm / M0norm)^(1 / N) end From fd71ca42db91b02a70f6a20fc570e3b83c5b4204 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Wed, 10 Jan 2024 14:56:26 -0500 Subject: [PATCH 06/23] Add CUDA gcp as extension, use simple MTTKRP implementation and khatri rao function from #34 --- Project.toml | 3 +++ ext/CUDAExt.jl | 48 ++++++++++++++++++++++++++++++++++++++++ src/GCPDecompositions.jl | 1 + src/gcp-opt.jl | 47 ++++++++++----------------------------- 4 files changed, 64 insertions(+), 35 deletions(-) create mode 100644 ext/CUDAExt.jl diff --git a/Project.toml b/Project.toml index fef0d03..ae7db0e 100644 --- a/Project.toml +++ b/Project.toml @@ -11,12 +11,15 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" [weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" [extensions] +CUDAExt = "CUDA" LossFunctionsExt = "LossFunctions" [compat] +CUDA = "5.0.0" ForwardDiff = "0.10.36" IntervalSets = "0.7.7" LBFGSB = "0.4.1" diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl new file mode 100644 index 0000000..170c70f --- /dev/null +++ b/ext/CUDAExt.jl @@ -0,0 +1,48 @@ +module CUDAExt + +using GCPDecompositions, CUDA + +GCPDecompositions.gcp( + X::CuArray, + r, + loss = LeastSquaresLoss(); + constraints = (), + algorithm = GCPAlgorithms.ALS(), +) = _gcp(X, r, loss, constraints, algorithm) +function _gcp( + X::CuArray{TX,N}, + r, + loss::LeastSquaresLoss, + constraints::Tuple{}, + algorithm::GCPAlgorithms.ALS, +) where {TX<:Real,N} + T = promote_type(TX, Float32) + + # Random initialization + M0 = CPD(ones(T, r), rand.(T, size(X), r)) + #M0norm = sqrt(mapreduce(abs2, +, M0[I] for I in CartesianIndices(size(M0)))) + M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) + Xnorm = sqrt(mapreduce(x -> isnan(x) ? 
0 : abs2(x), +, X, init=0f0)) + for k in Base.OneTo(N) + M0.U[k] .*= (Xnorm / M0norm)^(1 / N) + end + λ, U = M0.λ, collect(M0.U) + + # Move λ, U to gpu + λ = CuArray(λ) + U = [CuArray(U[i]) for i in 1:N] + + # Inefficient but simple implementation + for _ in 1:algorithm.maxiters + for n in 1:N + V = reduce(.*, U[i]'U[i] for i in setdiff(1:N, n)) + U[n] = GCPDecompositions.mttkrp(X, U, n) / V + λ = CuArray(CUDA.norm.(eachcol(U[n]))) + U[n] = U[n] ./ permutedims(λ) + end + end + + return CPD(Array(λ), Tuple([Array(U[i]) for i in 1:N])) +end + +end \ No newline at end of file diff --git a/src/GCPDecompositions.jl b/src/GCPDecompositions.jl index 866dafd..02e34c5 100644 --- a/src/GCPDecompositions.jl +++ b/src/GCPDecompositions.jl @@ -39,6 +39,7 @@ include("gcp-opt.jl") if !isdefined(Base, :get_extension) include("../ext/LossFunctionsExt.jl") + include("../ext/CUDAExt.jl") end end diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index fa58653..b6f16e6 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -155,6 +155,7 @@ function _gcp( # Random initialization M0 = CPD(ones(T, r), rand.(T, size(X), r)) M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) + #Xnorm = sqrt(mapreduce(x -> isnan(x) || ismissing(x) ? 0 : abs2(x), +, X, init=0f0)) Xnorm = sqrt(sum(abs2, skipmissing(X))) for k in Base.OneTo(N) M0.U[k] .*= (Xnorm / M0norm)^(1 / N) @@ -174,41 +175,9 @@ function _gcp( return CPD(λ, Tuple(U)) end -# GPU implementation -function _gcp( - X::CuArray{TX,N}, - r, - loss::LeastSquaresLoss, - constraints::Tuple{}, - algorithm::GCPAlgorithms.ALS, -) where {TX<:Real,N} - T = promote_type(TX, Float64) - - # Random initialization - M0 = CPD(CUDA.ones(T, r), CUDA.rand.(T, size(X), r)) - M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) - Xnorm = sqrt(mapreduce(x -> isnan(x) ? 0 : abs2(x), +, X)) - for k in Base.OneTo(N) - M0.U[k] .*= (Xnorm / M0norm)^(1 / N) - end - λ, U = M0.λ, collect(M0.U) - - # Inefficient but simple implementation - for _ in 1:algorithm.maxiters - for n in 1:N - V = reduce(.*, U[i]'U[i] for i in setdiff(1:N, n)) - U[n] = mttkrp(X, U, n) / V - λ = norm.(eachcol(U[n])) - U[n] = U[n] ./ permutedims(λ) - end - end - - return CPD(λ, Tuple(U)) -end # inefficient but simple function mttkrp(X, U, n) - # Dimensions N, I, r = length(U), Tuple(size.(U, 1)), (only∘unique)(size.(U, 2)) (N == ndims(X) && I == size(X)) || throw(DimensionMismatch("`X` and `U` do not have matching dimensions")) @@ -217,10 +186,18 @@ function mttkrp(X, U, n) # Khatri-Rao product (in mode n) Zn = similar(U[1], prod(I[setdiff(1:N, n)]), r) - for j in 1:r - Zn[:, j] = reduce(kron, [view(U[i], :, j) for i in reverse(setdiff(1:N, n))]) - end + Zn = khatrirao(U[reverse(setdiff(1:N, n))]...) 
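    # (A note on `khatrirao`, defined at the end of this patch: it forms the
    # column-wise Kronecker product. For A of size (I, r) and B of size (J, r),
    # khatrirao(A, B) is (I*J) x r with j-th column kron(A[:, j], B[:, j]); for
    # example, with A = [1 2; 3 4] and B = [5 6; 7 8], column 1 is [5, 7, 15, 21].)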
     # MTTKRP (in mode n)
     return Xn * Zn
 end
+
+function khatrirao(A::Vararg{T,N}) where {T<:AbstractMatrix,N}
+    r = size(A[1],2)
+    # @boundscheck all(==(r),size.(A,2)) || throw(DimensionMismatch())
+    R = ntuple(Val(N)) do k
+        dims = (ntuple(i->1,Val(N-k))..., :, ntuple(i->1,Val(k-1))..., r)
+        return reshape(A[k],dims)
+    end
+    return reshape(broadcast(*, R...),:,r)
+end

From e5b30ea1df745599d597b512e50630dd396e9054 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:09:11 -0500
Subject: [PATCH 07/23] Add CUDA to deps for backward compatibility

---
 Project.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Project.toml b/Project.toml
index ae7db0e..456a9af 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["David Hong and contributors"]
 version = "0.1.2"
 
 [deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 LBFGSB = "5be7bae1-8223-5378-bac3-9e7378a2f6e6"

From c34cc3c45b898caa04b1e9dc5062b709059ba331 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:10:57 -0500
Subject: [PATCH 08/23] Move CUDA to extras

---
 Project.toml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 456a9af..e20fc59 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,7 +4,6 @@ authors = ["David Hong and contributors"]
 version = "0.1.2"
 
 [deps]
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 LBFGSB = "5be7bae1-8223-5378-bac3-9e7378a2f6e6"
@@ -19,6 +18,9 @@ LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
 CUDAExt = "CUDA"
 LossFunctionsExt = "LossFunctions"
 
+[extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+
 [compat]
 CUDA = "5.0.0"
 ForwardDiff = "0.10.36"

From 966e30b8db9f04083f73b64e0a6be7fe041ca0e7 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:18:35 -0500
Subject: [PATCH 09/23] Change CUDA version compatibility

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index e20fc59..04aabd2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,7 +22,7 @@ LossFunctionsExt = "LossFunctions"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [compat]
-CUDA = "5.0.0"
+CUDA = ">= 4.4.1"
 ForwardDiff = "0.10.36"
 IntervalSets = "0.7.7"
 LBFGSB = "0.4.1"

From 9938c46584a23d46a2369daa38cdd4fd14107680 Mon Sep 17 00:00:00 2001
From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:38:09 -0500
Subject: [PATCH 10/23] Modify project.toml

---
 Project.toml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index 04aabd2..a0e754c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["David Hong and contributors"]
 version = "0.1.2"
 
 [deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 LBFGSB = "5be7bae1-8223-5378-bac3-9e7378a2f6e6"
@@ -19,14 +20,14 @@ LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
 CUDAExt = "CUDA"
 LossFunctionsExt = "LossFunctions"
 
-[extras]
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-
 [compat]
 CUDA = ">= 4.4.1"
 ForwardDiff = "0.10.36"
 IntervalSets = "0.7.7"
 LBFGSB = "0.4.1"
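# (Context for patches 07-10: on Julia versions without package extensions,
# src/GCPDecompositions.jl includes ext/CUDAExt.jl directly, so CUDA must be
# resolvable as an ordinary dependency there; keeping CUDA under both
# [deps]/[extras] and relaxing [compat] to ">= 4.4.1" appears aimed at those
# older setups.)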
LinearAlgebra = "1.6" LossFunctions = "0.11.1" julia = "1.6" + +[extras] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" \ No newline at end of file From e8077a3c5d18e7ced9190b0a4f80b19c6cec939f Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 09:57:09 -0500 Subject: [PATCH 11/23] Test with new MTTKRP from #35 --- src/gcp-opt.jl | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index b6f16e6..5d1e3dc 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -175,9 +175,16 @@ function _gcp( return CPD(λ, Tuple(U)) end - -# inefficient but simple +""" + mttkrp(X, U, n) -> Rn + + Algorithm for computing one mode of MTTKRP is from "Fast Alternating LS Algorithms + for High Order CANDECOMP/PARAFAC Tensor Factorizations" by Phan et al., specifically + section III-B. +""" function mttkrp(X, U, n) + + # Dimensions N, I, r = length(U), Tuple(size.(U, 1)), (only∘unique)(size.(U, 2)) (N == ndims(X) && I == size(X)) || throw(DimensionMismatch("`X` and `U` do not have matching dimensions")) @@ -186,15 +193,39 @@ function mttkrp(X, U, n) # Khatri-Rao product (in mode n) Zn = similar(U[1], prod(I[setdiff(1:N, n)]), r) - Zn = khatrirao(U[reverse(setdiff(1:N, n))]...) - - # MTTKRP (in mode n) - return Xn * Zn + for j in 1:r + Zn[:, j] = reduce(kron, [view(U[i], :, j) for i in reverse(setdiff(1:N, n))]) + # See section III-B from "Fast Alternating LS Algorithms for High Order CANDECOMP/PARAFAC Tensor Factorizations" by Phan et al. + Rn = similar(U[n]) + Jn = prod(size(X)[1:n]) + Kn = prod(size(X)[n+1:end]) + # Compute tensor-vector products right to left (equations 15, 17) for each rank + + # Special cases are n = 1 and n = N (n = 1 has no outer tensor-vector products), + # n = N has no inner tensor-vector products + if n == 1 + # Just inner tensor-vector products + kr_inner = khatrirao(U[reverse(2:N)]...) + mul!(Rn, reshape(X, size(X, 1), :), kr_inner) + elseif n == N + # Just outer tensor-vector products + kr_outer = khatrirao(U[reverse(1:N-1)]...) + mul!(Rn, transpose(reshape(X, prod(size(X)[1:N-1]), size(X)[N])), kr_outer) + else + kr_inner = khatrirao(U[reverse(n+1:N)]...) + kr_outer = khatrirao(U[reverse(1:n-1)]...) 
+ inner = reshape(reshape(X, Jn, Kn) * kr_inner, (size(X)[1:n]..., r)) + Jn_inner = prod(size(inner)[1:n-1]) + Kn_inner = prod(size(inner)[n:end-1]) + for j in 1:r + Rn[:, j] .= transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] + end + end + return Rn end function khatrirao(A::Vararg{T,N}) where {T<:AbstractMatrix,N} r = size(A[1],2) - # @boundscheck all(==(r),size.(A,2)) || throw(DimensionMismatch()) R = ntuple(Val(N)) do k dims = (ntuple(i->1,Val(N-k))..., :, ntuple(i->1,Val(k-1))..., r) return reshape(A[k],dims) From fb1d0ceac6fe6e2b9e693b242cc90f5fb7ea5227 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 09:57:33 -0500 Subject: [PATCH 12/23] Change .= back to = --- src/gcp-opt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index 5d1e3dc..47425a1 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -218,7 +218,7 @@ function mttkrp(X, U, n) Jn_inner = prod(size(inner)[1:n-1]) Kn_inner = prod(size(inner)[n:end-1]) for j in 1:r - Rn[:, j] .= transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] + Rn[:, j] = transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] end end return Rn From e368433d19070f3d605eca6fcc5bf4b220838885 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 10:01:56 -0500 Subject: [PATCH 13/23] Fix typos --- src/gcp-opt.jl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index 47425a1..92bbf37 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -188,19 +188,11 @@ function mttkrp(X, U, n) N, I, r = length(U), Tuple(size.(U, 1)), (only∘unique)(size.(U, 2)) (N == ndims(X) && I == size(X)) || throw(DimensionMismatch("`X` and `U` do not have matching dimensions")) - # Matricized tensor (in mode n) - Xn = reshape(permutedims(X, [n; setdiff(1:N, n)]), size(X, n), :) - - # Khatri-Rao product (in mode n) - Zn = similar(U[1], prod(I[setdiff(1:N, n)]), r) - for j in 1:r - Zn[:, j] = reduce(kron, [view(U[i], :, j) for i in reverse(setdiff(1:N, n))]) # See section III-B from "Fast Alternating LS Algorithms for High Order CANDECOMP/PARAFAC Tensor Factorizations" by Phan et al. 
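    # (Jn and Kn below are the sizes used to matricize X in place: Jn multiplies
    # the leading mode sizes 1:n and Kn the trailing mode sizes n+1:N, so
    # reshape(X, Jn, Kn) needs no permuted copy of X and each MTTKRP mode
    # reduces to ordinary matrix products with Khatri-Rao factors.)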
Rn = similar(U[n]) Jn = prod(size(X)[1:n]) Kn = prod(size(X)[n+1:end]) - # Compute tensor-vector products right to left (equations 15, 17) for each rank - + # Special cases are n = 1 and n = N (n = 1 has no outer tensor-vector products), # n = N has no inner tensor-vector products if n == 1 From 614929d7c3d9e5a9c1ae6f248ecba6e9eda715af Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 10:27:53 -0500 Subject: [PATCH 14/23] Use latest MTTKRP from #35 --- src/gcp-opt.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index 92bbf37..3315151 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -209,9 +209,7 @@ function mttkrp(X, U, n) inner = reshape(reshape(X, Jn, Kn) * kr_inner, (size(X)[1:n]..., r)) Jn_inner = prod(size(inner)[1:n-1]) Kn_inner = prod(size(inner)[n:end-1]) - for j in 1:r - Rn[:, j] = transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] - end + Rn = reduce(hcat, [transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] for j in 1:r]) end return Rn end From 7854ec8af2e07b06fd977e4101949a628b02d1d6 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 10:33:49 -0500 Subject: [PATCH 15/23] collect selectdim for now --- src/gcp-opt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index 3315151..8f5e282 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -209,7 +209,7 @@ function mttkrp(X, U, n) inner = reshape(reshape(X, Jn, Kn) * kr_inner, (size(X)[1:n]..., r)) Jn_inner = prod(size(inner)[1:n-1]) Kn_inner = prod(size(inner)[n:end-1]) - Rn = reduce(hcat, [transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] for j in 1:r]) + Rn = reduce(hcat, [transpose(reshape(collect(selectdim(inner, ndims(inner), j)), Jn_inner, Kn_inner)) * kr_outer[:, j] for j in 1:r]) end return Rn end From df803e147799f3213f989d033373c44d9e93b0cb Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 11:11:43 -0500 Subject: [PATCH 16/23] Try reusing inner --- src/gcp-opt.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index 8f5e282..d2589ce 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -189,7 +189,6 @@ function mttkrp(X, U, n) (N == ndims(X) && I == size(X)) || throw(DimensionMismatch("`X` and `U` do not have matching dimensions")) # See section III-B from "Fast Alternating LS Algorithms for High Order CANDECOMP/PARAFAC Tensor Factorizations" by Phan et al. - Rn = similar(U[n]) Jn = prod(size(X)[1:n]) Kn = prod(size(X)[n+1:end]) @@ -197,10 +196,12 @@ function mttkrp(X, U, n) # n = N has no inner tensor-vector products if n == 1 # Just inner tensor-vector products + Rn = similar(U[n]) kr_inner = khatrirao(U[reverse(2:N)]...) mul!(Rn, reshape(X, size(X, 1), :), kr_inner) elseif n == N # Just outer tensor-vector products + Rn = similar(U[n]) kr_outer = khatrirao(U[reverse(1:N-1)]...) 
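            # (Last-mode case: reshape(X, prod(size(X)[1:N-1]), size(X)[N]) is
            # the mode-N unfolding up to transpose, so its transpose times the
            # Khatri-Rao product of the remaining factors is exactly the mode-N
            # MTTKRP; mul! stores the result in Rn without a fresh allocation.)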
mul!(Rn, transpose(reshape(X, prod(size(X)[1:N-1]), size(X)[N])), kr_outer) else @@ -209,7 +210,8 @@ function mttkrp(X, U, n) inner = reshape(reshape(X, Jn, Kn) * kr_inner, (size(X)[1:n]..., r)) Jn_inner = prod(size(inner)[1:n-1]) Kn_inner = prod(size(inner)[n:end-1]) - Rn = reduce(hcat, [transpose(reshape(collect(selectdim(inner, ndims(inner), j)), Jn_inner, Kn_inner)) * kr_outer[:, j] for j in 1:r]) + inner = reduce(hcat, [transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] for j in 1:r]) + return inner end return Rn end From 2a7f0c2332e4e5e45017a2f88a1e2add0212c6ed Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 11 Jan 2024 15:45:44 -0500 Subject: [PATCH 17/23] Use copy in MTTKRP middle mode Rn computation for now --- src/gcp-opt.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index d2589ce..dcf25f9 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -155,7 +155,6 @@ function _gcp( # Random initialization M0 = CPD(ones(T, r), rand.(T, size(X), r)) M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) - #Xnorm = sqrt(mapreduce(x -> isnan(x) || ismissing(x) ? 0 : abs2(x), +, X, init=0f0)) Xnorm = sqrt(sum(abs2, skipmissing(X))) for k in Base.OneTo(N) M0.U[k] .*= (Xnorm / M0norm)^(1 / N) @@ -189,19 +188,18 @@ function mttkrp(X, U, n) (N == ndims(X) && I == size(X)) || throw(DimensionMismatch("`X` and `U` do not have matching dimensions")) # See section III-B from "Fast Alternating LS Algorithms for High Order CANDECOMP/PARAFAC Tensor Factorizations" by Phan et al. + Rn = similar(U[n]) Jn = prod(size(X)[1:n]) Kn = prod(size(X)[n+1:end]) # Special cases are n = 1 and n = N (n = 1 has no outer tensor-vector products), # n = N has no inner tensor-vector products if n == 1 - # Just inner tensor-vector products - Rn = similar(U[n]) + # Just inner tensor-vector products kr_inner = khatrirao(U[reverse(2:N)]...) mul!(Rn, reshape(X, size(X, 1), :), kr_inner) elseif n == N # Just outer tensor-vector products - Rn = similar(U[n]) kr_outer = khatrirao(U[reverse(1:N-1)]...) mul!(Rn, transpose(reshape(X, prod(size(X)[1:N-1]), size(X)[N])), kr_outer) else @@ -210,8 +208,7 @@ function mttkrp(X, U, n) inner = reshape(reshape(X, Jn, Kn) * kr_inner, (size(X)[1:n]..., r)) Jn_inner = prod(size(inner)[1:n-1]) Kn_inner = prod(size(inner)[n:end-1]) - inner = reduce(hcat, [transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner)) * kr_outer[:, j] for j in 1:r]) - return inner + Rn = reduce(hcat, [copy(transpose(reshape(selectdim(inner, ndims(inner), j), Jn_inner, Kn_inner))) * kr_outer[:, j] for j in 1:r]) end return Rn end From b72f6e981d025eb50db9fec4645047e19a583fe4 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:23:43 -0500 Subject: [PATCH 18/23] Change abstract arrays back to arrays --- ext/CUDAExt.jl | 3 +-- src/gcp-opt.jl | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl index 170c70f..0d91761 100644 --- a/ext/CUDAExt.jl +++ b/ext/CUDAExt.jl @@ -19,8 +19,7 @@ function _gcp( T = promote_type(TX, Float32) # Random initialization - M0 = CPD(ones(T, r), rand.(T, size(X), r)) - #M0norm = sqrt(mapreduce(abs2, +, M0[I] for I in CartesianIndices(size(M0)))) + M0 = CPD(ones(T, r), rand.(T, size(X), r))X_gpu M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) Xnorm = sqrt(mapreduce(x -> isnan(x) ? 
0 : abs2(x), +, X, init=0f0)) for k in Base.OneTo(N) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index dcf25f9..c72a359 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -2,7 +2,7 @@ # Main fitting function """ - gcp(X::AbstractArray, r, loss = LeastSquaresLoss(); + gcp(X::Array, r, loss = LeastSquaresLoss(); constraints = default_constraints(loss), algorithm = default_algorithm(X, r, loss, constraints)) -> CPD @@ -21,7 +21,7 @@ to see what losses are supported. See also: `CPD`, `AbstractLoss`. """ gcp( - X::AbstractArray, + X::Array, r, loss = LeastSquaresLoss(); constraints = default_constraints(loss), @@ -43,7 +43,7 @@ function default_constraints(loss) end # Choose default algorithm -default_algorithm(X::AbstractArray{<:Real}, r, loss::LeastSquaresLoss, constraints::Tuple{}) = +default_algorithm(X::Array{<:Real}, r, loss::LeastSquaresLoss, constraints::Tuple{}) = GCPAlgorithms.ALS() default_algorithm(X, r, loss, constraints) = GCPAlgorithms.LBFGSB() @@ -213,7 +213,7 @@ function mttkrp(X, U, n) return Rn end -function khatrirao(A::Vararg{T,N}) where {T<:AbstractMatrix,N} +function khatrirao(A::Vararg{T,N}) where {T<:Matrix,N} r = size(A[1],2) R = ntuple(Val(N)) do k dims = (ntuple(i->1,Val(N-k))..., :, ntuple(i->1,Val(k-1))..., r) From 12f0004095a20f50124cd67aa5f4857a3a5ffec0 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:32:28 -0500 Subject: [PATCH 19/23] Remove typo --- ext/CUDAExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl index 0d91761..0dc143b 100644 --- a/ext/CUDAExt.jl +++ b/ext/CUDAExt.jl @@ -19,7 +19,7 @@ function _gcp( T = promote_type(TX, Float32) # Random initialization - M0 = CPD(ones(T, r), rand.(T, size(X), r))X_gpu + M0 = CPD(ones(T, r), rand.(T, size(X), r)) M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) Xnorm = sqrt(mapreduce(x -> isnan(x) ? 
0 : abs2(x), +, X, init=0f0)) for k in Base.OneTo(N) From e9680f6d3f120429e0c30da71dd2c7c8cd5c34d6 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:34:43 -0500 Subject: [PATCH 20/23] Switch khatrirao to accept abstractmatrix --- src/gcp-opt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index c72a359..c82da6b 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -213,7 +213,7 @@ function mttkrp(X, U, n) return Rn end -function khatrirao(A::Vararg{T,N}) where {T<:Matrix,N} +function khatrirao(A::Vararg{T,N}) where {T<:AbstactMatrix,N} r = size(A[1],2) R = ntuple(Val(N)) do k dims = (ntuple(i->1,Val(N-k))..., :, ntuple(i->1,Val(k-1))..., r) From ebc545540f9a95ee0289ec0ba22e36949106263d Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:36:59 -0500 Subject: [PATCH 21/23] Fix typo --- src/gcp-opt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index c82da6b..f93dd89 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -213,7 +213,7 @@ function mttkrp(X, U, n) return Rn end -function khatrirao(A::Vararg{T,N}) where {T<:AbstactMatrix,N} +function khatrirao(A::Vararg{T,N}) where {T<:AbstractMatrix,N} r = size(A[1],2) R = ntuple(Val(N)) do k dims = (ntuple(i->1,Val(N-k))..., :, ntuple(i->1,Val(k-1))..., r) From ab40583cf9a98d5f5dd23182f8aeba8d4ebceca9 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Mon, 15 Jan 2024 13:01:15 -0500 Subject: [PATCH 22/23] Temporarily use Float32 for comparison against GPU --- src/gcp-opt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index f93dd89..1e9ad20 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -150,8 +150,8 @@ function _gcp( constraints::Tuple{}, algorithm::GCPAlgorithms.ALS, ) where {TX<:Real,N} - T = promote_type(TX, Float64) - + #T = promote_type(TX, Float64) + T = promote_type(TX, Float32) # Random initialization M0 = CPD(ones(T, r), rand.(T, size(X), r)) M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) From f4696c0a96f7a2e6005e5e3962712026b234c298 Mon Sep 17 00:00:00 2001 From: alexmul1114 <96952477+alexmul1114@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:06:06 -0500 Subject: [PATCH 23/23] Replace norm function --- ext/CUDAExt.jl | 2 +- src/gcp-opt.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl index 0dc143b..2e7e99d 100644 --- a/ext/CUDAExt.jl +++ b/ext/CUDAExt.jl @@ -36,7 +36,7 @@ function _gcp( for n in 1:N V = reduce(.*, U[i]'U[i] for i in setdiff(1:N, n)) U[n] = GCPDecompositions.mttkrp(X, U, n) / V - λ = CuArray(CUDA.norm.(eachcol(U[n]))) + λ = vec(sqrt.(sum(abs2, U[n]; dims=1))) U[n] = U[n] ./ permutedims(λ) end end diff --git a/src/gcp-opt.jl b/src/gcp-opt.jl index 1e9ad20..5ac5933 100644 --- a/src/gcp-opt.jl +++ b/src/gcp-opt.jl @@ -166,7 +166,7 @@ function _gcp( for n in 1:N V = reduce(.*, U[i]'U[i] for i in setdiff(1:N, n)) U[n] = mttkrp(X, U, n) / V - λ = norm.(eachcol(U[n])) + λ = vec(sqrt.(sum(abs2, U[n]; dims=1))) U[n] = U[n] ./ permutedims(λ) end end
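
For reference, a minimal usage sketch of the interface this series ends up
with. It is illustrative only: it assumes the package and CUDA.jl are
installed and a CUDA-capable GPU is available, and the tensor sizes and rank
(50 x 40 x 30, rank 5) are made up for the example.

    using CUDA, GCPDecompositions

    # Synthetic three-way data on the host.
    X = randn(Float32, 50, 40, 30)

    # Array input dispatches to the CPU path, which defaults to
    # GCPAlgorithms.ALS() for least-squares loss with no constraints.
    M_cpu = gcp(X, 5)

    # CuArray input dispatches to the method added in ext/CUDAExt.jl: the same
    # simple ALS loop run on the GPU (Float32 promotion, NaN entries skipped in
    # the initial norm), returning a CPD whose factors are copied back to host
    # memory.
    M_gpu = gcp(CuArray(X), 5)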