diff --git a/Project.toml b/Project.toml index 8a69dce..4ad2d0b 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["David Hong and contributors"] version = "0.1.2" [deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" @@ -13,12 +14,15 @@ LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" [extensions] +CUDAExt = "CUDA" LossFunctionsExt = "LossFunctions" [compat] +CUDA = ">= 4.4.1" Compat = "3.42, 4" ForwardDiff = "0.10.36" IntervalSets = "0.7.7" @@ -27,3 +31,6 @@ LinearAlgebra = "1.6" LossFunctions = "0.11.1" Random = "1.6" julia = "1.6" + +[extras] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" \ No newline at end of file diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl new file mode 100644 index 0000000..2e7e99d --- /dev/null +++ b/ext/CUDAExt.jl @@ -0,0 +1,47 @@ +module CUDAExt + +using GCPDecompositions, CUDA + +GCPDecompositions.gcp( + X::CuArray, + r, + loss = LeastSquaresLoss(); + constraints = (), + algorithm = GCPAlgorithms.ALS(), +) = _gcp(X, r, loss, constraints, algorithm) +function _gcp( + X::CuArray{TX,N}, + r, + loss::LeastSquaresLoss, + constraints::Tuple{}, + algorithm::GCPAlgorithms.ALS, +) where {TX<:Real,N} + T = promote_type(TX, Float32) + + # Random initialization + M0 = CPD(ones(T, r), rand.(T, size(X), r)) + M0norm = sqrt(sum(abs2, M0[I] for I in CartesianIndices(size(M0)))) + Xnorm = sqrt(mapreduce(x -> isnan(x) ? 0 : abs2(x), +, X, init=0f0)) + for k in Base.OneTo(N) + M0.U[k] .*= (Xnorm / M0norm)^(1 / N) + end + λ, U = M0.λ, collect(M0.U) + + # Move λ, U to gpu + λ = CuArray(λ) + U = [CuArray(U[i]) for i in 1:N] + + # Inefficient but simple implementation + for _ in 1:algorithm.maxiters + for n in 1:N + V = reduce(.*, U[i]'U[i] for i in setdiff(1:N, n)) + U[n] = GCPDecompositions.mttkrp(X, U, n) / V + λ = vec(sqrt.(sum(abs2, U[n]; dims=1))) + U[n] = U[n] ./ permutedims(λ) + end + end + + return CPD(Array(λ), Tuple([Array(U[i]) for i in 1:N])) +end + +end \ No newline at end of file diff --git a/src/GCPDecompositions.jl b/src/GCPDecompositions.jl index 05c06b7..876c957 100644 --- a/src/GCPDecompositions.jl +++ b/src/GCPDecompositions.jl @@ -24,6 +24,7 @@ include("gcp-algorithms.jl") if !isdefined(Base, :get_extension) include("../ext/LossFunctionsExt.jl") + include("../ext/CUDAExt.jl") end # Main fitting function @@ -113,7 +114,8 @@ default_init(X, r, loss, constraints, algorithm) = function default_init(rng, X, r, loss, constraints, algorithm) # Generate CPD with random factors T, N = nonmissingtype(eltype(X)), ndims(X) - T = promote_type(T, Float64) + #T = promote_type(T, Float64) + T = promote_type(T, Float32) M = CPD(ones(T, r), rand.(rng, T, size(X), r)) # Normalize diff --git a/src/gcp-algorithms/als.jl b/src/gcp-algorithms/als.jl index 082e0c1..9b56604 100644 --- a/src/gcp-algorithms/als.jl +++ b/src/gcp-algorithms/als.jl @@ -34,7 +34,7 @@ function _gcp( V = reduce(.*, M.U[i]'M.U[i] for i in setdiff(1:N, n)) mttkrp!(M.U[n], X, M.U, n, mttkrp_buffers[n]) rdiv!(M.U[n], lu!(V)) - M.λ .= norm.(eachcol(M.U[n])) + M.λ .= vec(sqrt.(sum(abs2, U[n]; dims=1))) M.U[n] ./= permutedims(M.λ) end end