diff --git a/NDTensors/src/blocksparse/linearalgebra.jl b/NDTensors/src/blocksparse/linearalgebra.jl
index 2835d262c7..c1a34598a8 100644
--- a/NDTensors/src/blocksparse/linearalgebra.jl
+++ b/NDTensors/src/blocksparse/linearalgebra.jl
@@ -75,7 +75,7 @@ function LinearAlgebra.svd(T::BlockSparseMatrix{ElT}; kwargs...) where {ElT}
 
   dropblocks = Int[]
   if truncate
-    truncerr, docut = truncate!(d; kwargs...)
+    d, truncerr, docut = truncate!!(d; kwargs...)
     for n in 1:nnzblocks(T)
       blockdim = _truncated_blockdim(
         Ss[n], docut; min_blockdim, singular_values=true, truncate
@@ -237,7 +237,7 @@ function LinearAlgebra.eigen(
 
   sort!(d; rev=true, by=abs)
   if truncate
-    truncerr, docut = truncate!(d; kwargs...)
+    d, truncerr, docut = truncate!!(d; kwargs...)
     for n in 1:nnzblocks(T)
       blockdim = _truncated_blockdim(Ds[n], docut)
       if blockdim == 0
diff --git a/NDTensors/src/dense/dense.jl b/NDTensors/src/dense/dense.jl
index 96afd695b5..8ca6fad263 100644
--- a/NDTensors/src/dense/dense.jl
+++ b/NDTensors/src/dense/dense.jl
@@ -101,7 +101,16 @@ Dense(::Type{ElT}) where {ElT} = Dense{ElT}()
 setdata(D::Dense, ndata) = Dense(ndata)
 setdata(storagetype::Type{<:Dense}, data) = Dense(data)
 
-copy(D::Dense) = Dense(copy(data(D)))
+## Copying a `Base.ReshapedArray` that wraps a GPU array can fall back to
+## the generic method, which uses slow scalar `getindex`, so copy the
+## parent array instead. TODO: find a better implementation.
+function copy(D::Dense)
+  d = data(D)
+  if d isa Base.ReshapedArray
+    return Dense(copy(parent(d)))
+  end
+  return Dense(copy(d))
+end
 
 function Base.real(T::Type{<:Dense})
   return set_datatype(T, similartype(datatype(T), real(eltype(T))))
diff --git a/NDTensors/src/linearalgebra/svd.jl b/NDTensors/src/linearalgebra/svd.jl
index 6b9a09ae83..d931076bd6 100644
--- a/NDTensors/src/linearalgebra/svd.jl
+++ b/NDTensors/src/linearalgebra/svd.jl
@@ -1,11 +1,15 @@
+## TODO: `checkSVDDone` inspects the elements of S, so convert S to CPU
+## when it is on GPU. This could be a GPU-specific method that converts S
+## to an Array; S is not used again, so there is no need to convert back.
 function checkSVDDone(S::AbstractArray, thresh::Float64)
   N = length(S)
   (N <= 1 || thresh < 0.0) && return (true, 1)
-  S1t = S[1] * thresh
+  Scpu = NDTensors.cpu(S)
+  S1t = Scpu[1] * thresh
   start = 2
   while start <= N
-    (S[start] < S1t) && break
+    (Scpu[start] < S1t) && break
     start += 1
   end
 
   if start >= N
@@ -32,9 +36,8 @@ function svd_recursive(M::AbstractMatrix; thresh::Float64=1E-3, north_pass::Int=
   V = M' * U
   V, R = qr_positive(V)
 
-  for n in 1:Nd
-    D[n] = R[n, n]
-  end
+  # Extract the leading diagonal of R without scalar indexing (slow on GPU).
+  D = diag(R)
 
   (done, start) = checkSVDDone(D, thresh)
 
diff --git a/NDTensors/src/truncate.jl b/NDTensors/src/truncate.jl
index 3acc2f2099..b1c8947b21 100644
--- a/NDTensors/src/truncate.jl
+++ b/NDTensors/src/truncate.jl
@@ -1,4 +1,27 @@
-export truncate!
+export truncate!!, truncate!
+
+## truncate! performs logical operations on the values of P, so it is more
+## efficient to move P to a CPU vector, truncate there, and then convert
+## the result back to the original (possibly GPU) storage type.
+
+function truncate!!(P::AbstractVector; kwargs...)
+  return truncate!!(leaf_parenttype(P), P; kwargs...)
+end
+
+# CPU arrays are truncated in place.
+function truncate!!(::Type{<:Array}, P::AbstractVector; kwargs...)
+  truncerr, docut = truncate!(P; kwargs...)
+  return P, truncerr, docut
+end
+
+# Other (e.g. GPU) arrays are truncated on the CPU, since truncate! relies
+# on scalar indexing, and the result is adapted back to the original storage.
+function truncate!!(::Type{<:AbstractArray}, P::AbstractVector; kwargs...)
+  P_cpu = cpu(P)
+  truncerr, docut = truncate!(P_cpu; kwargs...)
+  P = adapt(leaf_parenttype(P), P_cpu)
+  return P, truncerr, docut
+end
 
 function truncate!(P::AbstractVector{ElT}; kwargs...)::Tuple{ElT,ElT} where {ElT}
   cutoff::Union{Nothing,ElT} = get(kwargs, :cutoff, zero(ElT))
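
Notes on the patterns used above, with illustrative Julia sketches.

The copy(D::Dense) change special-cases Base.ReshapedArray because the generic copy path reads elements one at a time, which is very slow for device arrays. A minimal sketch of the branch using only Base types (reshape of a non-Array, here a SubArray, produces a genuine Base.ReshapedArray):

    v = view(rand(16), 1:16)
    d = reshape(v, 4, 4)    # d isa Base.ReshapedArray
    # Copy the parent in one contiguous operation instead of per-element
    # getindex through the wrapper (the slow path the dense.jl comment means):
    c = d isa Base.ReshapedArray ? copy(parent(d)) : copy(d)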
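
checkSVDDone compares individual entries of S against a threshold, so the patch brings S to host memory first. A sketch of that access pattern, with Array(S) as a stand-in for NDTensors.cpu(S) (a plain copy for host arrays, a device-to-host transfer otherwise):

    S = [1.0, 0.5, 1e-4, 1e-9]   # example singular values, sorted descending
    thresh = 1e-3
    S_host = Array(S)             # avoid scalar getindex on device arrays
    cut = S_host[1] * thresh      # threshold relative to the largest value
    start = something(findfirst(<(cut), S_host), length(S_host) + 1)  # here 3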
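
In svd_recursive, the scalar loop over the diagonal of R is replaced by a single diag call. diag also handles a tall rectangular R, returning its min(size(R)...) main-diagonal entries, which is exactly what the removed loop collected:

    using LinearAlgebra: diag

    R = [1.0 2.0; 0.0 3.0; 0.0 0.0]   # 3×2 upper-trapezoidal example
    D = diag(R)                        # [1.0, 3.0] in one vectorized call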
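
truncate!! follows the maybe-mutating "!!" convention: dispatch on the leaf storage type, mutate in place for CPU arrays, round-trip through the CPU otherwise, and always have callers use the returned value. A self-contained sketch of the pattern; leaftype and double!! are illustrative stand-ins, not NDTensors API:

    using Adapt: adapt

    # Walk wrappers (only ReshapedArray here) down to the storage type;
    # stands in for NDTensors.leaf_parenttype.
    leaftype(x::AbstractArray) = x isa Base.ReshapedArray ? leaftype(parent(x)) : typeof(x)

    double!!(P::AbstractVector) = double!!(leaftype(P), P)
    double!!(::Type{<:Array}, P) = (P .*= 2; P)       # CPU: mutate in place
    function double!!(::Type{<:AbstractArray}, P)     # GPU etc.: CPU round-trip
      P_cpu = Array(P)                  # stand-in for NDTensors.cpu
      P_cpu .*= 2
      return adapt(leaftype(P), P_cpu)  # back to the original storage type
    end

    P = rand(3)
    P = double!!(P)   # rebind, as in `d, truncerr, docut = truncate!!(d; kwargs...)`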