[NDTensors] Fix nonuniform Diag-Dense contractions on GPU (ITensor#1511)
* [NDTensorscuTENSORExt] Better type promotion in cuTENSOR contraction

* [NDTensorsGPUArraysCoreExt] Fix nonuniform Diag-Dense contractions on GPU

* [NDTensors] Fix dot on GPU

* [NDTensors] Fix Diag-Diag contraction on GPU

* [NDTensors] Add GPU-friendly mapreduce

* [NDTensors] Fix mapping by function that doesn't preserve zeros

* [NDTensors] Bump to v0.3.35

* [ITensors] Define sum and prod for ITensor

* [ITensors] Use NDTensors.map_diag in ITensors

* [ITensors] Bump to v0.6.16
mtfishman authored Jun 24, 2024
1 parent 7d5ecf9 commit d3afdb7
Showing 20 changed files with 336 additions and 83 deletions.
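A rough usage sketch of what this commit targets (not part of the diff; assumes ITensors v0.6.16 or later, NDTensors v0.3.35 or later, CUDA.jl, a CUDA-capable device, and that `cu` moves an ITensor's storage to the GPU; index names and sizes are illustrative):

using ITensors, CUDA

i = Index(2, "i"); j = Index(2, "j"); k = Index(3, "k")
D = diagITensor([1.0, 2.0], i, j)  # nonuniform diagonal tensor
A = randomITensor(j, k)            # dense tensor
Dg, Ag = cu(D), cu(A)              # move both to the GPU
C = Dg * Ag                        # nonuniform Diag-Dense contraction fixed here
s = sum(C)                         # `sum(::ITensor)` is added in ITensors v0.6.16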
2 changes: 1 addition & 1 deletion NDTensors/Project.toml
@@ -1,7 +1,7 @@
name = "NDTensors"
uuid = "23ae76d9-e61a-49c4-8f12-3f1a16adf9cf"
authors = ["Matthew Fishman <[email protected]>"]
version = "0.3.34"
version = "0.3.35"

[deps]
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
61 changes: 47 additions & 14 deletions NDTensors/ext/NDTensorsGPUArraysCoreExt/contract.jl
@@ -4,6 +4,42 @@ using NDTensors: NDTensors, DenseTensor, DiagTensor, contract!, dense, inds, Ten
using NDTensors.Expose: Exposed, expose, unexpose
using NDTensors.TypeParameterAccessors: parenttype, set_ndims

function NDTensors.contract!(
output_tensor::Exposed{<:AbstractGPUArray,<:DenseTensor},
labelsoutput_tensor,
tensor1::Exposed{<:AbstractGPUArray,<:DiagTensor},
labelstensor1,
tensor2::Exposed{<:AbstractGPUArray,<:DenseTensor},
labelstensor2,
α::Number=one(Bool),
β::Number=zero(Bool);
convert_to_dense::Bool=true,
)
# Convert tensor1 to dense.
# TODO: Define `Exposed` overload for `dense`.
tensor1 = expose(dense(unexpose(tensor1)))
contract!(
output_tensor, labelsoutput_tensor, tensor1, labelstensor1, tensor2, labelstensor2, α, β
)
return output_tensor
end

function NDTensors.contract!(
output_tensor::Exposed{<:AbstractGPUArray,<:DenseTensor},
labelsoutput_tensor,
tensor1::Exposed{<:AbstractGPUArray,<:DenseTensor},
labelstensor1,
tensor2::Exposed{<:AbstractGPUArray,<:DiagTensor},
labelstensor2,
α::Number=one(Bool),
β::Number=zero(Bool),
)
contract!(
output_tensor, labelsoutput_tensor, tensor2, labelstensor2, tensor1, labelstensor1, α, β
)
return output_tensor
end

## In this function we convert the DiagTensor to a dense tensor and
## Feed it back into contract
function NDTensors.contract!(
@@ -16,20 +16,52 @@ function NDTensors.contract!(
α::Number=one(Bool),
β::Number=zero(Bool),
)
tensor1 = unexpose(tensor1)
## convert tensor1 to a dense
## TODO this allocates on CPU first then moves over to GPU which could be slow
tensor1 = adapt(set_ndims(parenttype(typeof(tensor2)), 1), dense(tensor1))
return contract!(
output_tensor,
labelsoutput_tensor,
expose(tensor1),
labelstensor1,
tensor2,
labelstensor2,
α,
β,
# Convert tensor1 to dense.
# TODO: Define `Exposed` overload for `dense`.
# TODO: This allocates on CPU first then moves over to GPU which could be optimized.
tensor1 = expose(
adapt(set_ndims(parenttype(typeof(tensor2)), 1), dense(unexpose(tensor1)))
)
contract!(
output_tensor, labelsoutput_tensor, tensor1, labelstensor1, tensor2, labelstensor2, α, β
)
return output_tensor
end

function NDTensors.contract!(
@@ -42,7 +74,8 @@ function NDTensors.contract!(
α::Number=one(Bool),
β::Number=zero(Bool),
)
return contract!(
contract!(
output_tensor, labelsoutput_tensor, tensor2, labelstensor2, tensor1, labelstensor1, α, β
)
return output_tensor
end
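The two methods above either densify the `DiagTensor` argument or swap arguments so the `Diag` tensor comes first. A CPU analogy with ordinary arrays (not the extension's actual code path) shows why densifying helps:

using LinearAlgebra
d = Diagonal([1.0, 2.0, 3.0])  # stands in for a nonuniform DiagTensor
A = rand(3, 3)
# Materializing the diagonal as a dense array first mirrors what the GPU
# extension now does: the contraction runs as an ordinary dense matrix
# multiplication instead of a loop over diagonal entries with scalar indexing.
C = Matrix(d) * A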
28 changes: 25 additions & 3 deletions NDTensors/ext/NDTensorscuTENSORExt/contract.jl
@@ -1,7 +1,17 @@
using Base: ReshapedArray
using NDTensors: NDTensors, DenseTensor, array
using NDTensors.Expose: Exposed, unexpose
using NDTensors.Expose: Exposed, expose, unexpose
using cuTENSOR: cuTENSOR, CuArray, CuTensor

# Handle cases that can't be handled by `cuTENSOR.jl`
# right now.
function to_zero_offset_cuarray(a::CuArray)
return iszero(a.offset) ? a : copy(a)
end
function to_zero_offset_cuarray(a::ReshapedArray)
return copy(expose(a))
end

function NDTensors.contract!(
exposedR::Exposed{<:CuArray,<:DenseTensor},
labelsR,
@@ -15,8 +25,20 @@ function NDTensors.contract!(
R, T1, T2 = unexpose.((exposedR, exposedT1, exposedT2))
zoffR = iszero(array(R).offset)
arrayR = zoffR ? array(R) : copy(array(R))
arrayT1 = iszero(array(T1).offset) ? array(T1) : copy(array(T1))
arrayT2 = iszero(array(T2).offset) ? array(T2) : copy(array(T2))
arrayT1 = to_zero_offset_cuarray(array(T1))
arrayT2 = to_zero_offset_cuarray(array(T2))
# Promote to a common type. This is needed because as of
# cuTENSOR.jl v5.4.2, cuTENSOR contraction only performs
# a limited set of type promotions on its inputs, see:
# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/lib/cutensor/src/types.jl#L11-L19
elt = promote_type(eltype.((arrayR, arrayT1, arrayT2))...)
if elt !== eltype(arrayR)
return error(
"In cuTENSOR contraction, input tensors have element types `$(eltype(arrayT1))` and `$(eltype(arrayT2))` while the output has element type `$(eltype(arrayR))`.",
)
end
arrayT1 = convert(CuArray{elt}, arrayT1)
arrayT2 = convert(CuArray{elt}, arrayT2)
cuR = CuTensor(arrayR, collect(labelsR))
cuT1 = CuTensor(arrayT1, collect(labelsT1))
cuT2 = CuTensor(arrayT2, collect(labelsT2))
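A minimal sketch of the promotion check added above, written with ordinary CPU arrays so it runs without a GPU; the extension itself converts to `CuArray{elt}`:

arrayT1 = rand(Float32, 2, 2)
arrayT2 = rand(ComplexF64, 2, 2)
arrayR = zeros(ComplexF64, 2, 2)
elt = promote_type(eltype.((arrayR, arrayT1, arrayT2))...)  # ComplexF64
elt === eltype(arrayR) || error("output cannot hold the promoted element type")
arrayT1 = convert(Array{elt}, arrayT1)  # the extension uses `convert(CuArray{elt}, ...)`
arrayT2 = convert(Array{elt}, arrayT2)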
5 changes: 5 additions & 0 deletions NDTensors/src/abstractarray/generic_array_constructors.jl
@@ -1,6 +1,11 @@
using .TypeParameterAccessors:
unwrap_array_type, specify_default_type_parameters, type_parameter

# Convert to Array, avoiding copying if possible
array(a::AbstractArray) = a
matrix(a::AbstractMatrix) = a
vector(a::AbstractVector) = a

## Warning to use these functions it is necessary to define `TypeParameterAccessors.position(::Type{<:YourArrayType}, ::typeof(ndims)))`
# Implementation, catches if `ndims(arraytype) != length(dims)`.
## TODO convert ndims to `type_parameter(::, typeof(ndims))`
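A small illustration of the new no-op fallbacks (`NDTensors.array`, `matrix`, and `vector` are unexported, so they are shown fully qualified; a sketch, not from the diff):

using NDTensors
v = [1.0, 2.0, 3.0]
NDTensors.array(v) === v   # true: plain arrays pass through without copying
NDTensors.vector(v) === v  # true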
40 changes: 39 additions & 1 deletion NDTensors/src/blocksparse/blocksparsetensor.jl
@@ -367,7 +367,45 @@ function diag(ETensor::Exposed{<:AbstractArray,<:BlockSparseTensor})
return tensordiag
end

## TODO currently this fails on GPU with scalar indexing
function Base.mapreduce(
f, op, t1::BlockSparseTensor, t_tail::BlockSparseTensor...; kwargs...
)
# TODO: Take advantage of block sparsity here.
return mapreduce(f, op, array(t1), array.(t_tail)...; kwargs...)
end

# This is a special case that optimizes for a single tensor
# and takes advantage of block sparsity. Once the more general
# case handles block sparsity, this can be removed.
function Base.mapreduce(f, op, t::BlockSparseTensor; kwargs...)
elt = eltype(t)
if !iszero(f(zero(elt)))
return mapreduce(f, op, array(t); kwargs...)
end
if length(t) > nnz(t)
# Some elements are zero, account for that
# with the initial value.
init_kwargs = (; init=zero(elt))
else
init_kwargs = (;)
end
return mapreduce(f, op, storage(t); kwargs..., init_kwargs...)
end

function blocksparse_isequal(x, y)
return array(x) == array(y)
end
function Base.:(==)(x::BlockSparseTensor, y::BlockSparseTensor)
return blocksparse_isequal(x, y)
end
function Base.:(==)(x::BlockSparseTensor, y::Tensor)
return blocksparse_isequal(x, y)
end
function Base.:(==)(x::Tensor, y::BlockSparseTensor)
return blocksparse_isequal(x, y)
end

## TODO currently this fails on GPU with scalar indexing
function map_diag!(
f::Function,
exposed_t_destination::Exposed{<:AbstractArray,<:BlockSparseTensor},
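The `init` handling above matters because the reduction only visits stored entries; a plain-array sketch (not from the diff) of the pitfall it guards against:

stored = [-2.0, -3.0]                       # stored entries of a sparse tensor
mapreduce(identity, max, stored)            # -2.0: wrong if structural zeros also exist
mapreduce(identity, max, stored; init=0.0)  # 0.0: zeros accounted for via `init`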
15 changes: 15 additions & 0 deletions NDTensors/src/diag/diagtensor.jl
@@ -195,6 +195,21 @@ function permutedims!!(
return RR
end

function Base.mapreduce(f, op, t1::DiagTensor, t_tail::DiagTensor...; kwargs...)
elt = mapreduce(eltype, promote_type, (t1, t_tail...))
if !iszero(f(zero(elt)))
return mapreduce(f, op, array(t1), array.(t_tail)...; kwargs...)
end
if length(t1) > diaglength(t1)
# Some elements are zero, account for that
# with the initial value.
init_kwargs = (; init=zero(elt))
else
init_kwargs = (;)
end
return mapreduce(f, op, diagview(t1), diagview.(t_tail)...; kwargs..., init_kwargs...)
end

function Base.show(io::IO, mime::MIME"text/plain", T::DiagTensor)
summary(io, T)
print_tensor(io, T)
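The check `!iszero(f(zero(elt)))` decides between reducing only the stored diagonal and falling back to the dense array; a quick illustration (not from the diff):

f = x -> x + 1            # f(0) == 1: does not preserve zeros, dense fallback is used
iszero(f(zero(Float64)))  # false
g = x -> 2x               # g(0) == 0: preserves zeros, only `diagview` is reduced
iszero(g(zero(Float64)))  # true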
19 changes: 6 additions & 13 deletions NDTensors/src/diag/tensoralgebra/contract.jl
@@ -89,20 +89,13 @@ function contract!(
labelsT2,
) where {ElR,NR,N1,N2}
if NR == 0 # If all indices of A and B are contracted
# all indices are summed over, just add the product of the diagonal
# elements of A and B
Rdiag = zero(ElR)
for i in 1:diaglength(T1)
Rdiag += getdiagindex(T1, i) * getdiagindex(T2, i)
end
setdiagindex!(R, Rdiag, 1)
# All indices are summed over, just add the product of the diagonal
# elements of A and B.
# `expose` allows dispatching on the data type
# in order to allow scalar indexing on GPU.
expose(R)[] = mapreduce(*, +, diagview(T1), diagview(T2))
else
min_dim = min(diaglength(T1), diaglength(T2))
# not all indices are summed over, set the diagonals of the result
# to the product of the diagonals of A and B
for i in 1:min_dim
setdiagindex!(R, getdiagindex(T1, i) * getdiagindex(T2, i), i)
end
diagview(R) .= diagview(T1) .* diagview(T2)
end
return R
end
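A CPU analogue of the two branches above, using `LinearAlgebra.Diagonal` in place of `DiagTensor` (illustrative only, not from the diff):

using LinearAlgebra
A = Diagonal([1.0, 2.0, 3.0])
B = Diagonal([4.0, 5.0, 6.0])
# Full contraction: sum of products of the diagonals, what `expose(R)[] = ...` stores.
s = mapreduce(*, +, diag(A), diag(B))  # 32.0
# Partial contraction: the result stays diagonal; broadcasting avoids a scalar-index loop.
C = Diagonal(zeros(3))
C.diag .= diag(A) .* diag(B)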
18 changes: 15 additions & 3 deletions NDTensors/src/linearalgebra/linearalgebra.jl
@@ -8,14 +8,26 @@ using .RankFactorization: Spectrum
# be made <: StridedArray
import .Expose: qr_positive, ql, ql_positive

function (
T1::Tensor{ElT1,2,StoreT1} * T2::Tensor{ElT2,2,StoreT2}
) where {ElT1,StoreT1<:Dense,ElT2,StoreT2<:Dense}
# TODO: Generalize this to any `Tensor` type using:
# ```julia
# contract(T1, (1, -1), T2, (-1, 2))
# ```
function Base.:*(T1::Tensor{<:Any,2,<:Dense}, T2::Tensor{<:Any,2,<:Dense})
RM = matrix(T1) * matrix(T2)
indsR = (ind(T1, 1), ind(T2, 2))
return tensor(Dense(vec(RM)), indsR)
end

function LinearAlgebra.dot(x::Tensor, y::Tensor)
size(x) == size(y) || throw(
DimensionMismatch(
"dimensions must match in `dot(x::Tensor, y::Tensor)`: `x` has size `$(size(x))` while `y` has size `$(size(y))`.",
),
)
labels = ntuple(dim -> -dim, ndims(x))
return contract(conj(x), labels, y, labels)[]
end

function LinearAlgebra.exp(T::DenseTensor{ElT,2}) where {ElT<:Union{Real,Complex}}
expTM = exp(matrix(T))
return tensor(Dense(vec(expTM)), inds(T))
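What the new `dot` method computes, shown with plain matrices (the `Tensor` method does the same thing via `contract` over matching negative labels; not from the diff):

using LinearAlgebra
x = rand(ComplexF64, 2, 3)
y = rand(ComplexF64, 2, 3)
dot(x, y) ≈ sum(conj.(x) .* y)  # true: conjugate-linear in the first argument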
77 changes: 67 additions & 10 deletions NDTensors/src/tensor/tensor.jl
@@ -210,13 +210,21 @@ real(T::Tensor) = setstorage(T, real(storage(T)))

imag(T::Tensor) = setstorage(T, imag(storage(T)))

function map(f, x::Tensor{T}) where {T}
if !iszero(f(zero(T)))
error(
"map(f, ::Tensor) currently doesn't support functions that don't preserve zeros, while you passed a function such that f(0) = $(f(zero(T))). This isn't supported right now because it doesn't necessarily preserve the sparsity structure of the input tensor.",
)
function Base.map(f, t1::Tensor, t_tail::Tensor...; kwargs...)
elt = mapreduce(eltype, promote_type, (t1, t_tail...))
if !iszero(f(zero(elt)))
# TODO: Do a better job of preserving the storage type, if possible.
return tensor(Dense(map(f, array(t1), array.(t_tail)...; kwargs...)), inds(t1))
end
return setstorage(x, map(f, storage(x)))
return setstorage(t1, map(f, storage(t1), storage.(t_tail)...; kwargs...))
end

function Base.mapreduce(f, op, t1::Tensor, t_tail::Tensor...; kwargs...)
elt = mapreduce(eltype, promote_type, (t1, t_tail...))
if !iszero(f(zero(elt)))
return mapreduce(f, op, array(t1), array.(t_tail)...; kwargs...)
end
return mapreduce(f, op, storage(t1), storage.(t_tail)...; kwargs...)
end

#
@@ -281,6 +289,9 @@ array(T::Tensor) = array(dense(T))
matrix(T::Tensor{<:Number,2}) = array(T)
vector(T::Tensor{<:Number,1}) = array(T)

array(T::Transpose{<:Any,<:Tensor}) = transpose(array(transpose(T)))
matrix(T::Transpose{<:Any,<:Tensor}) = transpose(array(transpose(T)))

#
# Helper functions for BlockSparse-type storage
#
@@ -352,6 +363,42 @@ end

insertblock!!(T::Tensor, block) = insertblock!(T, block)

function tensor_isequal(x, y)
# TODO: Use a reduction to avoid intermediates.
# This doesn't work right now because `mapreduce`
# on `Tensor`s is limited to functions that preserve
# zeros.
# return mapreduce(==, ==, x, y)

# TODO: Use `x - y` instead of `map(-, x, y)`.
# `x - y` calls `x .- y` and broadcasting isn't
# defined properly for sparse Tensor storage
# like `Diag` and `BlockSparse`.
return iszero(norm(map(-, x, y)))
end

function Base.:(==)(x::Tensor, y::Tensor)
return tensor_isequal(x, y)
end

function Base.:(==)(x::AbstractArray, y::Tensor)
return array(x) == array(y)
end
function Base.:(==)(x::Tensor, y::AbstractArray)
return array(x) == array(y)
end

function Base.isequal(x::Tensor, y::Tensor)
return tensor_isequal(x, y)
end

function Base.isequal(x::AbstractArray, y::Tensor)
return isequal(array(x), array(y))
end
function Base.isequal(x::Tensor, y::AbstractArray)
return isequal(array(x), array(y))
end

"""
getdiagindex
@@ -386,11 +433,21 @@ function setdiagindex!(T::Tensor{<:Number,N}, val, ind::Int) where {N}
return T
end

function map_diag!(f::Function, exposed_t_destination::Exposed, exposed_t_source::Exposed)
diagview(unexpose(exposed_t_destination)) .= f.(diagview(unexpose(exposed_t_source)))
return unexpose(exposed_t_destination)
function map_diag!(f::Function, t_dest::Tensor, t_src::Tensor)
map_diag!(f, expose(t_dest), expose(t_src))
return t_dest
end
function map_diag!(f::Function, exposed_t_dest::Exposed, exposed_t_src::Exposed)
diagview(unexpose(exposed_t_dest)) .= f.(diagview(unexpose(exposed_t_src)))
return unexpose(exposed_t_dest)
end

map_diag(f::Function, t::Tensor) = map_diag(f, expose(t))
function map_diag(f::Function, exposed_t::Exposed)
t_dest = copy(exposed_t)
map_diag!(f, expose(t_dest), exposed_t)
return t_dest
end
map_diag(f::Function, t::Tensor) = map_diag!(f, expose(copy(t)), expose(t))

#
# Some generic contraction functionality
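A plain-array analogue of `map_diag!` as defined above: apply `f` to a view of the diagonal via broadcasting rather than a scalar-index loop (illustrative, not the NDTensors API):

using LinearAlgebra
A = [1.0 2.0; 3.0 4.0]
B = copy(A)
view(B, diagind(B)) .= exp.(view(A, diagind(A)))  # B's diagonal becomes exp.(diag(A))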
13 changes: 6 additions & 7 deletions NDTensors/src/tensorstorage/tensorstorage.jl
@@ -68,13 +68,12 @@ end
Random.randn!(S::TensorStorage) = randn!(Random.default_rng(), S)
Random.randn!(rng::AbstractRNG, S::TensorStorage) = (randn!(rng, data(S)); S)

function map(f, x::TensorStorage{T}) where {T}
if !iszero(f(zero(T)))
error(
"map(f, ::TensorStorage) currently doesn't support functions that don't preserve zeros, while you passed a function such that f(0) = $(f(zero(T))). This isn't supported right now because it doesn't necessarily preserve the sparsity structure of the input tensor.",
)
end
return setdata(x, map(f, data(x)))
function Base.map(f, t1::TensorStorage, t_tail::TensorStorage...; kwargs...)
return setdata(t1, map(f, data(t1), data.(t_tail)...; kwargs...))
end

function Base.mapreduce(f, op, t1::TensorStorage, t_tail::TensorStorage...; kwargs...)
return mapreduce(f, op, data(t1), data.(t_tail)...; kwargs...)
end

Base.fill!(S::TensorStorage, v) = (fill!(data(S), v); S)
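With the zero-preservation check moved up to the `Tensor` level, the storage-level methods become thin wrappers over the underlying data; roughly (not from the diff):

data1 = [1.0, 2.0]; data2 = [3.0, 4.0]
map(+, data1, data2)           # [4.0, 6.0]: what `map` on two storages forwards to
mapreduce(*, +, data1, data2)  # 11.0: elementwise product, then sum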