diff --git a/src/host/base.jl b/src/host/base.jl index cfb91681..20cfd563 100644 --- a/src/host/base.jl +++ b/src/host/base.jl @@ -308,21 +308,10 @@ function Adapt.adapt_storage(to::ToGPU, xs::Array) arr end -# we don't really want an array, so don't call `adapt(Array, ...)`, -# but just want GPUArray indices to get downloaded back to the CPU. -# this makes sure we preserve array-like containers, like Base.Slice. -struct BackToCPU end -Adapt.adapt_storage(::BackToCPU, xs::AbstractGPUArray) = convert(Array, xs) - @inline function Base.view(A::AbstractGPUArray, I::Vararg{Any,N}) where {N} J = to_indices(A, I) - @boundscheck begin - # Base's boundscheck accesses the indices, so make sure they reside on the CPU. - # this is expensive, but it's a bounds check after all. - J_cpu = map(j->adapt(BackToCPU(), j), J) - checkbounds(A, J_cpu...) - end J_gpu = map(j->adapt(ToGPU(A), j), J) + @boundscheck checkbounds(A, J...) unsafe_view(A, J_gpu, GPUIndexStyle(I...)) end diff --git a/src/host/indexing.jl b/src/host/indexing.jl index 2b8e17da..e4ea6ad2 100644 --- a/src/host/indexing.jl +++ b/src/host/indexing.jl @@ -1,51 +1,77 @@ # host-level indexing -# basic indexing with integers +# indexing operators Base.IndexStyle(::Type{<:AbstractGPUArray}) = Base.IndexLinear() -function Base.getindex(xs::AbstractGPUArray{T}, I::Integer...) where T +vectorized_indices(Is::Union{Integer,CartesianIndex}...) = Val{false}() +vectorized_indices(Is...) = Val{true}() + +# TODO: re-use Base functionality for the conversion of indices to a linear index, +# by only implementing `getindex(A, ::Int)` etc. this is difficult due to +# ambiguities with the vectorized method that can take any index type. + +Base.@propagate_inbounds Base.getindex(A::AbstractGPUArray, Is...) = + _getindex(vectorized_indices(Is...), A, to_indices(A, Is)...) +Base.@propagate_inbounds _getindex(::Val{false}, A::AbstractGPUArray, Is...) = + scalar_getindex(A, to_indices(A, Is)...) +Base.@propagate_inbounds _getindex(::Val{true}, A::AbstractGPUArray, Is...) = + vectorized_getindex(A, to_indices(A, Is)...) + +Base.@propagate_inbounds Base.setindex!(A::AbstractGPUArray, v, Is...) = + _setindex!(vectorized_indices(Is...), A, v, to_indices(A, Is)...) +Base.@propagate_inbounds _setindex!(::Val{false}, A::AbstractGPUArray, v, Is...) = + scalar_setindex!(A, v, to_indices(A, Is)...) +Base.@propagate_inbounds _setindex!(::Val{true}, A::AbstractGPUArray, v, Is...) = + vectorized_setindex!(A, v, to_indices(A, Is)...) + +## scalar indexing + +function scalar_getindex(A::AbstractGPUArray{T}, Is...) where T + @boundscheck checkbounds(A, Is...) + I = Base._to_linear_index(A, Is...) + getindex(A, I) +end + +function scalar_setindex!(A::AbstractGPUArray{T}, v, Is...) where T + @boundscheck checkbounds(A, Is...) + I = Base._to_linear_index(A, Is...) + setindex!(A, v, I) +end + +# we still dispatch to `Base.getindex(a, ::Int)` etc so that there's a single method to +# override when a back-end (e.g. with unified memory) wants to allow scalar indexing. + +function Base.getindex(A::AbstractGPUArray{T}, I::Int) where T + @boundscheck checkbounds(A, I) assertscalar("getindex") - i = Base._to_linear_index(xs, I...) x = Array{T}(undef, 1) - copyto!(x, 1, xs, i, 1) + copyto!(x, 1, A, I, 1) return x[1] end -function Base.setindex!(xs::AbstractGPUArray{T}, v::T, I::Integer...) where T +function Base.setindex!(A::AbstractGPUArray{T}, v, I::Int) where T + @boundscheck checkbounds(A, I) assertscalar("setindex!") - i = Base._to_linear_index(xs, I...) x = T[v] - copyto!(xs, i, x, 1, 1) - return xs + copyto!(A, I, x, 1, 1) + return A end -Base.setindex!(xs::AbstractGPUArray, v, I::Integer...) = - setindex!(xs, convert(eltype(xs), v), I...) - +## vectorized indexing -# basic indexing with cartesian indices - -Base.@propagate_inbounds Base.getindex(A::AbstractGPUArray, I::Union{Integer, CartesianIndex}...) = - A[Base.to_indices(A, I)...] -Base.@propagate_inbounds Base.setindex!(A::AbstractGPUArray, v, I::Union{Integer, CartesianIndex}...) = - (A[Base.to_indices(A, I)...] = v; A) - - -# generalized multidimensional indexing - -Base.getindex(A::AbstractGPUArray, I...) = _getindex(A, to_indices(A, I)...) - -function _getindex(src::AbstractGPUArray, Is...) +function vectorized_getindex(src::AbstractGPUArray, Is...) shape = Base.index_shape(Is...) dest = similar(src, shape) any(isempty, Is) && return dest # indexing with empty array idims = map(length, Is) - AT = typeof(src).name.wrapper # NOTE: we are pretty liberal here supporting non-GPU indices... - gpu_call(getindex_kernel, dest, src, idims, adapt(AT, Is)...) + Is = map(x->adapt(ToGPU(src), x), Is) + @boundscheck checkbounds(src, Is...) + + gpu_call(getindex_kernel, dest, src, idims, Is...) return dest end @@ -61,9 +87,7 @@ end end end -Base.setindex!(A::AbstractGPUArray, v, I...) = _setindex!(A, v, to_indices(A, I)...) - -function _setindex!(dest::AbstractGPUArray, src, Is...) +function vectorized_setindex!(dest::AbstractGPUArray, src, Is...) isempty(Is) && return dest idims = length.(Is) len = prod(idims) @@ -76,9 +100,11 @@ function _setindex!(dest::AbstractGPUArray, src, Is...) end end - AT = typeof(dest).name.wrapper - # NOTE: we are pretty liberal here supporting non-GPU sources and indices... - gpu_call(setindex_kernel, dest, adapt(AT, src), idims, len, adapt(AT, Is)...; + # NOTE: we are pretty liberal here supporting non-GPU indices... + Is = map(x->adapt(ToGPU(dest), x), Is) + @boundscheck checkbounds(dest, Is...) + + gpu_call(setindex_kernel, dest, adapt(ToGPU(dest), src), idims, len, Is...; elements=len) return dest end @@ -96,7 +122,30 @@ end end -## find* +# bounds checking + +# indices residing on the GPU should be bounds-checked on the GPU to avoid iteration. + +# not all wrapped GPU arrays make sense as indices, so we use a subset of `AnyGPUArray` +const IndexGPUArray{T} = Union{AbstractGPUArray{T}, + SubArray{T, <:Any, <:AbstractGPUArray}, + LinearAlgebra.Adjoint{T}} + +@inline function Base.checkindex(::Type{Bool}, inds::AbstractUnitRange, I::IndexGPUArray) + all(broadcast(I) do i + Base.checkindex(Bool, inds, i) + end) +end + +@inline function Base.checkindex(::Type{Bool}, inds::Tuple, + I::IndexGPUArray{<:CartesianIndex}) + all(broadcast(I) do i + Base.checkbounds_indices(Bool, inds, (i,)) + end) +end + + +# find* # simple array type that returns the index used to access an element, while # retaining the dimensionality of the original array. this can be used to @@ -107,15 +156,15 @@ struct EachIndex{T,N,IS} <: AbstractArray{T,N} dims::NTuple{N,Int} indices::IS end -EachIndex(xs::AbstractArray) = - EachIndex{typeof(firstindex(xs)), ndims(xs), typeof(eachindex(xs))}( - size(xs), eachindex(xs)) +EachIndex(A::AbstractArray) = + EachIndex{typeof(firstindex(A)), ndims(A), typeof(eachindex(A))}( + size(A), eachindex(A)) Base.size(ei::EachIndex) = ei.dims Base.getindex(ei::EachIndex, i::Int) = ei.indices[i] Base.IndexStyle(::Type{<:EachIndex}) = Base.IndexLinear() -function Base.findfirst(f::Function, xs::AnyGPUArray) - indices = EachIndex(xs) +function Base.findfirst(f::Function, A::AnyGPUArray) + indices = EachIndex(A) dummy_index = first(indices) # given two pairs of (istrue, index), return the one with the smallest index @@ -130,23 +179,23 @@ function Base.findfirst(f::Function, xs::AnyGPUArray) return (false, dummy_index) end - res = mapreduce((x, y)->(f(x), y), reduction, xs, indices; + res = mapreduce((x, y)->(f(x), y), reduction, A, indices; init = (false, dummy_index)) if res[1] # out of consistency with Base.findarray, return a CartesianIndex # when the input is a multidimensional array - ndims(xs) == 1 && return res[2] - return CartesianIndices(xs)[res[2]] + ndims(A) == 1 && return res[2] + return CartesianIndices(A)[res[2]] else return nothing end end -Base.findfirst(xs::AnyGPUArray{Bool}) = findfirst(identity, xs) +Base.findfirst(A::AnyGPUArray{Bool}) = findfirst(identity, A) -function findminmax(binop, xs::AnyGPUArray; init, dims) - indices = EachIndex(xs) - dummy_index = firstindex(xs) +function findminmax(binop, A::AnyGPUArray; init, dims) + indices = EachIndex(A) + dummy_index = firstindex(A) function reduction(t1, t2) (x, i), (y, j) = t1, t2 @@ -157,16 +206,16 @@ function findminmax(binop, xs::AnyGPUArray; init, dims) end if dims == Colon() - res = mapreduce(tuple, reduction, xs, indices; init = (init, dummy_index)) + res = mapreduce(tuple, reduction, A, indices; init = (init, dummy_index)) # out of consistency with Base.findarray, return a CartesianIndex # when the input is a multidimensional array - return (res[1], ndims(xs) == 1 ? res[2] : CartesianIndices(xs)[res[2]]) + return (res[1], ndims(A) == 1 ? res[2] : CartesianIndices(A)[res[2]]) else - res = mapreduce(tuple, reduction, xs, indices; + res = mapreduce(tuple, reduction, A, indices; init = (init, dummy_index), dims=dims) vals = map(x->x[1], res) - inds = map(x->ndims(xs) == 1 ? x[2] : CartesianIndices(xs)[x[2]], res) + inds = map(x->ndims(A) == 1 ? x[2] : CartesianIndices(A)[x[2]], res) return (vals, inds) end end diff --git a/test/testsuite/indexing.jl b/test/testsuite/indexing.jl index 7aa29758..8d2a2039 100644 --- a/test/testsuite/indexing.jl +++ b/test/testsuite/indexing.jl @@ -129,6 +129,12 @@ end @test_throws DimensionMismatch x[1:9,1:9,:,:] = y end + @testset "mismatching axes/indices" begin + a = rand(Float32, 1,1) + @test compare(a->a[1:1], AT, a) + @test compare(a->a[1:1,1:1], AT, a) + @test compare(a->a[1:1,1:1,1:1], AT, a) + end end @testsuite "indexing find" (AT, eltypes)->begin