add CUDA extension (#2268)
* add CUDACUDNN extension

* why test is complaining about cuDNN not a dependency?

* try require cuDNN in Flux

* news and cleanup

* require Main

* relax test GroupedConvTranspose gpu test
CarloLucibello authored Jul 9, 2023
1 parent 34a5f98 commit d3a083c
Showing 36 changed files with 295 additions and 197 deletions.
24 changes: 12 additions & 12 deletions .buildkite/pipeline.yml
@@ -1,16 +1,16 @@
steps:
- label: "GPU integration with julia v1.6"
plugins:
- JuliaCI/julia#v1:
# Drop default "registries" directory, so it is not persisted from execution to execution
# Taken from https://github.com/JuliaLang/julia/blob/v1.7.2/.buildkite/pipelines/main/platforms/package_linux.yml#L11-L12
persist_depot_dirs: packages,artifacts,compiled
version: "1.6"
- JuliaCI/julia-test#v1: ~
agents:
queue: "juliagpu"
cuda: "*"
timeout_in_minutes: 60
# - label: "GPU integration with julia v1.9"
# plugins:
# - JuliaCI/julia#v1:
# # Drop default "registries" directory, so it is not persisted from execution to execution
# # Taken from https://github.com/JuliaLang/julia/blob/v1.7.2/.buildkite/pipelines/main/platforms/package_linux.yml#L11-L12
# persist_depot_dirs: packages,artifacts,compiled
# version: "1.9"
# - JuliaCI/julia-test#v1: ~
# agents:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

- label: "GPU integration with julia v1"
plugins:
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6' # Replace this with the minimum Julia version that your package supports.
# - '1.9' # Uncomment when 1.10 is out. Replace this with the minimum Julia version that your package supports.
- '1'
os: [ubuntu-latest]
arch: [x64]
@@ -47,17 +47,17 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- name: "Run test without coverage report"
uses: julia-actions/julia-runtest@v1
if: ${{ !contains(fromJson('["1", "1.6"]'), matrix.version) || matrix.os != 'ubuntu-latest' }}
if: ${{ !contains(fromJson('["1", "1.9"]'), matrix.version) || matrix.os != 'ubuntu-latest' }}
with:
coverage: false

- name: "Run test with coverage report"
uses: julia-actions/julia-runtest@v1
if: contains(fromJson('["1", "1.6"]'), matrix.version) && matrix.os == 'ubuntu-latest'
if: contains(fromJson('["1", "1.9"]'), matrix.version) && matrix.os == 'ubuntu-latest'
- uses: julia-actions/julia-processcoverage@v1
if: contains(fromJson('["1", "1.6"]'), matrix.version) && matrix.os == 'ubuntu-latest'
if: contains(fromJson('["1", "1.9"]'), matrix.version) && matrix.os == 'ubuntu-latest'
- uses: codecov/codecov-action@v3
if: contains(fromJson('["1", "1.6"]'), matrix.version) && matrix.os == 'ubuntu-latest'
if: contains(fromJson('["1", "1.9"]'), matrix.version) && matrix.os == 'ubuntu-latest'
with:
file: lcov.info

6 changes: 6 additions & 0 deletions NEWS.md
@@ -2,6 +2,12 @@

See also [github's page](https://github.com/FluxML/Flux.jl/releases) for a complete list of PRs merged before each release.

## v0.14.0
* Flux now requires julia v1.9 or later.
* CUDA.jl is no longer a hard dependency. CUDA support is now provided through the package extension mechanism: to unlock the CUDA functionality, users are required to load CUDA, e.g. with `using CUDA`.
  The package `cuDNN.jl` also needs to be installed in the environment.

## v0.13.17
* Apple's Metal GPU acceleration preliminary support via the extension mechanism.

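To make the new workflow concrete, here is a minimal sketch (layer and array sizes are arbitrary) of enabling CUDA support once Flux 0.14 treats CUDA.jl and cuDNN.jl as weak dependencies:

```julia
# Sketch for Flux ≥ 0.14: CUDA.jl and cuDNN.jl are no longer hard dependencies,
# so they have to be installed and loaded explicitly alongside Flux.
using Pkg
Pkg.add(["Flux", "CUDA", "cuDNN"])   # one-time installation

using Flux, CUDA                     # loading CUDA activates Flux's CUDA extension

m = Dense(3 => 2) |> gpu             # moves parameters to the GPU when CUDA is functional
x = CUDA.rand(Float32, 3, 5)
y = m(x)
```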
32 changes: 17 additions & 15 deletions Project.toml
@@ -4,14 +4,12 @@ version = "0.13.17"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
@@ -22,50 +20,54 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
Metal = "dde4c033-4e86-420c-a63e-0dd931031962"

[extensions]
AMDGPUExt = "AMDGPU"
FluxAMDGPUExt = "AMDGPU"
FluxCUDAExt = "CUDA"
FluxCUDAcuDNNExt = ["CUDA", "cuDNN"]
FluxMetalExt = "Metal"

[compat]
AMDGPU = "0.4.13"
Adapt = "3.0"
CUDA = "3, 4"
CUDA = "4"
ChainRulesCore = "1.12"
Functors = "0.3, 0.4"
MLUtils = "0.2, 0.3.1, 0.4"
Functors = "0.4"
MLUtils = "0.4"
MacroTools = "0.5"
Metal = "0.4"
NNlib = "0.8.19"
NNlibCUDA = "0.2.6"
OneHotArrays = "0.1, 0.2"
NNlib = "0.9.1"
OneHotArrays = "0.2.4"
Optimisers = "0.2.12"
Preferences = "1"
ProgressLogging = "0.1"
Reexport = "0.2, 1.0"
SpecialFunctions = "1.8.2, 2.1.2"
Reexport = "1.0"
SpecialFunctions = "2.1.2"
Zygote = "0.6.49"
cuDNN = "1"
julia = "1.6"
julia = "1.9"

[extras]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

[targets]
test = ["Test", "Documenter", "IterTools", "LinearAlgebra",
"FillArrays", "ComponentArrays", "BSON",
"Pkg"]
"FillArrays", "ComponentArrays", "BSON", "Pkg",
"CUDA", "cuDNN", "Metal"]
9 changes: 9 additions & 0 deletions cuda.jl
@@ -0,0 +1,9 @@
using Flux, CUDA

BN = BatchNorm(3) |> gpu;
x = randn(2, 2, 3, 4) |> gpu;

NNlib.batchnorm(BN.γ, BN.β, x, BN.μ, BN.σ², BN.momentum;
alpha=1, beta=0, eps=BN.ϵ,
training=Flux._isactive(BN, x))

2 changes: 1 addition & 1 deletion docs/src/index.md
@@ -8,7 +8,7 @@ Flux is a library for machine learning. It comes "batteries-included" with many

### Installation

Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) for Nvidia GPU support.
Download [Julia 1.9](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) for Nvidia GPU support.

### Learning Flux

ext/AMDGPUExt/AMDGPUExt.jl → ext/FluxAMDGPUExt/FluxAMDGPUExt.jl
@@ -1,9 +1,9 @@
module AMDGPUExt
module FluxAMDGPUExt

import ChainRulesCore
import ChainRulesCore: NoTangent
import Flux
import Flux: FluxCPUAdaptor, FluxAMDAdaptor, _amd, _isleaf, adapt_storage, fmap
import Flux: FluxCPUAdaptor, FluxAMDAdaptor, _amd, adapt_storage, fmap
import Flux: DenseConvDims, Conv, ConvTranspose, conv, conv_reshape_bias
import NNlib

@@ -13,10 +13,14 @@ using Random
using Zygote

const MIOPENFloat = AMDGPU.MIOpen.MIOPENFloat

# Set to boolean on the first call to check_use_amdgpu
const USE_AMDGPU = Ref{Union{Nothing, Bool}}(nothing)

function check_use_amdgpu()
isnothing(USE_AMDGPU[]) || return
if !isnothing(USE_AMDGPU[])
return
end

USE_AMDGPU[] = AMDGPU.functional()
if USE_AMDGPU[]
@@ -25,12 +29,13 @@ function check_use_amdgpu()
end
else
@info """
The AMDGPU function is being called but the AMDGPU is not functional.
The AMDGPU function is being called but AMDGPU.jl is not functional.
Defaulting back to the CPU. (No action is required if you want to run on the CPU).
""" maxlog=1
end
return
end

ChainRulesCore.@non_differentiable check_use_amdgpu()

include("functor.jl")
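Both the AMDGPU and the CUDA extensions rely on the same lazy, memoised backend probe shown above. As a standalone sketch of the pattern (`backend_functional` is a hypothetical stand-in for `AMDGPU.functional()` or `CUDA.functional()`):

```julia
# Generic form of the check used by the extensions; `backend_functional` is a stand-in.
backend_functional() = false   # hypothetical probe; replace with the real device query

const USE_BACKEND = Ref{Union{Nothing, Bool}}(nothing)

function check_use_backend()
    isnothing(USE_BACKEND[]) || return       # already decided on an earlier call
    USE_BACKEND[] = backend_functional()     # probe the device exactly once
    if !USE_BACKEND[]
        @info "Backend not functional; defaulting back to the CPU." maxlog=1
    end
    return
end
```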
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion ext/AMDGPUExt/functor.jl → ext/FluxAMDGPUExt/functor.jl
@@ -42,7 +42,7 @@ _conv_basetype(::ConvTranspose) = ConvTranspose

Flux._isleaf(::AMD_CONV) = true

_exclude(x) = _isleaf(x)
_exclude(x) = Flux._isleaf(x)
_exclude(::CPU_CONV) = true

function _amd(x)
49 changes: 49 additions & 0 deletions ext/FluxCUDAExt/FluxCUDAExt.jl
@@ -0,0 +1,49 @@
module FluxCUDAExt

using Flux
import Flux: _cuda
using Flux: FluxCPUAdaptor, FluxCUDAAdaptor, fmap
using CUDA
using NNlib
using Zygote
using ChainRulesCore
using Random
using Adapt
import Adapt: adapt_storage


const USE_CUDA = Ref{Union{Nothing, Bool}}(nothing)

function check_use_cuda()
if !isnothing(USE_CUDA[])
return
end

USE_CUDA[] = CUDA.functional()
if !USE_CUDA[]
@info """
The CUDA function is being called but CUDA.jl is not functional.
Defaulting back to the CPU. (No action is required if you want to run on the CPU).
""" maxlog=1
end
return
end

ChainRulesCore.@non_differentiable check_use_cuda()

include("functor.jl")

function __init__()
Flux.CUDA_LOADED[] = true

try
Base.require(Main, :cuDNN)
catch
@warn """Package cuDNN not found in current path.
- Run `import Pkg; Pkg.add(\"cuDNN\")` to install the cuDNN package, then restart julia.
- If cuDNN is not installed, some Flux functionalities will not be available when running on the GPU.
"""
end
end

end
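The lazy check above lets `gpu` degrade gracefully. A rough usage sketch, assuming (as this commit suggests) that Flux routes CUDA device movement through `_cuda`:

```julia
using Flux, CUDA

m = Chain(Dense(2 => 3, relu), Dense(3 => 1))
m_gpu = m |> gpu   # reaches Flux._cuda below; if CUDA.functional() is false, the
                   # @info message above is shown once and the model stays on the CPU
```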
32 changes: 32 additions & 0 deletions ext/FluxCUDAExt/functor.jl
@@ -0,0 +1,32 @@

adapt_storage(to::FluxCUDAAdaptor, x) = CUDA.cu(x)
adapt_storage(to::FluxCUDAAdaptor, x::Zygote.FillArrays.AbstractFill) = CUDA.cu(collect(x))
adapt_storage(to::FluxCUDAAdaptor, x::Random.TaskLocalRNG) = CUDA.default_rng()
adapt_storage(to::FluxCUDAAdaptor, x::CUDA.RNG) = x
adapt_storage(to::FluxCUDAAdaptor, x::AbstractRNG) =
error("Cannot map RNG of type $(typeof(x)) to GPU. GPU execution only supports Random.default_rng().")

# TODO: figure out the correct design for OneElement
adapt_storage(to::FluxCUDAAdaptor, x::Zygote.OneElement) = CUDA.cu(collect(x))

adapt_storage(to::FluxCPUAdaptor, x::T) where T <: CUDA.CUSPARSE.CUDA.CUSPARSE.AbstractCuSparseMatrix = adapt(Array, x)
adapt_storage(to::FluxCPUAdaptor, x::CUDA.RNG) = Random.default_rng()

function ChainRulesCore.rrule(::typeof(Adapt.adapt_storage), to::FluxCPUAdaptor, x::CUDA.AbstractGPUArray)
adapt_storage(to, x), dx -> (NoTangent(), NoTangent(), adapt_storage(FluxCUDAAdaptor(), unthunk(dx)))
end

ChainRulesCore.rrule(::typeof(adapt), a::FluxCPUAdaptor, x::AnyCuArray) =
adapt(a, x), Δ -> (NoTangent(), NoTangent(), adapt(FluxCUDAAdaptor(), unthunk(Δ)))

ChainRulesCore.rrule(::typeof(adapt), a::FluxCUDAAdaptor, x::AnyCuArray) =
adapt(a, x), Δ -> (NoTangent(), NoTangent(), Δ)

ChainRulesCore.rrule(::typeof(adapt), a::FluxCUDAAdaptor, x::AbstractArray) =
adapt(a, x), Δ -> (NoTangent(), NoTangent(), adapt(FluxCPUAdaptor(), unthunk(Δ)))

function _cuda(x)
check_use_cuda()
USE_CUDA[] || return x
fmap(x -> Adapt.adapt(FluxCUDAAdaptor(), x), x; exclude=Flux._isleaf)
end
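A minimal sketch of what the `rrule`s above provide (assuming a functional CUDA device): gradients flow through a gpu/cpu round trip and come back as a plain CPU array.

```julia
using Flux, CUDA, Zygote

x = rand(Float32, 3)
g, = gradient(x) do x
    sum(abs2, cpu(gpu(x)))   # device round trip inside the loss
end
g isa Vector{Float32}        # expected: true — the cotangent is adapted back to the CPU
```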
1 change: 1 addition & 0 deletions ext/FluxCUDAExt/utils.jl
@@ -0,0 +1 @@
rng_from_array(::CuArray) = CUDA.default_rng()
34 changes: 25 additions & 9 deletions src/cuda/cudnn.jl → ext/FluxCUDAcuDNNExt/FluxCUDAcuDNNExt.jl
@@ -1,21 +1,37 @@
import NNlibCUDA: batchnorm, ∇batchnorm
module FluxCUDAcuDNNExt

using Flux
using CUDA, cuDNN
using NNlib

const USE_CUDNN = Ref{Union{Nothing, Bool}}(nothing)

function check_use_cudnn()
if !isnothing(USE_CUDNN[])
return
end

USE_CUDNN[] = cuDNN.has_cudnn()
if !USE_CUDNN[]
@warn """
cuDNN.jl didn't find libcudnn; some Flux functionality will not be available.
""" maxlog=1
end
return
end

function (BN::Flux.BatchNorm)(x::Union{CuArray{T,2},CuArray{T,4},CuArray{T,5}},
cache=nothing) where T<:Union{Float32, Float64}

@assert BN.affine "BatchNorm: only affine=true supported on gpu"
@assert BN.track_stats "BatchNorm: only track_stats=true supported on gpu"
@assert length(BN.β) == size(x, ndims(x)-1) "BatchNorm: input has wrong number of channels"
return BN.λ.(batchnorm(BN.γ, BN.β, x, BN.μ, BN.σ², BN.momentum;

return BN.λ.(NNlib.batchnorm(BN.γ, BN.β, x, BN.μ, BN.σ², BN.momentum;
cache=cache, alpha=1, beta=0, eps=BN.ϵ,
training=Flux._isactive(BN, x)))
end

function ChainRulesCore.rrule(::typeof(batchnorm), g, b, x, running_mean, running_var, momentum; kw...)
y = batchnorm(g, b, x, running_mean, running_var, momentum; kw...)
function batchnorm_pullback(Δ)
grad = ∇batchnorm(g, b, x, unthunk(Δ), running_mean, running_var, momentum; kw...)
(NoTangent(), grad..., NoTangent(), NoTangent(), NoTangent())
end
y, batchnorm_pullback


end
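For completeness, a small hedged check that a `BatchNorm` layer applied to a 4-D `CuArray` dispatches to the cuDNN-backed method above (assumes CUDA and cuDNN are both functional):

```julia
using Flux, CUDA, cuDNN

bn = BatchNorm(3) |> gpu
x  = CUDA.randn(Float32, 8, 8, 3, 4)   # WHCN layout: 3 channels, batch of 4
y  = bn(x)                             # hits the CuArray method above, which calls NNlib.batchnorm
size(y) == size(x)                     # expected: true
```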
5 changes: 0 additions & 5 deletions src/Flux.jl
Original file line number Diff line number Diff line change
@@ -45,10 +45,6 @@ include("train.jl")
using .Train
using .Train: setup

using CUDA
import cuDNN
const use_cuda = Ref{Union{Nothing,Bool}}(nothing)

using Adapt, Functors, OneHotArrays
include("utils.jl")
include("functor.jl")
@@ -75,6 +71,5 @@ include("deprecations.jl")
include("losses/Losses.jl")
using .Losses

include("cuda/cuda.jl")

end # module
11 changes: 0 additions & 11 deletions src/cuda/cuda.jl

This file was deleted.
