Merge pull request #78 from SciML/refactor
Refactor
foldfelis authored Jul 7, 2022
2 parents 61d32af + 221ddd4 commit d6dbe6b
Showing 25 changed files with 146 additions and 153 deletions.
46 changes: 18 additions & 28 deletions .github/workflows/CI.yml
@@ -2,13 +2,15 @@ name: CI
env:
DATADEPS_ALWAYS_ACCEPT: true
on:
- push
- pull_request

defaults:
run:
shell: bash

push:
branches: '*'
tags: '*'
pull_request:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
@@ -31,42 +33,30 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v2
with:
file: lcov.info
files: lcov.info
docs:
name: Documentation
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: '1'
- run: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.instantiate()'
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-docdeploy@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
julia --project=docs -e '
using Documenter: DocMeta, doctest
using NeuralOperators
DocMeta.setdocmeta!(NeuralOperators, :DocTestSetup, :(using NeuralOperators); recursive=true)
doctest(NeuralOperators)'
- run: julia --project=docs docs/make.jl
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
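
The docs job now delegates building and deployment to `julia-actions/julia-docdeploy`, which runs `docs/make.jl` with `GITHUB_TOKEN` and `DOCUMENTER_KEY` available. A minimal sketch of what such a `docs/make.jl` would contain under this setup, including the doctest setup the old workflow ran as a separate CI step; the file itself is not part of this diff, so the names and options here are assumptions:

```julia
# docs/make.jl, a hypothetical sketch; the real file is not shown in this commit.
using Documenter
using NeuralOperators

# Doctest setup that the removed CI step used to run inline.
Documenter.DocMeta.setdocmeta!(NeuralOperators, :DocTestSetup,
                               :(using NeuralOperators); recursive = true)

makedocs(;
         modules = [NeuralOperators],
         sitename = "NeuralOperators.jl",
         doctest = true)            # doctests now run as part of the docs build

# Reads GITHUB_TOKEN / DOCUMENTER_KEY from the environment set by the workflow.
deploydocs(; repo = "github.com/SciML/NeuralOperators.jl.git")
```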
6 changes: 3 additions & 3 deletions Project.toml
@@ -16,10 +16,10 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
CUDA = "3.9"
CUDA = "3"
CUDAKernels = "0.3, 0.4"
ChainRulesCore = "1.14"
FFTW = "1.4"
ChainRulesCore = "1"
FFTW = "1"
Flux = "0.13"
GeometricFlux = "0.12"
KernelAbstractions = "0.7, 0.8"
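
For reference, these `[compat]` entries are loosened from specific minor releases to whole major series; under Julia's semver-style compat rules, `"3"` admits any `3.x` release. A quick sanity check of that reading, a sketch using the `Pkg.Versions` helpers that ship with recent Julia (the same function lives in `Pkg.Types` on older versions):

```julia
using Pkg

# "3" as a compat bound means ≥ 3.0.0 and < 4.0.0, so the old "3.9" bound is a strict subset.
spec = Pkg.Versions.semver_spec("3")
@show v"3.0.0" in spec   # true
@show v"3.9.2" in spec   # true
@show v"4.0.0" in spec   # false
```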
4 changes: 2 additions & 2 deletions README.md
@@ -76,7 +76,7 @@ And then train as a Flux model.

```julia
loss(𝐱, 𝐲) = l₂loss(model(𝐱), 𝐲)
opt = Flux.Optimiser(WeightDecay(1f-4), Flux.ADAM(1f-3))
opt = Flux.Optimiser(WeightDecay(1f-4), Flux.Adam(1f-3))
Flux.@epochs 50 Flux.train!(loss, params(model), data, opt)
```
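
The README changes track the optimiser rename in Flux 0.13, where `ADAM` became `Adam`; a minimal sketch of the equivalent spellings, assuming Flux 0.13 where the old constant is kept only for backwards compatibility:

```julia
using Flux

# Flux 0.13 spells the optimiser `Adam`; both lines construct the same optimiser.
opt_old = Flux.ADAM(1f-3)    # pre-0.13 spelling (deprecated alias, assumed still available)
opt_new = Flux.Adam(1f-3)    # spelling used throughout this PR

# Combined with weight decay, as in the README snippet above:
opt = Flux.Optimiser(WeightDecay(1f-4), Flux.Adam(1f-3))
```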

@@ -102,7 +102,7 @@ loss(xtrain, ytrain, sensor) = Flux.Losses.mse(model(xtrain, sensor), ytrain)
evalcb() = @show(loss(xval, yval, grid))

learning_rate = 0.001
opt = ADAM(learning_rate)
opt = Adam(learning_rate)
parameters = params(model)
Flux.@epochs 400 Flux.train!(loss, parameters, [(xtrain, ytrain, grid)], opt, cb=evalcb)
```
4 changes: 2 additions & 2 deletions docs/src/index.md
@@ -53,7 +53,7 @@ And then train as a Flux model.

```julia
loss(𝐱, 𝐲) = l₂loss(model(𝐱), 𝐲)
opt = Flux.Optimiser(WeightDecay(1f-4), Flux.ADAM(1f-3))
opt = Flux.Optimiser(WeightDecay(1f-4), Flux.Adam(1f-3))
Flux.@epochs 50 Flux.train!(loss, params(model), data, opt)
```

@@ -80,7 +80,7 @@ loss(xtrain, ytrain, sensor) = Flux.Losses.mse(model(xtrain, sensor), ytrain)
evalcb() = @show(loss(xval, yval, grid))

learning_rate = 0.001
opt = ADAM(learning_rate)
opt = Adam(learning_rate)
parameters = params(model)
Flux.@epochs 400 Flux.train!(loss, parameters, [(xtrain, ytrain, grid)], opt, cb=evalcb)
```
4 changes: 2 additions & 2 deletions example/Burgers/src/Burgers.jl
@@ -56,7 +56,7 @@ function train(; cuda = true, η₀ = 1.0f-3, λ = 1.0f-4, epochs = 500)
model = FourierNeuralOperator(ch = (2, 64, 64, 64, 64, 64, 128, 1), modes = (16,),
σ = gelu)
data = get_dataloader()
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.ADAM(η₀))
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
loss_func = l₂loss

learner = Learner(model, data, optimiser, loss_func,
@@ -88,7 +88,7 @@ function train_nomad(; n = 300, cuda = true, learning_rate = 0.001, epochs = 400
grid = rand(collect(0:0.001:1), (280, 1024)) |> device
gridval = rand(collect(0:0.001:1), (20, 1024)) |> device

opt = ADAM(learning_rate)
opt = Adam(learning_rate)

m = NOMAD((1024, 1024), (2048, 1024), gelu, gelu) |> device

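
Putting the pieces of this example together, a condensed sketch of how the FNO training pipeline is assembled with FluxTraining; the argument values are copied from the hunk above, while `get_dataloader()` is the example's own data helper and the epoch count is taken from the `train` signature, both assumptions beyond what the diff shows:

```julia
using Flux
using NeuralOperators
using FluxTraining

# Model, optimiser and loss exactly as in train() above.
model = FourierNeuralOperator(ch = (2, 64, 64, 64, 64, 64, 128, 1), modes = (16,), σ = gelu)
optimiser = Flux.Optimiser(WeightDecay(1.0f-4), Flux.Adam(1.0f-3))

# get_dataloader() is the Burgers example's helper (not defined in this sketch).
data = get_dataloader()

learner = Learner(model, data, optimiser, l₂loss)
FluxTraining.fit!(learner, 500)   # epoch count taken from the train() default
```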
2 changes: 1 addition & 1 deletion example/Burgers/src/Burgers_deeponet.jl
@@ -26,7 +26,7 @@ function train_don(; n = 300, cuda = true, learning_rate = 0.001, epochs = 400)

grid = collect(range(0, 1, length = 1024)') |> device

opt = ADAM(learning_rate)
opt = Adam(learning_rate)

m = DeepONet((1024, 1024, 1024), (1, 1024, 1024), gelu, gelu) |> device

1 change: 1 addition & 0 deletions example/Burgers/test/runtests.jl
@@ -1,4 +1,5 @@
using Burgers
using FluxTraining
using Test

@testset "Burgers" begin
2 changes: 1 addition & 1 deletion example/DoublePendulum/src/DoublePendulum.jl
@@ -93,7 +93,7 @@ function train(; cuda = true, Δt = 1, η₀ = 1.0f-3, λ = 1.0f-4, epochs = 20)
model = FourierNeuralOperator(ch = (2, 64, 64, 64, 64, 64, 128, 2), modes = (4, 16),
σ = gelu)
data = get_dataloader(Δt = Δt)
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.ADAM(η₀))
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
loss_func = l₂loss

learner = Learner(model, data, optimiser, loss_func,
1 change: 1 addition & 0 deletions example/DoublePendulum/test/runtests.jl
@@ -1,4 +1,5 @@
using DoublePendulum
using FluxTraining
using Test

@testset "DoublePendulum" begin
4 changes: 2 additions & 2 deletions example/FlowOverCircle/src/FlowOverCircle.jl
@@ -62,7 +62,7 @@ function train(; cuda = true, η₀ = 1.0f-3, λ = 1.0f-4, epochs = 50)
model = MarkovNeuralOperator(ch = (1, 64, 64, 64, 64, 64, 1), modes = (24, 24),
σ = gelu)
data = get_dataloader()
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.ADAM(η₀))
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
loss_func = l₂loss

learner = Learner(model, data, optimiser, loss_func,
@@ -92,7 +92,7 @@ function train_gno(; cuda = true, η₀ = 1.0f-3, λ = 1.0f-4, epochs = 50)
WithGraph(featured_graph, GraphKernel(Dense(2 * 16, 16, gelu), 16)),
Dense(16, 1))
data = get_dataloader(batchsize = 16, flatten = true)
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.ADAM(η₀))
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
loss_func = l₂loss

learner = Learner(model, data, optimiser, loss_func,
2 changes: 1 addition & 1 deletion example/SuperResolution/src/SuperResolution.jl
@@ -87,7 +87,7 @@ function train(; cuda = true, η₀ = 1.0f-3, λ = 1.0f-4, epochs = 50)
model = MarkovNeuralOperator(ch = (1, 64, 64, 64, 64, 64, 1), modes = (24, 24),
σ = gelu)
data = get_dataloader()
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.ADAM(η₀))
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
loss_func = l₂loss

learner = Learner(model, data, optimiser, loss_func,
4 changes: 4 additions & 0 deletions src/DeepONet.jl → src/DeepONet/DeepONet.jl
@@ -1,3 +1,7 @@
export DeepONet

include("subnets.jl")

"""
`DeepONet(architecture_branch::Tuple, architecture_trunk::Tuple,
act_branch = identity, act_trunk = identity;
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions src/NOMAD.jl → src/NOMAD/NOMAD.jl
@@ -1,3 +1,5 @@
export NOMAD

struct NOMAD{T1, T2}
approximator_net::T1
decoder_net::T2
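
For orientation, `NOMAD` wraps two networks, an approximator and a decoder; a minimal construction sketch following the call in `example/Burgers`'s `train_nomad` above, whose layer sizes and `gelu` activations are reused here:

```julia
using Flux
using NeuralOperators

# Approximator maps 1024 → 1024 and decoder maps 2048 → 1024, mirroring
# NOMAD((1024, 1024), (2048, 1024), gelu, gelu) from the Burgers example.
m = NOMAD((1024, 1024), (2048, 1024), gelu, gelu)

# The two sub-networks are stored as m.approximator_net and m.decoder_net.
```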
17 changes: 9 additions & 8 deletions src/NeuralOperators.jl
@@ -1,4 +1,5 @@
module NeuralOperators

using Flux
using FFTW
using Tullio
@@ -10,15 +11,15 @@ using ChainRulesCore
using GeometricFlux
using Statistics

export DeepONet
export NOMAD

# kernels
include("Transform/Transform.jl")
include("operator_kernel.jl")
include("graph_kernel.jl")
include("loss.jl")
include("model.jl")
include("DeepONet.jl")
include("subnets.jl")
include("NOMAD.jl")

end
# models
include("FNO/FNO.jl")
include("DeepONet/DeepONet.jl")
include("NOMAD/NOMAD.jl")

end # module
50 changes: 50 additions & 0 deletions src/graph_kernel.jl
@@ -0,0 +1,50 @@
export GraphKernel

"""
GraphKernel(κ, ch, σ=identity)
Graph kernel layer.
## Arguments
* `κ`: A neural network layer for approximation, e.g. a `Dense` layer or a MLP.
* `ch`: Channel size for linear transform, e.g. `32`.
* `σ`: Activation function.
## Keyword Arguments
* `init`: Initial function to initialize parameters.
"""
struct GraphKernel{A, B, F} <: MessagePassing
linear::A
κ::B
σ::F
end

function GraphKernel(κ, ch::Int, σ = identity; init = Flux.glorot_uniform)
W = init(ch, ch)
return GraphKernel(W, κ, σ)
end

Flux.@functor GraphKernel

function GeometricFlux.message(l::GraphKernel, x_i::AbstractArray, x_j::AbstractArray, e_ij)
return l.κ(vcat(x_i, x_j))
end

function GeometricFlux.update(l::GraphKernel, m::AbstractArray, x::AbstractArray)
return l.σ.(GeometricFlux._matmul(l.linear, x) + m)
end

function (l::GraphKernel)(el::NamedTuple, X::AbstractArray)
GraphSignals.check_num_nodes(el.N, X)
_, V, _ = GeometricFlux.propagate(l, el, nothing, X, nothing, mean, nothing, nothing)
return V
end

function Base.show(io::IO, l::GraphKernel)
channel, _ = size(l.linear)
print(io, "GraphKernel(", l.κ, ", channel=", channel)
l.σ == identity || print(io, ", ", l.σ)
print(io, ")")
end
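
As a usage reference, `GraphKernel` is wired into a model through GeometricFlux's `WithGraph`, as `train_gno` in `example/FlowOverCircle` does; a sketch following that call, in which the leading lift layer and the `FeaturedGraph` construction are not visible in this diff and are therefore assumptions:

```julia
using Flux
using GeometricFlux
using GraphSignals
using NeuralOperators

# A tiny stand-in graph; the example builds its FeaturedGraph from the simulation grid instead.
adjacency = [0 1 0; 1 0 1; 0 1 0]
featured_graph = FeaturedGraph(adjacency)

model = Chain(Dense(1, 16),   # channel lift, assumed (not visible in the hunk)
              WithGraph(featured_graph, GraphKernel(Dense(2 * 16, 16, gelu), 16)),
              Dense(16, 1))   # projection back to one channel, as in train_gno
```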
56 changes: 3 additions & 53 deletions src/operator_kernel.jl
@@ -1,8 +1,7 @@
export
OperatorConv,
SpectralConv,
OperatorKernel,
GraphKernel
OperatorKernel

struct OperatorConv{P, T, S, TT}
weight::T
@@ -66,8 +65,8 @@ function SpectralConv(ch::Pair{S, S},
init = c_glorot_uniform,
permuted = false,
T::DataType = ComplexF32) where {S <: Integer, N}
return OperatorConv(ch, modes, FourierTransform, init = init, permuted = permuted,
T = T)
return OperatorConv(ch, modes, FourierTransform,
init = init, permuted = permuted, T = T)
end

Flux.@functor OperatorConv{true}
@@ -181,55 +180,6 @@ function (m::OperatorKernel)(𝐱)
return m.σ.(m.linear(𝐱) + m.conv(𝐱))
end

"""
GraphKernel(κ, ch, σ=identity)
Graph kernel layer.
## Arguments
* `κ`: A neural network layer for approximation, e.g. a `Dense` layer or a MLP.
* `ch`: Channel size for linear transform, e.g. `32`.
* `σ`: Activation function.
## Keyword Arguments
* `init`: Initial function to initialize parameters.
"""
struct GraphKernel{A, B, F} <: MessagePassing
linear::A
κ::B
σ::F
end

function GraphKernel(κ, ch::Int, σ = identity; init = Flux.glorot_uniform)
W = init(ch, ch)
return GraphKernel(W, κ, σ)
end

Flux.@functor GraphKernel

function GeometricFlux.message(l::GraphKernel, x_i::AbstractArray, x_j::AbstractArray, e_ij)
return l.κ(vcat(x_i, x_j))
end

function GeometricFlux.update(l::GraphKernel, m::AbstractArray, x::AbstractArray)
return l.σ.(GeometricFlux._matmul(l.linear, x) + m)
end

function (l::GraphKernel)(el::NamedTuple, X::AbstractArray)
GraphSignals.check_num_nodes(el.N, X)
_, V, _ = GeometricFlux.propagate(l, el, nothing, X, nothing, mean, nothing, nothing)
return V
end

function Base.show(io::IO, l::GraphKernel)
channel, _ = size(l.linear)
print(io, "GraphKernel(", l.κ, ", channel=", channel)
l.σ == identity || print(io, ", ", l.σ)
print(io, ")")
end

#########
# utils #
#########
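
For reference, the constructor whose signature appears in the hunk above takes a channel pair, a tuple of retained modes, and a transform type; a small construction sketch, with channel and mode counts that are illustrative rather than taken from this diff:

```julia
using NeuralOperators

# SpectralConv is OperatorConv specialised to the Fourier transform, using the defaults
# shown above (init = c_glorot_uniform, permuted = false, T = ComplexF32).
sc = SpectralConv(2 => 64, (16,))

# The general constructor names the transform explicitly:
oc = OperatorConv(2 => 64, (16,), FourierTransform)
```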
6 changes: 6 additions & 0 deletions test/deeponet.jl → test/DeepONet/DeepONet.jl
@@ -33,3 +33,9 @@
@test_throws AssertionError DeepONet((32, 64, 70), (24, 48, 72), σ, tanh)
@test_throws DimensionMismatch m(a, sensors)
end

if CUDA.functional()
include("cuda.jl")
else
@warn "CUDA unavailable, not testing GPU support"
end
4 changes: 2 additions & 2 deletions test/cuda.jl → test/DeepONet/cuda.jl
@@ -1,4 +1,4 @@
@testset "CUDA" begin @testset "DeepONet" begin
@testset "DeepONet CUDA" begin
batch_size = 2
a = [0.83541104, 0.83479851, 0.83404712, 0.83315711, 0.83212979, 0.83096755,
0.82967374, 0.82825263, 0.82670928, 0.82504949, 0.82327962, 0.82140651,
@@ -13,4 +13,4 @@

mgrad = Flux.Zygote.gradient(() -> sum(model(a, sensors)), Flux.params(model))
@test length(mgrad.grads) == 9
end end
end