Skip to content

Commit

Permalink
Merge pull request #88 from FourierFlows/OptimizeMultilayerQG
Browse files Browse the repository at this point in the history
Optimize MultilayerQG on CPU
  • Loading branch information
navidcy authored Sep 14, 2020
2 parents 20db7c6 + ae72e4a commit 979e407
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 24 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
Expand Down
49 changes: 25 additions & 24 deletions src/multilayerqg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ using
FFTW,
CUDA,
LinearAlgebra,
StaticArrays,
Reexport

@reexport using FourierFlows
Expand Down Expand Up @@ -176,19 +177,20 @@ function Params(nlayers, g, f0, β, ρ, H, U, eta, μ, ν, nν, grid; calcFq=not
ρ = reshape(T.(ρ), (1, 1, nlayers))
H = reshape(T.(H), (1, 1, nlayers))

g′ = T(g) * (ρ[2:nlayers] - ρ[1:nlayers-1]) ./ ρ[2:nlayers] # reduced gravity at each interface;
g′ = T(g) * (ρ[2:nlayers] - ρ[1:nlayers-1]) ./ ρ[2:nlayers] # reduced gravity at each interface

Fm = @. T( f0^2 / (g′ * H[2:nlayers]) )
Fp = @. T( f0^2 / (g′ * H[1:nlayers-1]) )

S = Array{T}(undef, (nkr, nl, nlayers, nlayers))
calcS!(S, Fp, Fm, grid)
typeofSkl = SArray{Tuple{nlayers, nlayers}, T, 2, nlayers^2} # StaticArrays of type T and dims = (nlayers x nlayers)

S = Array{typeofSkl, 2}(undef, (nkr, nl))
calcS!(S, Fp, Fm, nlayers, grid)

invS = Array{T}(undef, (nkr, nl, nlayers, nlayers))
calcinvS!(invS, Fp, Fm, grid)
invS = Array{typeofSkl, 2}(undef, (nkr, nl))
calcinvS!(invS, Fp, Fm, nlayers, grid)

S, invS = A(S), A(invS) # convert S and invS to appropriate ArrayType
Fp, Fm = A(Fp), A(Fm) # convert S and invS to appropriate ArrayType
S, invS, Fp, Fm = A(S), A(invS), A(Fp), A(Fm) # convert to appropriate ArrayType

CUDA.@allowscalar @views Qy[:, :, 1] = @. Qy[:, :, 1] - Fp[1] * ( U[:, :, 2] - U[:, :, 1] )
for j = 2:nlayers-1
Expand Down Expand Up @@ -296,13 +298,13 @@ invtransform!(var, varh, params::AbstractParams) = ldiv!(var, params.rfftplan, v

function streamfunctionfrompv!(ψh, qh, params, grid)
for j=1:grid.nl, i=1:grid.nkr
CUDA.@allowscalar @views ψh[i, j, :] .= params.invS[i, j, :, :] * qh[i, j, :]
CUDA.@allowscalar @views ψh[i, j, :] .= params.invS[i, j] * qh[i, j, :]
end
end

function pvfromstreamfunction!(qh, ψh, params, grid)
for j=1:grid.nl, i=1:grid.nkr
CUDA.@allowscalar @views qh[i, j, :] .= params.S[i, j, :, :] * ψh[i, j, :]
CUDA.@allowscalar @views qh[i, j, :] .= params.S[i, j] * ψh[i, j, :]
end
end

Expand All @@ -315,37 +317,36 @@ function pvfromstreamfunction!(qh, ψh, params::SingleLayerParams, grid)
end

"""
calcS!(S, Fp, Fm, grid)
calcS!(S, Fp, Fm, nlayers, grid)
Constructs the stretching matrix S that connects q and ψ: q_{k,l} = S * ψ_{k,l}.
Constructs the array S, which consists of nlayer x nlayer static arrays S_kl that relate
the q's and ψ's at every wavenumber: q̂_{k, l} = S_kl * ψ̂_{k, l}.
"""
function calcS!(S, Fp, Fm, grid)
function calcS!(S, Fp, Fm, nlayers, grid)
F = Matrix(Tridiagonal(Fm, -([Fp; 0] + [0; Fm]), Fp))
for n=1:grid.nl, m=1:grid.nkr
CUDA.@allowscalar= grid.Krsq[m, n]
Skl = -*I + F
CUDA.@allowscalar @views S[m, n, :, :] .= Skl
Skl = SMatrix{nlayers, nlayers}( - * I + F )
S[m, n] = Skl
end
return nothing
end

"""
calcinvS!(S, Fp, Fm, grid)
calcinvS!(S, Fp, Fm, nlayers, grid)
Constructs the inverse of the stretching matrix S that connects q and ψ:
ψ_{k,l} = invS * q_{k,l}.
Constructs the array invS, which consists of nlayer x nlayer static arrays (S_kl)⁻¹ that
relate the q's and ψ's at every wavenumber: ψ̂_{k, l} = (S_kl)⁻¹ * q̂_{k, l}.
"""
function calcinvS!(invS, Fp, Fm, grid)
function calcinvS!(invS, Fp, Fm, nlayers, grid)
T = eltype(grid)
F = Matrix(Tridiagonal(Fm, -([Fp; 0] + [0; Fm]), Fp))
for n=1:grid.nl, m=1:grid.nkr
CUDA.@allowscalar= grid.Krsq[m, n]
if== 0
= 1
end
CUDA.@allowscalar= grid.Krsq[m, n] == 0 ? 1 : grid.Krsq[m, n]
Skl = -*I + F
CUDA.@allowscalar @views invS[m, n, :, :] .= I / Skl
invS[m, n] = SMatrix{nlayers, nlayers}( I / Skl )
end
CUDA.@allowscalar @views invS[1, 1, :, :] .= 0
invS[1, 1] = SMatrix{nlayers, nlayers}( zeros(T, (nlayers, nlayers)) )
return nothing
end

Expand Down

0 comments on commit 979e407

Please sign in to comment.