Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add shock capturing with nonconservative_terms::True #36

Merged
merged 6 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 3 additions & 14 deletions examples/shallowwater_dirichlet_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,9 @@ using OrdinaryDiffEq
###############################################################################
# Semidiscretization of the shallow water equations

equations = ShallowWaterEquations1D(gravity_constant = 1.0, H0 = 3.0)
equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.0)

# An initial condition with constant total water height and zero velocities to test well-balancedness.
function initial_condition_well_balancedness(x, t, equations::ShallowWaterEquations1D)
# Set the background values
H = equations.H0
v = 0.0

b = (1.5 / exp(0.5 * ((x[1] - 1.0)^2)) + 0.75 / exp(0.5 * ((x[1] + 1.0)^2)))

return prim2cons(SVector(H, v, b), equations)
end

initial_condition = initial_condition_well_balancedness
initial_condition = initial_condition_convergence_test

boundary_condition = BoundaryConditionDirichlet(initial_condition)

Expand Down Expand Up @@ -49,7 +38,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0, 100.0)
tspan = (0.0, 1.0)
ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl

summary_callback = SummaryCallback()
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function pure_blended_element_count_kernel!(element_ids_dg, element_ids_dgfv, al
if (i <= length(alpha))
dg_only = isapprox(alpha[i], 0, atol = atol)

if dg_only
if dg_only # bad
element_ids_dg[i] = i
else
element_ids_dgfv[i] = i
Expand Down
198 changes: 190 additions & 8 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ function symmetric_noncons_flux_kernel!(symmetric_flux_arr, noncons_flux_arr, u,

@inbounds begin
for ii in axes(u, 1)
symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii]
noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] * derivative_split[j1, j2]
symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii] *
derivative_split[j1, j2]
noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii]
end
end
end
Expand Down Expand Up @@ -123,8 +124,8 @@ function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, nonco
integral_contribution = 0.0 # change back to `Float32`

for ii in axes(du, 2)
du[i, j, k] += derivative_split[j, ii] * symmetric_flux_arr[i, j, ii, k]
integral_contribution += noncons_flux_arr[i, j, ii, k]
du[i, j, k] += symmetric_flux_arr[i, j, ii, k]
integral_contribution += derivative_split[j, ii] * noncons_flux_arr[i, j, ii, k]
end

du[i, j, k] += 0.5 * integral_contribution # change back to `Float32`
Expand All @@ -149,6 +150,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,

element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero

# The sets of `get_node_vars` operations may be combined
# into a single set of operation for better performance (to be explored)

u_node = get_node_vars(u, equations, j1, k)
u_node1 = get_node_vars(u, equations, j2, k)

Expand Down Expand Up @@ -177,9 +181,63 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
return nothing
end

# Kernel for calculating pure DG and DG-FV volume fluxes
function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
element_ids_dgfv, derivative_split,
equations::AbstractEquations{1},
volume_flux_dg::Any, nonconservative_flux_dg::Any,
volume_flux_fv::Any, nonconservative_flux_fv::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(u, 2)^2 && k <= size(u, 3))
# length(element_ids_dgfv) == size(u, 3)

j1 = div(j - 1, size(u, 2)) + 1
j2 = rem(j - 1, size(u, 2)) + 1

element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero

# The sets of `get_node_vars` operations may be combined
# into a single set of operation for better performance (to be explored)

u_node = get_node_vars(u, equations, j1, k)
u_node1 = get_node_vars(u, equations, j2, k)

volume_flux_node = volume_flux_dg(u_node, u_node1, 1, equations)
noncons_flux_node = nonconservative_flux_dg(u_node, u_node1, 1, equations)

@inbounds begin
for ii in axes(u, 1)
volume_flux_arr[ii, j1, j2, k] = derivative_split[j1, j2] * volume_flux_node[ii]
noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii]
end
end

if j1 !== 1 && j2 === 1 && element_dgfv !== 0 # bad
u_ll = get_node_vars(u, equations, j1 - 1, element_dgfv)
u_rr = get_node_vars(u, equations, j1, element_dgfv)

f1_node = volume_flux_fv(u_ll, u_rr, 1, equations)

f1_L_node = nonconservative_flux_fv(u_ll, u_rr, 1, equations)
f1_R_node = nonconservative_flux_fv(u_rr, u_ll, 1, equations)

@inbounds begin
for ii in axes(u, 1)
fstar1_L[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_L_node[ii]
fstar1_R[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_R_node[ii]
end
end
end
end

return nothing
end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr)
volume_flux_arr, equations::AbstractEquations{1})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
Expand Down Expand Up @@ -212,6 +270,53 @@ function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha,
return nothing
end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr, noncons_flux_arr,
equations::AbstractEquations{1})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
# length(element_ids_dg) == size(du, 3)
# length(element_ids_dgfv) == size(du, 3)

element_dg = element_ids_dg[k] # check if `element_dg` is zero
element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
alpha_element = alpha[k]

@inbounds begin
if element_dg !== 0 # bad
integral_contribution = 0.0

for ii in axes(du, 2)
du[i, j, element_dg] += volume_flux_arr[i, j, ii, element_dg]
integral_contribution += derivative_split[j, ii] *
noncons_flux_arr[i, j, ii, element_dg]
end

du[i, j, element_dg] += 0.5 * integral_contribution
end

if element_dgfv !== 0 # bad
integral_contribution = 0.0

for ii in axes(du, 2)
du[i, j, element_dgfv] += (1 - alpha_element) *
volume_flux_arr[i, j, ii, element_dgfv]
integral_contribution += derivative_split[j, ii] *
noncons_flux_arr[i, j, ii, element_dgfv]
end

du[i, j, element_dgfv] += 0.5 * (1 - alpha_element) * integral_contribution
end
end
end

return nothing
end

# Kernel for calculating FV volume integral contribution
function volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv,
alpha)
Expand Down Expand Up @@ -473,7 +578,7 @@ end

# Kernel for calculating source terms
function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEquations{1},
source_terms::Function)
source_terms::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

Expand Down Expand Up @@ -627,9 +732,86 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
element_ids_dgfv,
alpha,
derivative_split,
volume_flux_arr)
volume_flux_arr,
equations)
volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr, equations;
configurator_3d(volume_integral_dg_kernel, du)...)

size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3))

volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L,
fstar1_R,
inverse_weights,
element_ids_dgfv,
alpha)
volume_integral_fv_kernel(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha;
configurator_2d(volume_integral_fv_kernel, size_arr)...)

return nothing
end

# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
volume_flux_dg, nonconservative_flux_dg = dg.volume_integral.volume_flux_dg
volume_flux_fv, nonconservative_flux_fv = dg.volume_integral.volume_flux_fv
indicator = dg.volume_integral.indicator

# TODO: Get copies of `u` and `du` on both device and host
alpha = indicator(Array(u), mesh, equations, dg, cache)
alpha = CuArray{Float64}(alpha)

# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl

element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))

pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
element_ids_dgfv,
alpha,
atol)
pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
configurator_1d(pure_blended_element_count_kernel, alpha)...)

derivative_split = dg.basis.derivative_split
set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`

derivative_split = CuArray{Float64}(derivative_split)
volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))

inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))

size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))

volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr,
noncons_flux_arr,
fstar1_L, fstar1_R, u,
element_ids_dgfv,
derivative_split,
equations, volume_flux_dg,
nonconservative_flux_dg,
volume_flux_fv,
nonconservative_flux_fv)
volume_flux_dgfv_kernel(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
element_ids_dgfv, derivative_split, equations, volume_flux_dg,
nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv;
configurator_2d(volume_flux_dgfv_kernel, size_arr)...)

volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
element_ids_dgfv,
alpha,
derivative_split,
volume_flux_arr,
noncons_flux_arr,
equations)
volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr;
volume_flux_arr, noncons_flux_arr, equations;
configurator_3d(volume_integral_dg_kernel, du)...)

size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3))
Expand Down
17 changes: 14 additions & 3 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, fstar1_L,

element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero

# The sets of `get_node_vars` operations may be combined
# into a single set of operation for better performance (to be explored)

u_node = get_node_vars(u, equations, j1, j2, k)
u_node1 = get_node_vars(u, equations, j3, j2, k)
u_node2 = get_node_vars(u, equations, j1, j3, k)
Expand Down Expand Up @@ -237,7 +240,8 @@ end

# Kernel for calculating DG volume integral contribution
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr1, volume_flux_arr2)
volume_flux_arr1, volume_flux_arr2,
equations::AbstractEquations{2})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
Expand Down Expand Up @@ -976,9 +980,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
alpha,
derivative_split,
volume_flux_arr1,
volume_flux_arr2)
volume_flux_arr2,
equations)
volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
volume_flux_arr1, volume_flux_arr2;
volume_flux_arr1, volume_flux_arr2, equations;
configurator_3d(volume_integral_dg_kernel, size_arr)...)

size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 4))
Expand All @@ -996,6 +1001,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
return nothing
end

# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
return nothing
end

# Pack kernels for prolonging solution to interfaces
function cuda_prolong2interfaces!(u, mesh::TreeMesh{2}, equations, cache)
neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
Expand Down
Loading
Loading