Skip to content

Commit

Permalink
Fix interface flux with true noncons
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Sep 15, 2024
1 parent 0317588 commit db199a5
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 106 deletions.
44 changes: 22 additions & 22 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -382,23 +382,23 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, equations::Abstrac
end

# Kernel for calculating surface and both nonconservative fluxes
function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_left_arr,
noncons_right_arr, equations::AbstractEquations{1},
function surface_noncons_flux_kernel!(surface_flux_arr, noncons_left_arr, noncons_right_arr,
interfaces_u, equations::AbstractEquations{1},
surface_flux::Any, nonconservative_flux::Any)
k = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x

if (k <= size(surface_flux_arr, 3))
u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, k)
if (j <= size(surface_flux_arr, 2))
u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j)

surface_flux_node = surface_flux(u_ll, u_rr, 1, equations)
noncons_left_node = nonconservative_flux(u_ll, u_rr, 1, equations)
noncons_right_node = nonconservative_flux(u_rr, u_ll, 1, equations)

@inbounds begin
for jj in axes(surface_flux_arr, 2)
surface_flux_arr[1, jj, k] = surface_flux_node[jj]
noncons_left_arr[1, jj, k] = noncons_left_node[jj]
noncons_right_arr[1, jj, k] = noncons_right_node[jj]
for ii in axes(surface_flux_arr, 1)
surface_flux_arr[ii, j] = surface_flux_node[ii]
noncons_left_arr[ii, j] = noncons_left_node[ii]
noncons_right_arr[ii, j] = noncons_right_node[ii]
end
end
end
Expand Down Expand Up @@ -428,17 +428,17 @@ end
function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_left_arr,
noncons_right_arr, neighbor_ids)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (i <= size(surface_flux_values, 1) && k <= size(surface_flux_arr, 3))
left_id = neighbor_ids[1, k]
right_id = neighbor_ids[2, k]
if (i <= size(surface_flux_values, 1) && j <= size(surface_flux_arr, 2))
left_id = neighbor_ids[1, j]
right_id = neighbor_ids[2, j]

@inbounds begin
surface_flux_values[i, 2, left_id] = surface_flux_arr[1, i, k] +
0.5 * noncons_left_arr[1, i, k] # change back to `Float32`
surface_flux_values[i, 1, right_id] = surface_flux_arr[1, i, k] +
0.5 * noncons_right_arr[1, i, k] # change back to `Float32`
surface_flux_values[i, 2, left_id] = surface_flux_arr[i, j] +
0.5 * noncons_left_arr[i, j] # change back to `Float32`
surface_flux_values[i, 1, right_id] = surface_flux_arr[i, j] +
0.5 * noncons_right_arr[i, j] # change back to `Float32`
end
end

Expand Down Expand Up @@ -883,21 +883,21 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::True, eq

neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
interfaces_u = CuArray{Float64}(cache.interfaces.u)
surface_flux_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
noncons_left_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
noncons_right_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)

size_arr = CuArray{Float64}(undef, size(interfaces_u, 3))

surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr,
interfaces_u,
noncons_left_arr,
noncons_right_arr,
interfaces_u,
equations,
surface_flux,
nonconservative_flux)
surface_noncons_flux_kernel(surface_flux_arr, interfaces_u, noncons_left_arr, noncons_right_arr,
surface_noncons_flux_kernel(surface_flux_arr, noncons_left_arr, noncons_right_arr, interfaces_u,
equations, surface_flux, nonconservative_flux;
configurator_1d(surface_noncons_flux_kernel, size_arr)...)

Expand Down
44 changes: 21 additions & 23 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -367,26 +367,25 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, orientations,
end

# Kernel for calculating surface and both nonconservative fluxes
function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_left_arr,
noncons_right_arr, orientations,
equations::AbstractEquations{2}, surface_flux::Any,
nonconservative_flux::Any)
j2 = (blockIdx().x - 1) * blockDim().x + threadIdx().x
function surface_noncons_flux_kernel!(surface_flux_arr, noncons_left_arr, noncons_right_arr,
interfaces_u, orientations, equations::AbstractEquations{2},
surface_flux::Any, nonconservative_flux::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j2 <= size(surface_flux_arr, 3) && k <= size(surface_flux_arr, 4))
u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j2, k)
if (j <= size(surface_flux_arr, 2) && k <= size(surface_flux_arr, 3))
u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j, k)
orientation = orientations[k]

surface_flux_node = surface_flux(u_ll, u_rr, orientation, equations)
noncons_left_node = nonconservative_flux(u_ll, u_rr, orientation, equations)
noncons_right_node = nonconservative_flux(u_rr, u_ll, orientation, equations)

@inbounds begin
for j1j1 in axes(surface_flux_arr, 2)
surface_flux_arr[1, j1j1, j2, k] = surface_flux_node[j1j1]
noncons_left_arr[1, j1j1, j2, k] = noncons_left_node[j1j1]
noncons_right_arr[1, j1j1, j2, k] = noncons_right_node[j1j1]
for ii in axes(surface_flux_arr, 1)
surface_flux_arr[ii, j, k] = surface_flux_node[ii]
noncons_left_arr[ii, j, k] = noncons_left_node[ii]
noncons_right_arr[ii, j, k] = noncons_right_node[ii]
end
end
end
Expand Down Expand Up @@ -426,22 +425,21 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_l
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(surface_flux_values, 1) &&
j <= size(surface_flux_arr, 3) &&
k <= size(surface_flux_arr, 4))
if (i <= size(surface_flux_values, 1) && j <= size(surface_flux_arr, 2) &&
k <= size(surface_flux_arr, 3))
left_id = neighbor_ids[1, k]
right_id = neighbor_ids[2, k]

left_direction = 2 * orientations[k]
right_direction = 2 * orientations[k] - 1

@inbounds begin
surface_flux_values[i, j, left_direction, left_id] = surface_flux_arr[1, i, j, k] +
surface_flux_values[i, j, left_direction, left_id] = surface_flux_arr[i, j, k] +
0.5 * # change back to `Float32`
noncons_left_arr[1, i, j, k]
surface_flux_values[i, j, right_direction, right_id] = surface_flux_arr[1, i, j, k] +
noncons_left_arr[i, j, k]
surface_flux_values[i, j, right_direction, right_id] = surface_flux_arr[i, j, k] +
0.5 * # change back to `Float32`
noncons_right_arr[1, i, j, k]
noncons_right_arr[i, j, k]
end
end

Expand Down Expand Up @@ -1075,22 +1073,22 @@ function cuda_interface_flux!(mesh::TreeMesh{2}, nonconservative_terms::True, eq
neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
orientations = CuArray{Int64}(cache.interfaces.orientations)
interfaces_u = CuArray{Float64}(cache.interfaces.u)
surface_flux_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
noncons_left_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
noncons_right_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)

size_arr = CuArray{Float64}(undef, size(interfaces_u, 3), size(interfaces_u, 4))

surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr,
interfaces_u,
noncons_left_arr,
noncons_right_arr,
interfaces_u,
orientations,
equations,
surface_flux,
nonconservative_flux)
surface_noncons_flux_kernel(surface_flux_arr, interfaces_u, noncons_left_arr, noncons_right_arr,
surface_noncons_flux_kernel(surface_flux_arr, noncons_left_arr, noncons_right_arr, interfaces_u,
orientations, equations, surface_flux, nonconservative_flux;
configurator_2d(surface_noncons_flux_kernel, size_arr)...)

Expand Down
8 changes: 8 additions & 0 deletions src/solvers/dg_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,14 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::True, eq
cache)
surface_flux, nonconservative_flux = dg.surface_integral.surface_flux

neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
orientations = CuArray{Int64}(cache.interfaces.orientations)
interfaces_u = CuArray{Float64}(cache.interfaces.u)
surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...)
surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)

return nothing
end

Expand Down
119 changes: 58 additions & 61 deletions test/test_script.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
include("test_trixigpu.jl")

using CUDA
equations = ShallowWaterEquations2D(gravity_constant = 9.81)

equations = IdealGlmMhdEquations3D(5 / 3)
initial_condition = initial_condition_convergence_test # MMS EOC test

initial_condition = initial_condition_convergence_test

volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
solver = DGSEM(polydeg = 3,
surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell),
surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal),
volume_integral = VolumeIntegralFluxDifferencing(volume_flux))

coordinates_min = (-1.0, -1.0, -1.0)
coordinates_max = (1.0, 1.0, 1.0)
coordinates_min = (0.0, 0.0)
coordinates_max = (sqrt(2.0), sqrt(2.0))
mesh = TreeMesh(coordinates_min, coordinates_max,
initial_refinement_level = 2,
n_cells_max = 10_000)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
initial_refinement_level = 3,
n_cells_max = 10_000,
periodicity = true)

###############################################################################
# ODE solvers, callbacks etc.
semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
source_terms = source_terms_convergence_test)

tspan = (0.0, 1.0)

Expand Down Expand Up @@ -68,50 +65,50 @@ TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equatio
Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
Trixi.have_nonconservative_terms(equations), equations,
solver.surface_integral, solver, cache)
# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values

# # Test `cuda_prolong2boundaries!`
# TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
# cache_gpu)
# Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
# @test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u

# # Test `cuda_boundary_flux!`
# TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
# solver_gpu, cache_gpu)
# Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
# solver.surface_integral, solver)
# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values

# # Test `cuda_prolong2mortars!`
# TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
# solver_gpu, cache_gpu)
# Trixi.prolong2mortars!(cache, u, mesh, equations,
# solver.mortar, solver.surface_integral, solver)
# @test_approx cache_gpu.mortars.u_upper ≈ cache.mortars.u_upper
# @test_approx cache_gpu.mortars.u_lower ≈ cache.mortars.u_lower

# # Test `cuda_mortar_flux!`
# TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
# solver_gpu, cache_gpu)
# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh,
# Trixi.have_nonconservative_terms(equations), equations,
# solver.mortar, solver.surface_integral, solver, cache)
# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values

# # Test `cuda_surface_integral!`
# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
# @test_approx du_gpu ≈ du

# # Test `cuda_jacobian!`
# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
# Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
# @test_approx du_gpu ≈ du

# # Test `cuda_sources!`
# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
# @test_approx du_gpu ≈ du
@test_approx cache_gpu.elements.surface_flux_values cache.elements.surface_flux_values

# Test `cuda_prolong2boundaries!`
TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
cache_gpu)
Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
@test_approx cache_gpu.boundaries.u cache.boundaries.u

# Test `cuda_boundary_flux!`
TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
solver_gpu, cache_gpu)
Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
solver.surface_integral, solver)
@test_approx cache_gpu.elements.surface_flux_values cache.elements.surface_flux_values

# Test `cuda_prolong2mortars!`
TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
solver_gpu, cache_gpu)
Trixi.prolong2mortars!(cache, u, mesh, equations,
solver.mortar, solver.surface_integral, solver)
@test_approx cache_gpu.mortars.u_upper cache.mortars.u_upper
@test_approx cache_gpu.mortars.u_lower cache.mortars.u_lower

# Test `cuda_mortar_flux!`
TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
solver_gpu, cache_gpu)
Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh,
Trixi.have_nonconservative_terms(equations), equations,
solver.mortar, solver.surface_integral, solver, cache)
@test_approx cache_gpu.elements.surface_flux_values cache.elements.surface_flux_values

# Test `cuda_surface_integral!`
TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
@test_approx du_gpu du

# Test `cuda_jacobian!`
TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
@test_approx du_gpu du

# Test `cuda_sources!`
TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
@test_approx du_gpu du

0 comments on commit db199a5

Please sign in to comment.