Skip to content

Commit

Permalink
Complete 2D
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Sep 18, 2024
1 parent 3db974c commit 17dc541
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 29 deletions.
104 changes: 98 additions & 6 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,8 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper,
end

# Kernel for calculating mortar fluxes
function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, equations,
function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations,
equations::AbstractEquations{2},
surface_flux::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
Expand All @@ -687,9 +688,9 @@ function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orienta
flux_lower_node = surface_flux(u_ll_lower, u_rr_lower, orientation, equations)

@inbounds begin
for i in axes(fstar_upper, 1)
fstar_upper[i, j, k] = flux_upper_node[i]
fstar_lower[i, j, k] = flux_lower_node[i]
for ii in axes(fstar_upper, 1)
fstar_upper[ii, j, k] = flux_upper_node[ii]
fstar_lower[ii, j, k] = flux_lower_node[ii]
end
end
end
Expand Down Expand Up @@ -1338,16 +1339,107 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati
large_sides,
orientations)
mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, fstar_upper,
fstar_lower,
reverse_upper, reverse_lower, neighbor_ids, large_sides,
fstar_lower, reverse_upper, reverse_lower, neighbor_ids, large_sides,
orientations;
configurator_3d(mortar_flux_copy_to_kernel, size_arr)...)

cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically

return nothing
end

# Kernel for calculating mortar fluxes and adding nonconservative fluxes
function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, large_sides,
equations::AbstractEquations{2}, surface_flux::Any,
nonconservative_flux::Any)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(u_upper, 3) && k <= length(orientations))
u_ll_upper, u_rr_upper = get_surface_node_vars(u_upper, equations, j, k)
u_ll_lower, u_rr_lower = get_surface_node_vars(u_lower, equations, j, k)

orientation = orientations[k]
large_side = large_sides[k]

flux_upper_node = surface_flux(u_ll_upper, u_rr_upper, orientation, equations)
flux_lower_node = surface_flux(u_ll_lower, u_rr_lower, orientation, equations)

@inbounds begin
for ii in axes(fstar_upper, 1)
fstar_upper[ii, j, k] = flux_upper_node[ii]
fstar_lower[ii, j, k] = flux_lower_node[ii]
end
end

u_upper1 = (2 - large_side) * u_ll_upper + (large_side - 1) * u_rr_upper
u_upper2 = (large_side - 1) * u_ll_upper + (2 - large_side) * u_rr_upper

u_lower1 = (2 - large_side) * u_ll_lower + (large_side - 1) * u_rr_lower
u_lower2 = (large_side - 1) * u_ll_lower + (2 - large_side) * u_rr_lower

noncons_flux_upper = nonconservative_flux(u_upper1, u_upper2, orientation, equations)
noncons_flux_lower = nonconservative_flux(u_lower1, u_lower2, orientation, equations)

@inbounds begin
for ii in axes(fstar_upper, 1)
fstar_upper[ii, j, k] += 0.5 * noncons_flux_upper[ii]
fstar_lower[ii, j, k] += 0.5 * noncons_flux_lower[ii]
end
end
end

return nothing
end

# Pack kernels for calculating mortar fluxes
function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservative_terms::True,
equations, dg::DGSEM, cache)
surface_flux, nonconservative_flux = dg.surface_integral.surface_flux
neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids)
large_sides = CuArray{Int64}(cache.mortars.large_sides)
orientations = CuArray{Int64}(cache.mortars.orientations)

u_upper = CuArray{Float64}(cache.mortars.u_upper)
u_lower = CuArray{Float64}(cache.mortars.u_lower)
reverse_upper = CuArray{Float64}(dg.mortar.reverse_upper)
reverse_lower = CuArray{Float64}(dg.mortar.reverse_lower)

surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
tmp_surface_flux_values = zero(similar(surface_flux_values))

fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations))
fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations))

size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations))

mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper,
u_lower, orientations, large_sides,
equations, surface_flux,
nonconservative_flux)
mortar_flux_kernel(fstar_upper, fstar_lower, u_upper, u_lower, orientations, large_sides,
equations, surface_flux, nonconservative_flux;
configurator_2d(mortar_flux_kernel, size_arr)...)

size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2),
length(orientations))

mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values,
tmp_surface_flux_values,
fstar_upper,
fstar_lower,
reverse_upper,
reverse_lower,
neighbor_ids,
large_sides,
orientations)
mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, fstar_upper,
fstar_lower, reverse_upper, reverse_lower, neighbor_ids, large_sides,
orientations;
configurator_3d(mortar_flux_copy_to_kernel, size_arr)...)

cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically

return nothing
end

Expand Down
46 changes: 23 additions & 23 deletions test/test_script.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,26 +95,26 @@ Trixi.prolong2mortars!(cache, u, mesh, equations,
@test_approx cache_gpu.mortars.u_upper cache.mortars.u_upper
@test_approx cache_gpu.mortars.u_lower cache.mortars.u_lower

# # Test `cuda_mortar_flux!`
# TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
# solver_gpu, cache_gpu)
# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh,
# Trixi.have_nonconservative_terms(equations), equations,
# solver.mortar, solver.surface_integral, solver, cache)
# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values

# # Test `cuda_surface_integral!`
# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
# @test_approx du_gpu ≈ du

# # Test `cuda_jacobian!`
# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
# Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
# @test_approx du_gpu ≈ du

# # Test `cuda_sources!`
# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
# @test_approx du_gpu ≈ du
# Test `cuda_mortar_flux!`
TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
solver_gpu, cache_gpu)
Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh,
Trixi.have_nonconservative_terms(equations), equations,
solver.mortar, solver.surface_integral, solver, cache)
@test_approx cache_gpu.elements.surface_flux_values cache.elements.surface_flux_values

# Test `cuda_surface_integral!`
TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
@test_approx du_gpu du

# Test `cuda_jacobian!`
TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
@test_approx du_gpu du

# Test `cuda_sources!`
TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
@test_approx du_gpu du

0 comments on commit 17dc541

Please sign in to comment.