diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 5509c4de..630be8fb 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -673,7 +673,8 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper, end # Kernel for calculating mortar fluxes -function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, equations, +function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, + equations::AbstractEquations{2}, surface_flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -687,9 +688,9 @@ function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orienta flux_lower_node = surface_flux(u_ll_lower, u_rr_lower, orientation, equations) @inbounds begin - for i in axes(fstar_upper, 1) - fstar_upper[i, j, k] = flux_upper_node[i] - fstar_lower[i, j, k] = flux_lower_node[i] + for ii in axes(fstar_upper, 1) + fstar_upper[ii, j, k] = flux_upper_node[ii] + fstar_lower[ii, j, k] = flux_lower_node[ii] end end end @@ -1338,16 +1339,107 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati large_sides, orientations) mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, fstar_upper, - fstar_lower, - reverse_upper, reverse_lower, neighbor_ids, large_sides, + fstar_lower, reverse_upper, reverse_lower, neighbor_ids, large_sides, orientations; configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically + + return nothing end +# Kernel for calculating mortar fluxes and adding nonconservative fluxes +function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, large_sides, + equations::AbstractEquations{2}, surface_flux::Any, + nonconservative_flux::Any) + j = (blockIdx().x - 1) * blockDim().x + threadIdx().x + k = (blockIdx().y - 1) * blockDim().y + threadIdx().y + + if (j <= size(u_upper, 3) && k <= length(orientations)) + u_ll_upper, u_rr_upper = get_surface_node_vars(u_upper, equations, j, k) + u_ll_lower, u_rr_lower = get_surface_node_vars(u_lower, equations, j, k) + + orientation = orientations[k] + large_side = large_sides[k] + + flux_upper_node = surface_flux(u_ll_upper, u_rr_upper, orientation, equations) + flux_lower_node = surface_flux(u_ll_lower, u_rr_lower, orientation, equations) + + @inbounds begin + for ii in axes(fstar_upper, 1) + fstar_upper[ii, j, k] = flux_upper_node[ii] + fstar_lower[ii, j, k] = flux_lower_node[ii] + end + end + + u_upper1 = (2 - large_side) * u_ll_upper + (large_side - 1) * u_rr_upper + u_upper2 = (large_side - 1) * u_ll_upper + (2 - large_side) * u_rr_upper + + u_lower1 = (2 - large_side) * u_ll_lower + (large_side - 1) * u_rr_lower + u_lower2 = (large_side - 1) * u_ll_lower + (2 - large_side) * u_rr_lower + + noncons_flux_upper = nonconservative_flux(u_upper1, u_upper2, orientation, equations) + noncons_flux_lower = nonconservative_flux(u_lower1, u_lower2, orientation, equations) + + @inbounds begin + for ii in axes(fstar_upper, 1) + fstar_upper[ii, j, k] += 0.5 * noncons_flux_upper[ii] + fstar_lower[ii, j, k] += 0.5 * noncons_flux_lower[ii] + end + end + end + + return nothing +end + +# Pack kernels for calculating mortar fluxes function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservative_terms::True, equations, dg::DGSEM, cache) + surface_flux, nonconservative_flux = dg.surface_integral.surface_flux + neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids) + large_sides = CuArray{Int64}(cache.mortars.large_sides) + orientations = CuArray{Int64}(cache.mortars.orientations) + + u_upper = CuArray{Float64}(cache.mortars.u_upper) + u_lower = CuArray{Float64}(cache.mortars.u_lower) + reverse_upper = CuArray{Float64}(dg.mortar.reverse_upper) + reverse_lower = CuArray{Float64}(dg.mortar.reverse_lower) + + surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) + tmp_surface_flux_values = zero(similar(surface_flux_values)) + + fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + + size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) + + mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, + u_lower, orientations, large_sides, + equations, surface_flux, + nonconservative_flux) + mortar_flux_kernel(fstar_upper, fstar_lower, u_upper, u_lower, orientations, large_sides, + equations, surface_flux, nonconservative_flux; + configurator_2d(mortar_flux_kernel, size_arr)...) + + size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2), + length(orientations)) + + mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, + tmp_surface_flux_values, + fstar_upper, + fstar_lower, + reverse_upper, + reverse_lower, + neighbor_ids, + large_sides, + orientations) + mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, fstar_upper, + fstar_lower, reverse_upper, reverse_lower, neighbor_ids, large_sides, + orientations; + configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + + cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically + return nothing end diff --git a/test/test_script.jl b/test/test_script.jl index 3e27577b..98b685f2 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -95,26 +95,26 @@ Trixi.prolong2mortars!(cache, u, mesh, equations, @test_approx cache_gpu.mortars.u_upper ≈ cache.mortars.u_upper @test_approx cache_gpu.mortars.u_lower ≈ cache.mortars.u_lower -# # Test `cuda_mortar_flux!` -# TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, -# Trixi.have_nonconservative_terms(equations), equations, -# solver.mortar, solver.surface_integral, solver, cache) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_surface_integral!` -# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) -# @test_approx du_gpu ≈ du - -# # Test `cuda_jacobian!` -# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) -# Trixi.apply_jacobian!(du, mesh, equations, solver, cache) -# @test_approx du_gpu ≈ du - -# # Test `cuda_sources!` -# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -# @test_approx du_gpu ≈ du +# Test `cuda_mortar_flux!` +TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_surface_integral!` +TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) +Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) +@test_approx du_gpu ≈ du + +# Test `cuda_jacobian!` +TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) +Trixi.apply_jacobian!(du, mesh, equations, solver, cache) +@test_approx du_gpu ≈ du + +# Test `cuda_sources!` +TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) +Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) +@test_approx du_gpu ≈ du