From 041da5749302c119beea6681e9817d762af5a9fb Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 29 Aug 2024 16:43:51 -1000 Subject: [PATCH 1/4] add mortar flux kernel to 2D solver --- src/auxiliary/helpers.jl | 4 +- src/solvers/dg_2d.jl | 176 +++++++++++++++++++++++++++---- src/solvers/dg_3d.jl | 36 +++++-- test/test_adevction_mortar.jl | 26 ++++- test/test_advection_basic.jl | 11 ++ test/test_euler_ec.jl | 11 ++ test/test_euler_source_terms.jl | 11 ++ test/test_hypdiff_nonperiodic.jl | 11 ++ test/test_script.jl | 53 +++------- 9 files changed, 272 insertions(+), 67 deletions(-) diff --git a/src/auxiliary/helpers.jl b/src/auxiliary/helpers.jl index 8e7e7822..c099e1a8 100644 --- a/src/auxiliary/helpers.jl +++ b/src/auxiliary/helpers.jl @@ -25,9 +25,9 @@ end # Helper function for checking `cache.mortars` @inline function check_cache_mortars(cache) if iszero(length(cache.mortars.orientations)) - return True() - else return False() + else + return True() end end diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 3f3e31a4..5648297b 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -442,29 +442,29 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper, u2 = size(u, 2) @inbounds begin - for ii in axes(forward_upper, 2) - u_upper[leftright, i, j, k] += forward_upper[j, ii] * + for jj in axes(forward_upper, 2) + u_upper[leftright, i, j, k] += forward_upper[j, jj] * u[i, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * ii, - isequal(orientation, 1) * ii + isequal(orientation, 2) * u2, + isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj, + isequal(orientation, 1) * jj + isequal(orientation, 2) * u2, large_element] * isequal(large_side, 1) - u_lower[leftright, i, j, k] += forward_lower[j, ii] * + u_lower[leftright, i, j, k] += forward_lower[j, jj] * u[i, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * ii, - isequal(orientation, 1) * ii + isequal(orientation, 2) * u2, + isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj, + isequal(orientation, 1) * jj + isequal(orientation, 2) * u2, large_element] * isequal(large_side, 1) end - for ii in axes(forward_lower, 2) - u_upper[leftright, i, j, k] += forward_upper[j, ii] * + for jj in axes(forward_lower, 2) + u_upper[leftright, i, j, k] += forward_upper[j, jj] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * ii, - isequal(orientation, 1) * ii + isequal(orientation, 2) * 1, + isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj, + isequal(orientation, 1) * jj + isequal(orientation, 2) * 1, large_element] * isequal(large_side, 2) - u_lower[leftright, i, j, k] += forward_lower[j, ii] * + u_lower[leftright, i, j, k] += forward_lower[j, jj] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * ii, - isequal(orientation, 1) * ii + isequal(orientation, 2) * 1, + isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj, + isequal(orientation, 1) * jj + isequal(orientation, 2) * 1, large_element] * isequal(large_side, 2) end end @@ -473,6 +473,82 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper, return nothing end +# Kernel for calculating mortar fluxes +function mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, u_lower, orientations, equations, + surface_flux::Any) + j = (blockIdx().x - 1) * blockDim().x + threadIdx().x + k = (blockIdx().y - 1) * blockDim().y + threadIdx().y + + if (j <= size(u_upper, 3) && k <= length(orientations)) + u_ll_upper, u_rr_upper = get_surface_node_vars(u_upper, equations, 
j, k) + u_ll_lower, u_rr_lower = get_surface_node_vars(u_lower, equations, j, k) + orientation = orientations[k] + + flux_upper_node = surface_flux(u_ll_upper, u_rr_upper, orientation, equations) + flux_lower_node = surface_flux(u_ll_lower, u_rr_lower, orientation, equations) + + @inbounds begin + for i in axes(fstar_upper, 1) + fstar_upper[i, j, k] = flux_upper_node[i] + fstar_lower[i, j, k] = flux_lower_node[i] + end + end + end + + return nothing +end + +# Kernel for copying mortar fluxes small to small and small to large +function mortar_flux_copy_to_kernel!(surface_flux_values, tmp_surface_flux_values, fstar_upper, + fstar_lower, reverse_upper, reverse_lower, neighbor_ids, + large_sides, orientations) + i = (blockIdx().x - 1) * blockDim().x + threadIdx().x + j = (blockIdx().y - 1) * blockDim().y + threadIdx().y + k = (blockIdx().z - 1) * blockDim().z + threadIdx().z + + if (i <= size(surface_flux_values, 1) && j <= size(surface_flux_values, 2) && + k <= length(orientations)) + large_element = neighbor_ids[3, k] + upper_element = neighbor_ids[2, k] + lower_element = neighbor_ids[1, k] + + large_side = large_sides[k] + orientation = orientations[k] + + # Use math expression to enhance performance (against control flow), it is equivalent to, + # `(2 - large_side) * (2 - orientation) * 1 + + # (2 - large_side) * (orientation - 1) * 3 + + # (large_side - 1) * (2 - orientation) * 2 + + # (large_side - 1) * (orientation - 1) * 4`. + direction = large_side + 2 * orientation - 2 + + surface_flux_values[i, j, direction, upper_element] = fstar_upper[i, j, k] + surface_flux_values[i, j, direction, lower_element] = fstar_lower[i, j, k] + + # Use math expression to enhance performance (against control flow), it is equivalent to, + # `(2 - large_side) * (2 - orientation) * 2 + + # (2 - large_side) * (orientation - 1) * 4 + + # (large_side - 1) * (2 - orientation) * 1 + + # (large_side - 1) * (orientation - 1) * 3`. 
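+        # For example, `large_side = 1` and `orientation = 2` give
+        # `direction = 2 * 2 - 1 + 1 = 4`, matching the
+        # `(2 - large_side) * (orientation - 1) * 4` term above.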
+ direction = 2 * orientation - large_side + 1 + + @inbounds begin + for ii in axes(reverse_upper, 2) # i.e., ` for ii in axes(reverse_lower, 2)` + tmp_surface_flux_values[i, j, direction, large_element] += reverse_upper[j, ii] * + fstar_upper[i, ii, k] + + reverse_lower[j, ii] * + fstar_lower[i, ii, k] + end + + surface_flux_values[i, j, direction, large_element] = tmp_surface_flux_values[i, j, + direction, + large_element] + end + end + + return nothing +end + # Kernel for calculating surface integrals function surface_integral_kernel!(du, factor_arr, surface_flux_values, equations::AbstractEquations{2}) @@ -825,17 +901,18 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{2}, boundary_conditions::NamedTup end # Dummy function for asserting mortars -function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::True, dg::DGSEM, cache) +function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::False, dg::DGSEM, cache) @assert iszero(length(cache.mortars.orientations)) end # Pack kernels for prolonging solution to mortars -function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::False, dg::DGSEM, cache) +function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::True, dg::DGSEM, cache) neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids) large_sides = CuArray{Int64}(cache.mortars.large_sides) orientations = CuArray{Int64}(cache.mortars.orientations) - u_upper = zero(CuArray{Float64}(cache.mortars.u_upper)) - u_lower = zero(CuArray{Float64}(cache.mortars.u_lower)) + + u_upper = zero(CuArray{Float64}(cache.mortars.u_upper)) # NaN to zero + u_lower = zero(CuArray{Float64}(cache.mortars.u_lower)) # NaN to zero forward_upper = CuArray{Float64}(dg.mortar.forward_upper) forward_lower = CuArray{Float64}(dg.mortar.forward_lower) @@ -871,6 +948,66 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::False, dg::D return nothing end +# Dummy function for asserting mortar fluxes +function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::False, nonconservative_terms, + equations, dg::DGSEM, cache) + @assert iszero(length(cache.mortars.orientations)) +end + +# Pack kernels for calculating mortar fluxes +function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservative_terms::False, + equations, dg::DGSEM, cache) + surface_flux = dg.surface_integral.surface_flux + + neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids) + large_sides = CuArray{Int64}(cache.mortars.large_sides) + orientations = CuArray{Int64}(cache.mortars.orientations) + + u_upper = CuArray{Float64}(cache.mortars.u_upper) + u_lower = CuArray{Float64}(cache.mortars.u_lower) + reverse_upper = CuArray{Float64}(dg.mortar.reverse_upper) + reverse_lower = CuArray{Float64}(dg.mortar.reverse_lower) + + surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) + tmp_surface_flux_values = zero(similar(surface_flux_values)) + + fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + + size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) + + mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper, fstar_lower, u_upper, + u_lower, orientations, equations, + surface_flux) + mortar_flux_kernel(fstar_upper, fstar_lower, u_upper, u_lower, orientations, equations, + surface_flux; + configurator_2d(mortar_flux_kernel, size_arr)...) 
+ + size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2), + length(orientations)) + + mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, + tmp_surface_flux_values, + fstar_upper, + fstar_lower, + reverse_upper, + reverse_lower, + neighbor_ids, + large_sides, + orientations) + mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, fstar_upper, + fstar_lower, + reverse_upper, reverse_lower, neighbor_ids, large_sides, + orientations; + configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + + cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically +end + +function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservative_terms::True, + equations, dg::DGSEM, cache) +end + # Pack kernels for calculating surface integrals function cuda_surface_integral!(du, mesh::TreeMesh{2}, equations, dg::DGSEM, cache) factor_arr = CuArray{Float64}([ @@ -941,6 +1078,9 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, initial_condit cuda_prolong2mortars!(u, mesh, check_cache_mortars(cache), dg, cache) + cuda_mortar_flux!(mesh, check_cache_mortars(cache), have_nonconservative_terms(equations), + equations, dg, cache) + cuda_surface_integral!(du, mesh, equations, dg, cache) cuda_jacobian!(du, mesh, equations, cache) diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 5918e17d..f30f642a 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -755,19 +755,20 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{3}, boundary_conditions::NamedTup end # Dummy function for asserting mortars -function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DGSEM, cache) +function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::False, dg::DGSEM, cache) @assert iszero(length(cache.mortars.orientations)) end # Pack kernels for prolonging solution to mortars -function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::False, dg::DGSEM, cache) +function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DGSEM, cache) neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids) large_sides = CuArray{Int64}(cache.mortars.large_sides) orientations = CuArray{Int64}(cache.mortars.orientations) - u_upper_left = zero(CuArray{Float64}(cache.mortars.u_upper_left)) - u_upper_right = zero(CuArray{Float64}(cache.mortars.u_upper_right)) - u_lower_left = zero(CuArray{Float64}(cache.mortars.u_lower_left)) - u_lower_right = zero(CuArray{Float64}(cache.mortars.u_lower_right)) + + u_upper_left = zero(CuArray{Float64}(cache.mortars.u_upper_left)) # NaN to zero + u_upper_right = zero(CuArray{Float64}(cache.mortars.u_upper_right)) # NaN to zero + u_lower_left = zero(CuArray{Float64}(cache.mortars.u_lower_left)) # NaN to zero + u_lower_right = zero(CuArray{Float64}(cache.mortars.u_lower_right)) # NaN to zero forward_upper = CuArray{Float64}(dg.mortar.forward_upper) forward_lower = CuArray{Float64}(dg.mortar.forward_lower) @@ -787,10 +788,10 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::False, dg::D neighbor_ids, large_sides, orientations; configurator_3d(prolong_mortars_small2small_kernel, size_arr)...) 
- tmp_upper_left = zero(similar(u_upper_left)) - tmp_upper_right = zero(similar(u_upper_right)) - tmp_lower_left = zero(similar(u_lower_left)) - tmp_lower_right = zero(similar(u_lower_right)) + tmp_upper_left = zero(similar(u_upper_left)) # undef to zero + tmp_upper_right = zero(similar(u_upper_right)) # undef to zero + tmp_lower_left = zero(similar(u_lower_left)) # undef to zero + tmp_lower_right = zero(similar(u_lower_right)) # undef to zero size_arr = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3)^2, size(u_upper_left, 5)) @@ -825,6 +826,21 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::False, dg::D return nothing end +# Dummy function for asserting mortar fluxes +function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::False, nonconservative_terms, + equations, dg::DGSEM, cache) + @assert iszero(length(cache.mortars.orientations)) +end + +# Pack kernels for calculating mortar fluxes +function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservative_terms::False, + equations, dg::DGSEM, cache) +end + +function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservative_terms::True, + equations, dg::DGSEM, cache) +end + # Pack kernels for calculating surface integrals function cuda_surface_integral!(du, mesh::TreeMesh{3}, equations, dg::DGSEM, cache) factor_arr = CuArray{Float64}([ diff --git a/test/test_adevction_mortar.jl b/test/test_adevction_mortar.jl index bdcd5725..255602ed 100644 --- a/test/test_adevction_mortar.jl +++ b/test/test_adevction_mortar.jl @@ -113,7 +113,31 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_upper_gpu ≈ u_upper @test u_lower_gpu ≈ u_lower - # To be implemented + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + + # Test `cuda_surface_integral!` + TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) + Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du + + # Test `cuda_jacobian!` + TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) + Trixi.apply_jacobian!(du, mesh, equations, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du + + # Test `cuda_sources!` + TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) + Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du # Copy data back to host du_cpu, u_cpu = TrixiGPU.copy_to_host!(du_gpu, u_gpu) diff --git a/test/test_advection_basic.jl b/test/test_advection_basic.jl index 8dfb2f5f..6f66ae52 100644 --- a/test/test_advection_basic.jl +++ b/test/test_advection_basic.jl @@ -208,6 +208,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_upper_gpu ≈ u_upper @test u_lower_gpu ≈ u_lower + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + 
solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_euler_ec.jl b/test/test_euler_ec.jl index 47cd4e50..a4a138ac 100644 --- a/test/test_euler_ec.jl +++ b/test/test_euler_ec.jl @@ -211,6 +211,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_upper_gpu ≈ u_upper @test u_lower_gpu ≈ u_lower + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_euler_source_terms.jl b/test/test_euler_source_terms.jl index 41559d80..5c3ee822 100644 --- a/test/test_euler_source_terms.jl +++ b/test/test_euler_source_terms.jl @@ -209,6 +209,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_upper_gpu ≈ u_upper @test u_lower_gpu ≈ u_lower + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_hypdiff_nonperiodic.jl b/test/test_hypdiff_nonperiodic.jl index 5bf6600e..77b05cac 100644 --- a/test/test_hypdiff_nonperiodic.jl +++ b/test/test_hypdiff_nonperiodic.jl @@ -221,6 +221,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_upper_gpu ≈ u_upper @test u_lower_gpu ≈ u_lower + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, 
cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_script.jl b/test/test_script.jl index 779524bd..8bd25ce4 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -3,27 +3,25 @@ using OrdinaryDiffEq using CUDA using Test -advection_velocity = (0.2, -0.7, 0.5) -equations = LinearScalarAdvectionEquation3D(advection_velocity) +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) initial_condition = initial_condition_convergence_test solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) -coordinates_min = (-1.0, -1.0, -1.0) -coordinates_max = (1.0, 1.0, 1.0) -refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0, -1.0), - coordinates_max = (1.0, 1.0, 1.0)), - (type = "box", coordinates_min = (0.0, -0.5, -0.5), - coordinates_max = (0.5, 0.5, 0.5))) +coordinates_min = (-1.0, -1.0) +coordinates_max = (1.0, 1.0) +refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0), + coordinates_max = (1.0, 1.0)),) mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 2, refinement_patches = refinement_patches, n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) - (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi -# Get copy for GPU to avoid overwriting during tests + +# Get deep copy for GPU to avoid overwriting during tests mesh_gpu, equations_gpu = deepcopy(mesh), deepcopy(equations) initial_condition_gpu, boundary_conditions_gpu, source_terms_gpu = deepcopy(initial_condition), deepcopy(boundary_conditions), @@ -31,7 +29,7 @@ initial_condition_gpu, boundary_conditions_gpu, source_terms_gpu = deepcopy(init solver_gpu, cache_gpu = deepcopy(solver), deepcopy(cache) t = t_gpu = 0.0 -tspan = (0.0, 5.0) +tspan = (0.0, 1.0) ode = semidiscretize(semi, tspan) u_ode = copy(ode.u0) @@ -72,33 +70,16 @@ TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, equations Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, solver.surface_integral, solver) +# Test `cuda_prolong2mortars!` TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), solver_gpu, cache_gpu) Trixi.prolong2mortars!(cache, u, mesh, equations, solver.mortar, solver.surface_integral, solver) -u_upper_left = cache_gpu.mortars.u_upper_left -u_upper_right = cache_gpu.mortars.u_upper_right -u_lower_left = cache_gpu.mortars.u_lower_left -u_lower_right = cache_gpu.mortars.u_lower_right - -# u_upper_left1 = cache.mortars.u_upper_left -# u_upper_right1 = cache.mortars.u_upper_right -# u_lower_left1 = cache.mortars.u_lower_left -# u_lower_right1 = cache.mortars.u_lower_right - -# @test cache_gpu.mortars.u_upper == cache.mortars.u_upper -# @test cache_gpu.mortars.u_lower == cache.mortars.u_lower - -# # Test `cuda_surface_integral!` -# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) - -# # Test `cuda_jacobian!` -# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, 
equations_gpu, cache_gpu) -# Trixi.apply_jacobian!(du, mesh, equations, solver, cache) - -# # Test `cuda_sources!` -# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -# @test CUDA.@allowscalar du ≈ du_gpu +TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) +@test cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values From a3782487412a57be38c370d12d3399d75ad42ed6 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 29 Aug 2024 22:55:47 -1000 Subject: [PATCH 2/4] add mortar flux kernel to 3D solver --- src/solvers/dg_3d.jl | 228 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 225 insertions(+), 3 deletions(-) diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index f30f642a..0e66c142 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -479,6 +479,158 @@ function prolong_mortars_large2small_kernel!(u_upper_left, u_upper_right, u_lowe return nothing end +# Kernel for calculating mortar fluxes +function mortar_flux_kernel!(fstar_upper_left, fstar_upper_right, fstar_lower_left, + fstar_lower_right, u_upper_left, u_upper_right, u_lower_left, + u_lower_right, orientations, equations, surface_flux::Any) + i = (blockIdx().x - 1) * blockDim().x + threadIdx().x + j = (blockIdx().y - 1) * blockDim().y + threadIdx().y + k = (blockIdx().z - 1) * blockDim().z + threadIdx().z + + if (i <= size(u_upper_left, 3) && j <= size(u_upper_left, 4) && k <= length(orientations)) + u_ll_upper_left, u_rr_upper_left = get_surface_node_vars(u_upper_left, equations, i, j, k) + u_ll_upper_right, u_rr_upper_right = get_surface_node_vars(u_upper_right, equations, i, j, + k) + u_ll_lower_left, u_rr_lower_left = get_surface_node_vars(u_lower_left, equations, i, j, k) + u_ll_lower_right, u_rr_lower_right = get_surface_node_vars(u_lower_right, equations, i, j, + k) + + orientation = orientations[k] + + flux_upper_left_node = surface_flux(u_ll_upper_left, u_rr_upper_left, orientation, + equations) + flux_upper_right_node = surface_flux(u_ll_upper_right, u_rr_upper_right, orientation, + equations) + flux_lower_left_node = surface_flux(u_ll_lower_left, u_rr_lower_left, orientation, + equations) + flux_lower_right_node = surface_flux(u_ll_lower_right, u_rr_lower_right, orientation, + equations) + + @inbounds begin + for ii in axes(fstar_upper_left, 1) + fstar_upper_left[ii, i, j, k] = flux_upper_left_node[ii] + fstar_upper_right[ii, i, j, k] = flux_upper_right_node[ii] + fstar_lower_left[ii, i, j, k] = flux_lower_left_node[ii] + fstar_lower_right[ii, i, j, k] = flux_lower_right_node[ii] + end + end + end + + return nothing +end + +# Kernel for copying mortar fluxes small to small and small to large +function mortar_flux_copy_to_kernel!(surface_flux_values, tmp_surface_flux_values, tmp_upper_left, + tmp_upper_right, tmp_lower_left, tmp_lower_right, + fstar_upper_left, + fstar_upper_right, fstar_lower_left, fstar_lower_right, + reverse_upper, + reverse_lower, neighbor_ids, large_sides, orientations) + i = (blockIdx().x - 1) * blockDim().x + threadIdx().x + j = (blockIdx().y - 1) * blockDim().y + threadIdx().y + k = (blockIdx().z - 1) * blockDim().z + threadIdx().z + + if (i <= 
size(surface_flux_values, 1) && j <= size(surface_flux_values, 2)^2 && + k <= length(orientations)) + j1 = div(j - 1, size(surface_flux_values, 2)) + 1 + j2 = rem(j - 1, size(surface_flux_values, 2)) + 1 + + lower_left_element = neighbor_ids[1, k] + lower_right_element = neighbor_ids[2, k] + upper_left_element = neighbor_ids[3, k] + upper_right_element = neighbor_ids[4, k] + large_element = neighbor_ids[5, k] + + large_side = large_sides[k] + orientation = orientations[k] + + # Use simple math expression to enhance the performance (against control flow), + # it is equivalent to, `isequal(large_side, 1) * isequal(orientation, 1) * 1 + + # isequal(large_side, 1) * isequal(orientation, 2) * 3 + + # isequal(large_side, 1) * isequal(orientation, 3) * 5 + + # isequal(large_side, 2) * isequal(orientation, 1) * 2 + + # isequal(large_side, 2) * isequal(orientation, 2) * 4 + + # isequal(large_side, 2) * isequal(orientation, 3) * 6`. + # Please also check the original code in Trixi.jl when you modify this code. + direction = 2 * orientation + large_side - 2 + + surface_flux_values[i, j1, j2, direction, upper_left_element] = fstar_upper_left[i, j1, j2, + k] + surface_flux_values[i, j1, j2, direction, upper_right_element] = fstar_upper_right[i, j1, + j2, k] + surface_flux_values[i, j1, j2, direction, lower_left_element] = fstar_lower_left[i, j1, j2, + k] + surface_flux_values[i, j1, j2, direction, lower_right_element] = fstar_lower_right[i, j1, + j2, k] + + # Use simple math expression to enhance the performance (against control flow), + # it is equivalent to, `isequal(large_side, 1) * isequal(orientation, 1) * 2 + + # isequal(large_side, 1) * isequal(orientation, 2) * 4 + + # isequal(large_side, 1) * isequal(orientation, 3) * 6 + + # isequal(large_side, 2) * isequal(orientation, 1) * 1 + + # isequal(large_side, 2) * isequal(orientation, 2) * 3 + + # isequal(large_side, 2) * isequal(orientation, 3) * 5`. + # Please also check the original code in Trixi.jl when you modify this code. 
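+        # For example, `large_side = 2` and `orientation = 3` give
+        # `direction = 2 * 3 - 2 + 1 = 5`, matching the
+        # `isequal(large_side, 2) * isequal(orientation, 3) * 5` term above.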
+ direction = 2 * orientation - large_side + 1 + + @inbounds begin + for j1j1 in axes(reverse_upper, 2) + tmp_upper_left[i, j1, j2, direction, large_element] += reverse_lower[j1, j1j1] * + fstar_upper_left[i, j1j1, j2, + k] + tmp_upper_right[i, j1, j2, direction, large_element] += reverse_upper[j1, j1j1] * + fstar_upper_right[i, j1j1, + j2, k] + tmp_lower_left[i, j1, j2, direction, large_element] += reverse_lower[j1, j1j1] * + fstar_lower_left[i, j1j1, j2, + k] + tmp_lower_right[i, j1, j2, direction, large_element] += reverse_upper[j1, j1j1] * + fstar_lower_right[i, j1j1, + j2, k] + end + + for j2j2 in axes(reverse_lower, 2) + tmp_surface_flux_values[i, j1, j2, direction, large_element] += reverse_upper[j2, + j2j2] * + tmp_upper_left[i, + j1, + j2j2, + direction, + large_element] + tmp_surface_flux_values[i, j1, j2, direction, large_element] += reverse_upper[j2, + j2j2] * + tmp_upper_right[i, + j1, + j2j2, + direction, + large_element] + tmp_surface_flux_values[i, j1, j2, direction, large_element] += reverse_lower[j2, + j2j2] * + tmp_lower_left[i, + j1, + j2j2, + direction, + large_element] + tmp_surface_flux_values[i, j1, j2, direction, large_element] += reverse_lower[j2, + j2j2] * + tmp_lower_right[i, + j1, + j2j2, + direction, + large_element] + end + + surface_flux_values[i, j1, j2, direction, large_element] = tmp_surface_flux_values[i, + j1, + j2, + direction, + large_element] + end + end + + return nothing +end + # Kernel for calculating surface integrals function surface_integral_kernel!(du, factor_arr, surface_flux_values, equations::AbstractEquations{3}) @@ -793,9 +945,6 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DG tmp_lower_left = zero(similar(u_lower_left)) # undef to zero tmp_lower_right = zero(similar(u_lower_right)) # undef to zero - size_arr = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3)^2, - size(u_upper_left, 5)) - prolong_mortars_large2small_kernel = @cuda launch=false prolong_mortars_large2small_kernel!(u_upper_left, u_upper_right, u_lower_left, @@ -835,6 +984,76 @@ end # Pack kernels for calculating mortar fluxes function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservative_terms::False, equations, dg::DGSEM, cache) + surface_flux = dg.surface_integral.surface_flux + + neighbor_ids = CuArray{Int64}(cache.mortars.neighbor_ids) + large_sides = CuArray{Int64}(cache.mortars.large_sides) + orientations = CuArray{Int64}(cache.mortars.orientations) + + u_upper_left = CuArray{Float64}(cache.mortars.u_upper_left) + u_upper_right = CuArray{Float64}(cache.mortars.u_upper_right) + u_lower_left = CuArray{Float64}(cache.mortars.u_lower_left) + u_lower_right = CuArray{Float64}(cache.mortars.u_lower_right) + reverse_upper = CuArray{Float64}(dg.mortar.reverse_upper) + reverse_lower = CuArray{Float64}(dg.mortar.reverse_lower) + + surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) + tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero + + fstar_upper_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), + size(u_upper_left, 4), length(orientations)) + fstar_upper_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), + size(u_upper_left, 4), length(orientations)) + fstar_lower_left = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), + size(u_upper_left, 4), length(orientations)) + fstar_lower_right = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3), + size(u_upper_left, 4), 
length(orientations)) + + size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4), + length(orientations)) + + mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper_left, fstar_upper_right, + fstar_lower_left, fstar_lower_right, + u_upper_left, u_upper_right, + u_lower_left, u_lower_right, + orientations, equations, + surface_flux) + mortar_flux_kernel(fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, + u_upper_left, u_upper_right, u_lower_left, u_lower_right, orientations, + equations, surface_flux; + configurator_3d(mortar_flux_kernel, size_arr)...) + + tmp_upper_left = zero(similar(surface_flux_values)) # undef to zero + tmp_upper_right = zero(similar(surface_flux_values)) # undef to zero + tmp_lower_left = zero(similar(surface_flux_values)) # undef to zero + tmp_lower_right = zero(similar(surface_flux_values)) # undef to zero + + size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2)^2, + length(orientations)) + + mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, + tmp_surface_flux_values, + tmp_upper_left, + tmp_upper_right, + tmp_lower_left, + tmp_lower_right, + fstar_upper_left, + fstar_upper_right, + fstar_lower_left, + fstar_lower_right, + reverse_upper, + reverse_lower, + neighbor_ids, + large_sides, + orientations) + mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, tmp_upper_left, + tmp_upper_right, tmp_lower_left, tmp_lower_right, fstar_upper_left, + fstar_upper_right, fstar_lower_left, fstar_lower_right, + reverse_upper, + reverse_lower, neighbor_ids, large_sides, orientations; + configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + + cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically end function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservative_terms::True, @@ -911,6 +1130,9 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, initial_condit cuda_prolong2mortars!(u, mesh, cache.mortars, dg, cache) + cuda_mortar_flux!(mesh, cache.mortars, have_nonconservative_terms(equations), + equations, dg, cache) + cuda_surface_integral!(du, mesh, equations, dg, cache) cuda_jacobian!(du, mesh, equations, cache) From 52074eb8bb1a10985ba199c665948e22dfe8c55e Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 29 Aug 2024 23:00:35 -1000 Subject: [PATCH 3/4] add tests for mortar flux kernel --- test/test_adevction_mortar.jl | 26 ++++++++++++++++++++++++- test/test_advection_basic.jl | 11 +++++++++++ test/test_euler_ec.jl | 11 +++++++++++ test/test_euler_source_terms.jl | 11 +++++++++++ test/test_hypdiff_nonperiodic.jl | 11 +++++++++++ test/test_script.jl | 16 ++++++++------- test/test_shallowwater_well_balanced.jl | 11 +++++++++++ 7 files changed, 89 insertions(+), 8 deletions(-) diff --git a/test/test_adevction_mortar.jl b/test/test_adevction_mortar.jl index 255602ed..6b106b96 100644 --- a/test/test_adevction_mortar.jl +++ b/test/test_adevction_mortar.jl @@ -245,7 +245,31 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_lower_left_gpu ≈ u_lower_left @test u_lower_right_gpu ≈ u_lower_right - # To be implemented + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), 
equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + + # Test `cuda_surface_integral!` + TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) + Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du + + # Test `cuda_jacobian!` + TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) + Trixi.apply_jacobian!(du, mesh, equations, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du + + # Test `cuda_sources!` + TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) + Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du # Copy data back to host du_cpu, u_cpu = TrixiGPU.copy_to_host!(du_gpu, u_gpu) diff --git a/test/test_advection_basic.jl b/test/test_advection_basic.jl index 6f66ae52..7d8de9b3 100644 --- a/test/test_advection_basic.jl +++ b/test/test_advection_basic.jl @@ -335,6 +335,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_lower_left_gpu ≈ u_lower_left @test u_lower_right_gpu ≈ u_lower_right + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_euler_ec.jl b/test/test_euler_ec.jl index a4a138ac..a8c9a25c 100644 --- a/test/test_euler_ec.jl +++ b/test/test_euler_ec.jl @@ -339,6 +339,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_lower_left_gpu ≈ u_lower_left @test u_lower_right_gpu ≈ u_lower_right + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_euler_source_terms.jl b/test/test_euler_source_terms.jl index 5c3ee822..7a62255d 100644 --- a/test/test_euler_source_terms.jl +++ b/test/test_euler_source_terms.jl @@ -338,6 +338,17 @@ isdir(outdir) && rm(outdir, recursive = 
true) @test u_lower_left_gpu ≈ u_lower_left @test u_lower_right_gpu ≈ u_lower_right + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_hypdiff_nonperiodic.jl b/test/test_hypdiff_nonperiodic.jl index 77b05cac..a6e9b21f 100644 --- a/test/test_hypdiff_nonperiodic.jl +++ b/test/test_hypdiff_nonperiodic.jl @@ -357,6 +357,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_lower_left_gpu ≈ u_lower_left @test u_lower_right_gpu ≈ u_lower_right + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Test `cuda_surface_integral!` TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) diff --git a/test/test_script.jl b/test/test_script.jl index 8bd25ce4..78eec607 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -3,16 +3,18 @@ using OrdinaryDiffEq using CUDA using Test -advection_velocity = (0.2, -0.7) -equations = LinearScalarAdvectionEquation2D(advection_velocity) +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) initial_condition = initial_condition_convergence_test solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) -coordinates_min = (-1.0, -1.0) -coordinates_max = (1.0, 1.0) -refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0), - coordinates_max = (1.0, 1.0)),) +coordinates_min = (-1.0, -1.0, -1.0) +coordinates_max = (1.0, 1.0, 1.0) +refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0, -1.0), + coordinates_max = (1.0, 1.0, 1.0)), + (type = "box", coordinates_min = (0.0, -0.5, -0.5), + coordinates_max = (0.5, 0.5, 0.5))) mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 2, refinement_patches = refinement_patches, @@ -29,7 +31,7 @@ initial_condition_gpu, boundary_conditions_gpu, source_terms_gpu = deepcopy(init solver_gpu, cache_gpu = deepcopy(solver), deepcopy(cache) t = t_gpu = 0.0 -tspan = (0.0, 1.0) +tspan = (0.0, 5.0) ode = semidiscretize(semi, tspan) u_ode = copy(ode.u0) diff --git a/test/test_shallowwater_well_balanced.jl b/test/test_shallowwater_well_balanced.jl index 18345712..9ab2fca9 100644 --- 
a/test/test_shallowwater_well_balanced.jl +++ b/test/test_shallowwater_well_balanced.jl @@ -238,6 +238,17 @@ isdir(outdir) && rm(outdir, recursive = true) @test u_upper_gpu ≈ u_upper @test u_lower_gpu ≈ u_lower + # Test `cuda_mortar_flux!` + TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) + Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) + surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) + surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) + @test surface_flux_values_gpu ≈ surface_flux_values + # Error when testing please check # # Test `cuda_surface_integral!` From 8e31fddcabc6be88d144277c1bdb9bed2e6c76e6 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 29 Aug 2024 23:26:36 -1000 Subject: [PATCH 4/4] add examples and update README.md --- README.md | 4 +-- examples/advection_mortar_2d.jl | 58 ++++++++++++++++++++++++++++++ examples/advection_mortar_3d.jl | 62 +++++++++++++++++++++++++++++++++ src/solvers/dg_3d.jl | 4 +-- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 examples/advection_mortar_2d.jl create mode 100644 examples/advection_mortar_3d.jl diff --git a/README.md b/README.md index c3dcb1ba..acfaa0df 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,8 @@ Our current focus is on the semidiscretization of PDEs. The table below shows th # GPU Kernels to be Implemented Kernels left to be implemented on `TreeMesh` with `DGSEM`: - 1D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG` -- 2D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!` -- 3D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!` +- 2D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!`- `nonconservative_terms::True` +- 3D Kernels: 1) `calc_volume_integral!` - `volume_integral::VolumeIntegralShockCapturingHG`, 2) `calc_mortar_flux!` - `nonconservative_terms::True` # Show Your Support! We always welcome new people to join us, please feel free to contribute. Also, if you find this package interesting and inspiring, please give it a star. Thanks! 
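The two example elixirs added below run like any other Trixi setup once the project environment is instantiated and a CUDA-capable device is available; an illustrative invocation (path assumed relative to the repository root) is

    julia --project=. -e 'include("examples/advection_mortar_2d.jl")'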
diff --git a/examples/advection_mortar_2d.jl b/examples/advection_mortar_2d.jl new file mode 100644 index 00000000..68a2500f --- /dev/null +++ b/examples/advection_mortar_2d.jl @@ -0,0 +1,58 @@ +using Trixi, TrixiGPU +using OrdinaryDiffEq + +# The example is taken from the Trixi.jl + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +initial_condition = initial_condition_convergence_test +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0) +coordinates_max = (1.0, 1.0) +refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0), + coordinates_max = (1.0, 1.0)),) +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 2, + refinement_patches = refinement_patches, + n_cells_max = 10_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 1.0) +ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + extra_analysis_integrals = (entropy,)) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +save_solution = SaveSolutionCallback(interval = 100, + save_initial_solution = true, + save_final_solution = true, + solution_variables = cons2prim) + +stepsize_callback = StepsizeCallback(cfl = 1.6) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/examples/advection_mortar_3d.jl b/examples/advection_mortar_3d.jl new file mode 100644 index 00000000..70e28efa --- /dev/null +++ b/examples/advection_mortar_3d.jl @@ -0,0 +1,62 @@ + +using Trixi, TrixiGPU +using OrdinaryDiffEq + +# The example is taken from the Trixi.jl + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +initial_condition = initial_condition_convergence_test +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0, -1.0) +coordinates_max = (1.0, 1.0, 1.0) +refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0, -1.0), + coordinates_max = (1.0, 1.0, 1.0)), + (type = "box", coordinates_min = (0.0, -0.5, -0.5), + coordinates_max = (0.5, 0.5, 0.5))) +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 2, + refinement_patches = refinement_patches, + n_cells_max = 10_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +tspan = (0.0, 5.0) +ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + extra_analysis_integrals = (entropy,)) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +save_solution = SaveSolutionCallback(interval = 100, + save_initial_solution = true, + save_final_solution = true, + solution_variables = cons2prim) + +stepsize_callback = StepsizeCallback(cfl = 1.2) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 0e66c142..5c9f45d0 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -1128,9 +1128,9 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, initial_condit cuda_boundary_flux!(t, mesh, boundary_conditions, equations, dg, cache) - cuda_prolong2mortars!(u, mesh, cache.mortars, dg, cache) + cuda_prolong2mortars!(u, mesh, check_cache_mortars(cache), dg, cache) - cuda_mortar_flux!(mesh, cache.mortars, have_nonconservative_terms(equations), + cuda_mortar_flux!(mesh, check_cache_mortars(cache), have_nonconservative_terms(equations), equations, dg, cache) cuda_surface_integral!(du, mesh, equations, dg, cache)
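
For reference, the small-to-large step in the new 2D `mortar_flux_copy_to_kernel!` amounts to, per conserved variable and per mortar, two dense matrix-vector products with the reverse mortar operators. A minimal CPU sketch of that equivalence (plain Julia with random data; `reverse_upper`/`reverse_lower` stand in for `dg.mortar.reverse_upper`/`dg.mortar.reverse_lower`, and the node count is illustrative):

    nnodes = 4                           # polydeg + 1, illustrative
    reverse_upper = rand(nnodes, nnodes) # stands in for dg.mortar.reverse_upper
    reverse_lower = rand(nnodes, nnodes) # stands in for dg.mortar.reverse_lower
    fstar_upper = rand(nnodes)           # one variable's flux on the upper small face
    fstar_lower = rand(nnodes)           # one variable's flux on the lower small face

    # Loop form, mirroring the kernel's accumulation into `tmp_surface_flux_values`
    f_large = zeros(nnodes)
    for j in 1:nnodes, ii in 1:nnodes
        f_large[j] += reverse_upper[j, ii] * fstar_upper[ii] +
                      reverse_lower[j, ii] * fstar_lower[ii]
    end

    # Equivalent matrix-vector form
    @assert f_large ≈ reverse_upper * fstar_upper + reverse_lower * fstar_lower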