From db199a5d411408349aa87cb460075235ed22c864 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Sat, 14 Sep 2024 18:25:26 -1000 Subject: [PATCH] Fix interface flux with true noncons --- src/solvers/dg_1d.jl | 44 ++++++++-------- src/solvers/dg_2d.jl | 44 ++++++++-------- src/solvers/dg_3d.jl | 8 +++ test/test_script.jl | 119 +++++++++++++++++++++---------------------- 4 files changed, 109 insertions(+), 106 deletions(-) diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index 1ff1b53..3a3b637 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -382,23 +382,23 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, equations::Abstrac end # Kernel for calculating surface and both nonconservative fluxes -function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_left_arr, - noncons_right_arr, equations::AbstractEquations{1}, +function surface_noncons_flux_kernel!(surface_flux_arr, noncons_left_arr, noncons_right_arr, + interfaces_u, equations::AbstractEquations{1}, surface_flux::Any, nonconservative_flux::Any) - k = (blockIdx().x - 1) * blockDim().x + threadIdx().x + j = (blockIdx().x - 1) * blockDim().x + threadIdx().x - if (k <= size(surface_flux_arr, 3)) - u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, k) + if (j <= size(surface_flux_arr, 2)) + u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j) surface_flux_node = surface_flux(u_ll, u_rr, 1, equations) noncons_left_node = nonconservative_flux(u_ll, u_rr, 1, equations) noncons_right_node = nonconservative_flux(u_rr, u_ll, 1, equations) @inbounds begin - for jj in axes(surface_flux_arr, 2) - surface_flux_arr[1, jj, k] = surface_flux_node[jj] - noncons_left_arr[1, jj, k] = noncons_left_node[jj] - noncons_right_arr[1, jj, k] = noncons_right_node[jj] + for ii in axes(surface_flux_arr, 1) + surface_flux_arr[ii, j] = surface_flux_node[ii] + noncons_left_arr[ii, j] = noncons_left_node[ii] + noncons_right_arr[ii, j] = noncons_right_node[ii] end end end @@ -428,17 +428,17 @@ end function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_left_arr, noncons_right_arr, neighbor_ids) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x - k = (blockIdx().y - 1) * blockDim().y + threadIdx().y + j = (blockIdx().y - 1) * blockDim().y + threadIdx().y - if (i <= size(surface_flux_values, 1) && k <= size(surface_flux_arr, 3)) - left_id = neighbor_ids[1, k] - right_id = neighbor_ids[2, k] + if (i <= size(surface_flux_values, 1) && j <= size(surface_flux_arr, 2)) + left_id = neighbor_ids[1, j] + right_id = neighbor_ids[2, j] @inbounds begin - surface_flux_values[i, 2, left_id] = surface_flux_arr[1, i, k] + - 0.5 * noncons_left_arr[1, i, k] # change back to `Float32` - surface_flux_values[i, 1, right_id] = surface_flux_arr[1, i, k] + - 0.5 * noncons_right_arr[1, i, k] # change back to `Float32` + surface_flux_values[i, 2, left_id] = surface_flux_arr[i, j] + + 0.5 * noncons_left_arr[i, j] # change back to `Float32` + surface_flux_values[i, 1, right_id] = surface_flux_arr[i, j] + + 0.5 * noncons_right_arr[i, j] # change back to `Float32` end end @@ -883,21 +883,21 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::True, eq neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids) interfaces_u = CuArray{Float64}(cache.interfaces.u) - surface_flux_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) - noncons_left_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) - noncons_right_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) + surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) size_arr = CuArray{Float64}(undef, size(interfaces_u, 3)) surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr, - interfaces_u, noncons_left_arr, noncons_right_arr, + interfaces_u, equations, surface_flux, nonconservative_flux) - surface_noncons_flux_kernel(surface_flux_arr, interfaces_u, noncons_left_arr, noncons_right_arr, + surface_noncons_flux_kernel(surface_flux_arr, noncons_left_arr, noncons_right_arr, interfaces_u, equations, surface_flux, nonconservative_flux; configurator_1d(surface_noncons_flux_kernel, size_arr)...) diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 3c6b015..0335e33 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -367,15 +367,14 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, orientations, end # Kernel for calculating surface and both nonconservative fluxes -function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_left_arr, - noncons_right_arr, orientations, - equations::AbstractEquations{2}, surface_flux::Any, - nonconservative_flux::Any) - j2 = (blockIdx().x - 1) * blockDim().x + threadIdx().x +function surface_noncons_flux_kernel!(surface_flux_arr, noncons_left_arr, noncons_right_arr, + interfaces_u, orientations, equations::AbstractEquations{2}, + surface_flux::Any, nonconservative_flux::Any) + j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y - if (j2 <= size(surface_flux_arr, 3) && k <= size(surface_flux_arr, 4)) - u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j2, k) + if (j <= size(surface_flux_arr, 2) && k <= size(surface_flux_arr, 3)) + u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j, k) orientation = orientations[k] surface_flux_node = surface_flux(u_ll, u_rr, orientation, equations) @@ -383,10 +382,10 @@ function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_le noncons_right_node = nonconservative_flux(u_rr, u_ll, orientation, equations) @inbounds begin - for j1j1 in axes(surface_flux_arr, 2) - surface_flux_arr[1, j1j1, j2, k] = surface_flux_node[j1j1] - noncons_left_arr[1, j1j1, j2, k] = noncons_left_node[j1j1] - noncons_right_arr[1, j1j1, j2, k] = noncons_right_node[j1j1] + for ii in axes(surface_flux_arr, 1) + surface_flux_arr[ii, j, k] = surface_flux_node[ii] + noncons_left_arr[ii, j, k] = noncons_left_node[ii] + noncons_right_arr[ii, j, k] = noncons_right_node[ii] end end end @@ -426,9 +425,8 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_l j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z - if (i <= size(surface_flux_values, 1) && - j <= size(surface_flux_arr, 3) && - k <= size(surface_flux_arr, 4)) + if (i <= size(surface_flux_values, 1) && j <= size(surface_flux_arr, 2) && + k <= size(surface_flux_arr, 3)) left_id = neighbor_ids[1, k] right_id = neighbor_ids[2, k] @@ -436,12 +434,12 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_l right_direction = 2 * orientations[k] - 1 @inbounds begin - surface_flux_values[i, j, left_direction, left_id] = surface_flux_arr[1, i, j, k] + + surface_flux_values[i, j, left_direction, left_id] = surface_flux_arr[i, j, k] + 0.5 * # change back to `Float32` - noncons_left_arr[1, i, j, k] - surface_flux_values[i, j, right_direction, right_id] = surface_flux_arr[1, i, j, k] + + noncons_left_arr[i, j, k] + surface_flux_values[i, j, right_direction, right_id] = surface_flux_arr[i, j, k] + 0.5 * # change back to `Float32` - noncons_right_arr[1, i, j, k] + noncons_right_arr[i, j, k] end end @@ -1075,22 +1073,22 @@ function cuda_interface_flux!(mesh::TreeMesh{2}, nonconservative_terms::True, eq neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids) orientations = CuArray{Int64}(cache.interfaces.orientations) interfaces_u = CuArray{Float64}(cache.interfaces.u) - surface_flux_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) - noncons_left_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) - noncons_right_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) + surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) size_arr = CuArray{Float64}(undef, size(interfaces_u, 3), size(interfaces_u, 4)) surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr, - interfaces_u, noncons_left_arr, noncons_right_arr, + interfaces_u, orientations, equations, surface_flux, nonconservative_flux) - surface_noncons_flux_kernel(surface_flux_arr, interfaces_u, noncons_left_arr, noncons_right_arr, + surface_noncons_flux_kernel(surface_flux_arr, noncons_left_arr, noncons_right_arr, interfaces_u, orientations, equations, surface_flux, nonconservative_flux; configurator_2d(surface_noncons_flux_kernel, size_arr)...) diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index a9b28a8..f75f392 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -1290,6 +1290,14 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::True, eq cache) surface_flux, nonconservative_flux = dg.surface_integral.surface_flux + neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids) + orientations = CuArray{Int64}(cache.interfaces.orientations) + interfaces_u = CuArray{Float64}(cache.interfaces.u) + surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) + surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) + return nothing end diff --git a/test/test_script.jl b/test/test_script.jl index c20dc37..1f85957 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -1,26 +1,23 @@ include("test_trixigpu.jl") -using CUDA +equations = ShallowWaterEquations2D(gravity_constant = 9.81) -equations = IdealGlmMhdEquations3D(5 / 3) +initial_condition = initial_condition_convergence_test # MMS EOC test -initial_condition = initial_condition_convergence_test - -volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell) +volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) solver = DGSEM(polydeg = 3, - surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell), + surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) -coordinates_min = (-1.0, -1.0, -1.0) -coordinates_max = (1.0, 1.0, 1.0) +coordinates_min = (0.0, 0.0) +coordinates_max = (sqrt(2.0), sqrt(2.0)) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level = 2, - n_cells_max = 10_000) - -semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + initial_refinement_level = 3, + n_cells_max = 10_000, + periodicity = true) -############################################################################### -# ODE solvers, callbacks etc. +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + source_terms = source_terms_convergence_test) tspan = (0.0, 1.0) @@ -68,50 +65,50 @@ TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equatio Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, Trixi.have_nonconservative_terms(equations), equations, solver.surface_integral, solver, cache) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_prolong2boundaries!` -# TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, -# cache_gpu) -# Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) -# @test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u - -# # Test `cuda_boundary_flux!` -# TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, -# solver.surface_integral, solver) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_prolong2mortars!` -# TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), -# solver_gpu, cache_gpu) -# Trixi.prolong2mortars!(cache, u, mesh, equations, -# solver.mortar, solver.surface_integral, solver) -# @test_approx cache_gpu.mortars.u_upper ≈ cache.mortars.u_upper -# @test_approx cache_gpu.mortars.u_lower ≈ cache.mortars.u_lower - -# # Test `cuda_mortar_flux!` -# TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, -# Trixi.have_nonconservative_terms(equations), equations, -# solver.mortar, solver.surface_integral, solver, cache) -# @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# # Test `cuda_surface_integral!` -# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) -# @test_approx du_gpu ≈ du - -# # Test `cuda_jacobian!` -# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) -# Trixi.apply_jacobian!(du, mesh, equations, solver, cache) -# @test_approx du_gpu ≈ du - -# # Test `cuda_sources!` -# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -# @test_approx du_gpu ≈ du +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_prolong2boundaries!` +TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, + cache_gpu) +Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) +@test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u + +# Test `cuda_boundary_flux!` +TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + solver.surface_integral, solver) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_prolong2mortars!` +TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + solver_gpu, cache_gpu) +Trixi.prolong2mortars!(cache, u, mesh, equations, + solver.mortar, solver.surface_integral, solver) +@test_approx cache_gpu.mortars.u_upper ≈ cache.mortars.u_upper +@test_approx cache_gpu.mortars.u_lower ≈ cache.mortars.u_lower + +# Test `cuda_mortar_flux!` +TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), equations, + solver.mortar, solver.surface_integral, solver, cache) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + +# Test `cuda_surface_integral!` +TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) +Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) +@test_approx du_gpu ≈ du + +# Test `cuda_jacobian!` +TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) +Trixi.apply_jacobian!(du, mesh, equations, solver, cache) +@test_approx du_gpu ≈ du + +# Test `cuda_sources!` +TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) +Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) +@test_approx du_gpu ≈ du