From a015ecb40d34239f0c2f064dade5f7a633498074 Mon Sep 17 00:00:00 2001
From: huiyuxie
Date: Sun, 8 Sep 2024 20:39:43 -1000
Subject: [PATCH 1/5] Start

---
 src/solvers/dg_1d.jl |  6 ++++++
 src/solvers/dg_2d.jl |  6 ++++++
 src/solvers/dg_3d.jl | 22 +++++++++++++++++++++-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
index c8e7963..c72c1e7 100644
--- a/src/solvers/dg_1d.jl
+++ b/src/solvers/dg_1d.jl
@@ -645,6 +645,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
+function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
+    return nothing
+end
+
 # Pack kernels for prolonging solution to interfaces
 function cuda_prolong2interfaces!(u, mesh::TreeMesh{1}, equations, cache)
     neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl
index 1feb5a6..928de48 100644
--- a/src/solvers/dg_2d.jl
+++ b/src/solvers/dg_2d.jl
@@ -996,6 +996,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
+function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
+    return nothing
+end
+
 # Pack kernels for prolonging solution to interfaces
 function cuda_prolong2interfaces!(u, mesh::TreeMesh{2}, equations, cache)
     neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl
index 8c0197e..e280464 100644
--- a/src/solvers/dg_3d.jl
+++ b/src/solvers/dg_3d.jl
@@ -937,6 +937,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms,
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::False, equations,
                                volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache)
     volume_flux = volume_integral.volume_flux
@@ -971,12 +972,14 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::True, equations,
                                volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache)
     # Wait for the immutable MHD implementation in Trixi.jl
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::False, equations,
                                volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
     volume_flux_dg, volume_flux_fv = dg.volume_integral.volume_flux_dg,
@@ -1073,6 +1076,13 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::
     return nothing
 end
 
+# Pack kernels for calculating volume integrals
+function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
+    # Wait for the immutable MHD implementation in Trixi.jl
+    return nothing
+end
+
 # Pack kernels for prolonging solution to interfaces
 function cuda_prolong2interfaces!(u, mesh::TreeMesh{3}, equations, cache)
     neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
@@ -1169,7 +1179,7 @@ end
 # Pack kernels for calculating boundary fluxes
 function cuda_boundary_flux!(t, mesh::TreeMesh{3}, boundary_conditions::NamedTuple,
-                             nonconservative_terms, equations, dg::DGSEM, cache)
+                             nonconservative_terms::False, equations, dg::DGSEM, cache)
     surface_flux = dg.surface_integral.surface_flux
     n_boundaries_per_direction = CuArray{Int64}(cache.boundaries.n_boundaries_per_direction)
@@ -1212,6 +1222,13 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{3}, boundary_conditions::NamedTup
     return nothing
 end
 
+# Pack kernels for calculating boundary fluxes
+function cuda_boundary_flux!(t, mesh::TreeMesh{3}, boundary_conditions::NamedTuple,
+                             nonconservative_terms::True, equations, dg::DGSEM, cache)
+    # Wait for the immutable MHD implementation in Trixi.jl
+    return nothing
+end
+
 # Dummy function for asserting mortars
 function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::False, dg::DGSEM, cache)
     @assert iszero(length(cache.mortars.orientations))
@@ -1362,8 +1379,11 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati
     cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically
 end
 
+# Pack kernels for calculating mortar fluxes
 function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservative_terms::True,
                            equations, dg::DGSEM, cache)
+    # Wait for the immutable MHD implementation in Trixi.jl
+    return nothing
 end
 
 # Pack kernels for calculating surface integrals

From b892c5338aa7b443383d97aeb02a9efabcdb1ab9 Mon Sep 17 00:00:00 2001
From: huiyuxie
Date: Tue, 10 Sep 2024 16:39:41 -1000
Subject: [PATCH 2/5] Add optimization comments

---
 src/solvers/common.jl |   2 +-
 src/solvers/dg_1d.jl  | 107 +++++++++++++++++++
 src/solvers/dg_2d.jl  |   3 +
 src/solvers/dg_3d.jl  |   3 +
 test/test_script.jl   | 236 +++++++++++++++++++++-----------
 5 files changed, 254 insertions(+), 97 deletions(-)

diff --git a/src/solvers/common.jl b/src/solvers/common.jl
index 2bc6603..c7ce5bd 100644
--- a/src/solvers/common.jl
+++ b/src/solvers/common.jl
@@ -49,7 +49,7 @@ function pure_blended_element_count_kernel!(element_ids_dg, element_ids_dgfv, al
     if (i <= length(alpha))
         dg_only = isapprox(alpha[i], 0, atol = atol)
 
-        if dg_only
+        if dg_only # bad
             element_ids_dg[i] = i
         else
             element_ids_dgfv[i] = i
diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
index c72c1e7..4486c8d 100644
--- a/src/solvers/dg_1d.jl
+++ b/src/solvers/dg_1d.jl
@@ -149,6 +149,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
 
         element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
 
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operations for better performance (to be explored)
+
         u_node = get_node_vars(u, equations, j1, k)
         u_node1 = get_node_vars(u, equations, j2, k)
 
@@ -645,9 +648,113 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
     return nothing
 end
 
+# Kernel for calculating pure DG and DG-FV volume fluxes
+function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
+                                  element_ids_dgfv, derivative_split,
+                                  equations::AbstractEquations{1},
+                                  volume_flux_dg::Any, nonconservative_flux_dg::Any,
+                                  volume_flux_fv::Any, nonconservative_flux_fv::Any)
+    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (j <= size(u, 2)^2 && k <= size(u, 3))
+        # length(element_ids_dgfv) == size(u, 3)
+
+        j1 = div(j - 1, size(u, 2)) + 1
+        j2 = rem(j - 1, size(u, 2)) + 1
+
+        element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
+
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operations for better performance (to be explored)
+
+        u_node = get_node_vars(u, equations, j1, k)
+        u_node1 = get_node_vars(u, equations, j2, k)
+
+        volume_flux_node = volume_flux_dg(u_node, u_node1, 1, equations)
+        noncons_flux_node = nonconservative_flux_dg(u_node, u_node1, 1, equations)
+
+        @inbounds begin
+            for ii in axes(u, 1)
+                volume_flux_arr[ii, j1, j2, k] = volume_flux_node[ii]
+                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] * derivative_split[j1, j2]
+            end
+        end
+
+        if j1 !== 1 && j2 === 1 && element_dgfv !== 0 # bad
+            u_ll = get_node_vars(u, equations, j1 - 1, element_dgfv)
+            u_rr = get_node_vars(u, equations, j1, element_dgfv)
+
+            f1_node = volume_flux_fv(u_ll, u_rr, 1, equations)
+
+            f1_L_node = nonconservative_flux_fv(u_ll, u_rr, 1, equations)
+            f1_R_node = nonconservative_flux_fv(u_rr, u_ll, 1, equations)
+
+            @inbounds begin
+                for ii in axes(u, 1)
+                    fstar1_L[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_L_node[ii]
+                    fstar1_R[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_R_node[ii]
+                end
+            end
+        end
+    end
+
+    return nothing
+end
+
 # Pack kernels for calculating volume integrals
 function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations,
                                volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
+    volume_flux_dg, nonconservative_flux_dg = dg.volume_integral.volume_flux_dg
+    volume_flux_fv, nonconservative_flux_fv = dg.volume_integral.volume_flux_fv
+    indicator = dg.volume_integral.indicator
+
+    # TODO: Get copies of `u` and `du` on both device and host
+    alpha = indicator(Array(u), mesh, equations, dg, cache)
+    alpha = CuArray{Float64}(alpha)
+
+    # For `Float64`, this gives 1.8189894035458565e-12
+    # For `Float32`, this gives 1.1920929f-5
+    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl
+
+    element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
+    element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))
+
+    pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
+                                                                                              element_ids_dgfv,
+                                                                                              alpha,
+                                                                                              atol)
+    pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
+                                      configurator_1d(pure_blended_element_count_kernel, alpha)...)
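+
+    # After this kernel, `element_ids_dg[i] == i` marks pure DG elements (alpha ≈ 0) and
+    # `element_ids_dgfv[i] == i` marks blended DG-FV elements; entries left at zero are
+    # skipped later via the `element_dgfv !== 0` checks.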
+
+    derivative_split = dg.basis.derivative_split
+    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`
+
+    derivative_split = CuArray{Float64}(derivative_split)
+    volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
+    noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
+
+    inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
+    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
+    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
+
+    size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))
+
+    volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr,
+                                                                          noncons_flux_arr,
+                                                                          fstar1_L, fstar1_R, u,
+                                                                          element_ids_dgfv,
+                                                                          derivative_split,
+                                                                          equations, volume_flux_dg,
+                                                                          nonconservative_flux_dg,
+                                                                          volume_flux_fv,
+                                                                          nonconservative_flux_fv)
+    volume_flux_dgfv_kernel(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
+                            element_ids_dgfv,
+                            derivative_split, equations, volume_flux_dg, nonconservative_flux_dg,
+                            volume_flux_fv, nonconservative_flux_fv;
+                            configurator_2d(volume_flux_dgfv_kernel, size_arr)...)
+
     return nothing
 end
 
diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl
index 928de48..c8cc424 100644
--- a/src/solvers/dg_2d.jl
+++ b/src/solvers/dg_2d.jl
@@ -191,6 +191,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, fstar1_L,
 
         element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
 
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operations for better performance (to be explored)
+
         u_node = get_node_vars(u, equations, j1, j2, k)
         u_node1 = get_node_vars(u, equations, j3, j2, k)
         u_node2 = get_node_vars(u, equations, j1, j3, k)
diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl
index e280464..5f3cb98 100644
--- a/src/solvers/dg_3d.jl
+++ b/src/solvers/dg_3d.jl
@@ -143,6 +143,9 @@ function volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, volume_flu
 
         element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
 
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operations for better performance (to be explored)
+
         u_node = get_node_vars(u, equations, j1, j2, j3, k)
         u_node1 = get_node_vars(u, equations, j4, j2, j3, k)
         u_node2 = get_node_vars(u, equations, j1, j4, j3, k)
diff --git a/test/test_script.jl b/test/test_script.jl
index 964a894..08e14df 100644
--- a/test/test_script.jl
+++ b/test/test_script.jl
@@ -3,37 +3,81 @@ using OrdinaryDiffEq
 using CUDA
 using Test
 
-equations = CompressibleEulerEquations3D(1.4)
+equations = ShallowWaterEquations1D(gravity_constant = 9.812, H0 = 1.75)
 
-initial_condition = initial_condition_weak_blast_wave
+# Initial condition with a truly discontinuous velocity and bottom topography.
+# Works as intended for TreeMesh1D with `initial_refinement_level=3`. If the mesh
+# refinement level is changed, the initial condition below may need to be changed as
+# well to ensure that the discontinuities lie on an element interface.
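+# (With `initial_refinement_level = 3` on the domain [-3, 3] used below, the mesh has
+# 2^3 = 8 elements of width 0.75, so the jumps at x = -1.5, -0.75, 0.0, and 0.75 all
+# coincide with element interfaces.)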
+function initial_condition_stone_throw_discontinuous_bottom(x, t,
+                                                            equations::ShallowWaterEquations1D)
+
+    # Calculate primitive variables
+
+    # Flat lake
+    H = equations.H0
+
+    # Discontinuous velocity
+    v = 0.0
+    if x[1] >= -0.75 && x[1] <= 0.0
+        v = -1.0
+    elseif x[1] >= 0.0 && x[1] <= 0.75
+        v = 1.0
+    end
+
+    b = (1.5 / exp(0.5 * ((x[1] - 1.0)^2))
+         +
+         0.75 / exp(0.5 * ((x[1] + 1.0)^2)))
+
+    # Force a discontinuous bottom topography
+    if x[1] >= -1.5 && x[1] <= 0.0
+        b = 0.5
+    end
+
+    return prim2cons(SVector(H, v, b), equations)
+end
+
+initial_condition = initial_condition_stone_throw_discontinuous_bottom
+
+boundary_condition = boundary_condition_slip_wall
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_lax_friedrichs,
+                                              hydrostatic_reconstruction_audusse_etal),
+                flux_nonconservative_audusse_etal)
+basis = LobattoLegendreBasis(4)
 
-surface_flux = flux_ranocha # OBS! Using a non-dissipative flux is only sensible to test EC,
-# but not for real shock simulations
-volume_flux = flux_ranocha
-polydeg = 3
-basis = LobattoLegendreBasis(polydeg)
 indicator_sc = IndicatorHennemannGassner(equations, basis,
                                          alpha_max = 0.5,
                                          alpha_min = 0.001,
                                          alpha_smooth = true,
-                                         variable = density_pressure)
+                                         variable = waterheight_pressure)
 volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                  volume_flux_dg = volume_flux,
                                                  volume_flux_fv = surface_flux)
+
 solver = DGSEM(basis, surface_flux, volume_integral)
 
-coordinates_min = (-2.0, -2.0, -2.0)
-coordinates_max = (2.0, 2.0, 2.0)
+###############################################################################
+# Create the TreeMesh for the domain [-3, 3]
+
+coordinates_min = -3.0
+coordinates_max = 3.0
 mesh = TreeMesh(coordinates_min, coordinates_max,
                 initial_refinement_level = 3,
-                n_cells_max = 100_000)
+                n_cells_max = 10_000,
+                periodicity = false)
 
-semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+# Create the semidiscretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_condition)
 
 ###############################################################################
-# ODE solvers, callbacks etc.
+# ODE solver -tspan = (0.0, 0.4) +tspan = (0.0, 3.0) # Get CPU data (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi @@ -64,85 +108,85 @@ TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, solver_gpu.volume_integral, solver_gpu, cache_gpu) -Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), - equations, solver.volume_integral, solver, cache) -@test CUDA.@allowscalar du_gpu ≈ du - -# Test `cuda_prolong2interfaces!` -TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu) -Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver) -interfaces_u_gpu = replace(cache_gpu.interfaces.u, NaN => 0.0) -interfaces_u = replace(cache.interfaces.u, NaN => 0.0) -@test interfaces_u_gpu ≈ interfaces_u - -# Test `cuda_interface_flux!` -TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), - equations_gpu, solver_gpu, cache_gpu) -Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, - Trixi.have_nonconservative_terms(equations), equations, - solver.surface_integral, solver, cache) -surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) -surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) -@test surface_flux_values_gpu ≈ surface_flux_values - -# Test `cuda_prolong2boundaries!` -TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, - cache_gpu) -Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) -boundaries_u_gpu = replace(cache_gpu.boundaries.u, NaN => 0.0) -boundaries_u = replace(cache.boundaries.u, NaN => 0.0) -@test boundaries_u_gpu ≈ boundaries_u - -# Test `cuda_boundary_flux!` -TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, - Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, - solver_gpu, cache_gpu) -Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, - solver.surface_integral, solver) -surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) -surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) -@test surface_flux_values_gpu ≈ surface_flux_values - -# Test `cuda_prolong2mortars!` -TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), - solver_gpu, cache_gpu) -Trixi.prolong2mortars!(cache, u, mesh, equations, - solver.mortar, solver.surface_integral, solver) -u_upper_left_gpu = replace(cache_gpu.mortars.u_upper_left, NaN => 0.0) -u_upper_right_gpu = replace(cache_gpu.mortars.u_upper_right, NaN => 0.0) -u_lower_left_gpu = replace(cache_gpu.mortars.u_lower_left, NaN => 0.0) -u_lower_right_gpu = replace(cache_gpu.mortars.u_lower_right, NaN => 0.0) -u_upper_left = replace(cache.mortars.u_upper_left, NaN => 0.0) -u_upper_right = replace(cache.mortars.u_upper_right, NaN => 0.0) -u_lower_left = replace(cache.mortars.u_lower_left, NaN => 0.0) -u_lower_right = replace(cache.mortars.u_lower_right, NaN => 0.0) -@test u_upper_left_gpu ≈ u_upper_left -@test u_upper_right_gpu ≈ u_upper_right -@test u_lower_left_gpu ≈ u_lower_left -@test u_lower_right_gpu ≈ u_lower_right - -# Test `cuda_mortar_flux!` -TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), - Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, - solver_gpu, cache_gpu) 
-Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, - Trixi.have_nonconservative_terms(equations), equations, - solver.mortar, solver.surface_integral, solver, cache) -surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) -surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) -@test surface_flux_values_gpu ≈ surface_flux_values - -# Test `cuda_surface_integral!` -TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) -@test CUDA.@allowscalar du_gpu ≈ du - -# Test `cuda_jacobian!` -TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) -Trixi.apply_jacobian!(du, mesh, equations, solver, cache) -@test CUDA.@allowscalar du_gpu ≈ du - -# Test `cuda_sources!` -TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -@test CUDA.@allowscalar du_gpu ≈ du +# Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), +# equations, solver.volume_integral, solver, cache) +# @test CUDA.@allowscalar du_gpu ≈ du + +# # Test `cuda_prolong2interfaces!` +# TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu) +# Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver) +# interfaces_u_gpu = replace(cache_gpu.interfaces.u, NaN => 0.0) +# interfaces_u = replace(cache.interfaces.u, NaN => 0.0) +# @test interfaces_u_gpu ≈ interfaces_u + +# # Test `cuda_interface_flux!` +# TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), +# equations_gpu, solver_gpu, cache_gpu) +# Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, +# Trixi.have_nonconservative_terms(equations), equations, +# solver.surface_integral, solver, cache) +# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) +# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) +# @test surface_flux_values_gpu ≈ surface_flux_values + +# # Test `cuda_prolong2boundaries!` +# TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, +# cache_gpu) +# Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) +# boundaries_u_gpu = replace(cache_gpu.boundaries.u, NaN => 0.0) +# boundaries_u = replace(cache.boundaries.u, NaN => 0.0) +# @test boundaries_u_gpu ≈ boundaries_u + +# # Test `cuda_boundary_flux!` +# TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, +# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, +# solver_gpu, cache_gpu) +# Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, +# solver.surface_integral, solver) +# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) +# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) +# @test surface_flux_values_gpu ≈ surface_flux_values + +# # Test `cuda_prolong2mortars!` +# TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), +# solver_gpu, cache_gpu) +# Trixi.prolong2mortars!(cache, u, mesh, equations, +# solver.mortar, solver.surface_integral, solver) +# u_upper_left_gpu = replace(cache_gpu.mortars.u_upper_left, NaN => 0.0) +# u_upper_right_gpu = replace(cache_gpu.mortars.u_upper_right, NaN => 0.0) +# 
u_lower_left_gpu = replace(cache_gpu.mortars.u_lower_left, NaN => 0.0) +# u_lower_right_gpu = replace(cache_gpu.mortars.u_lower_right, NaN => 0.0) +# u_upper_left = replace(cache.mortars.u_upper_left, NaN => 0.0) +# u_upper_right = replace(cache.mortars.u_upper_right, NaN => 0.0) +# u_lower_left = replace(cache.mortars.u_lower_left, NaN => 0.0) +# u_lower_right = replace(cache.mortars.u_lower_right, NaN => 0.0) +# @test u_upper_left_gpu ≈ u_upper_left +# @test u_upper_right_gpu ≈ u_upper_right +# @test u_lower_left_gpu ≈ u_lower_left +# @test u_lower_right_gpu ≈ u_lower_right + +# # Test `cuda_mortar_flux!` +# TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), +# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, +# solver_gpu, cache_gpu) +# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, +# Trixi.have_nonconservative_terms(equations), equations, +# solver.mortar, solver.surface_integral, solver, cache) +# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) +# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) +# @test surface_flux_values_gpu ≈ surface_flux_values + +# # Test `cuda_surface_integral!` +# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) +# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) +# @test CUDA.@allowscalar du_gpu ≈ du + +# # Test `cuda_jacobian!` +# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) +# Trixi.apply_jacobian!(du, mesh, equations, solver, cache) +# @test CUDA.@allowscalar du_gpu ≈ du + +# # Test `cuda_sources!` +# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) +# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) +# @test CUDA.@allowscalar du_gpu ≈ du From 144b2743d1e2a024017b7e004c2bf50b5dd76737 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Tue, 10 Sep 2024 18:04:42 -1000 Subject: [PATCH 3/5] Minor fix --- src/solvers/dg_1d.jl | 71 +++++++++++++++++-- src/solvers/dg_3d.jl | 5 +- ...odic.jl => test_shallowwater_dirichlet.jl} | 16 +---- 3 files changed, 71 insertions(+), 21 deletions(-) rename test/{test_shallowwater_nonperiodic.jl => test_shallowwater_dirichlet.jl} (96%) diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index 4486c8d..b6689f9 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -85,8 +85,9 @@ function symmetric_noncons_flux_kernel!(symmetric_flux_arr, noncons_flux_arr, u, @inbounds begin for ii in axes(u, 1) - symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii] - noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] * derivative_split[j1, j2] + symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii] * + derivative_split[j1, j2] + noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] end end end @@ -123,8 +124,8 @@ function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, nonco integral_contribution = 0.0 # change back to `Float32` for ii in axes(du, 2) - du[i, j, k] += derivative_split[j, ii] * symmetric_flux_arr[i, j, ii, k] - integral_contribution += noncons_flux_arr[i, j, ii, k] + du[i, j, k] += symmetric_flux_arr[i, j, ii, k] + integral_contribution += derivative_split[j, ii] * noncons_flux_arr[i, j, ii, k] end du[i, j, k] += 0.5 * integral_contribution # change back to `Float32` @@ -702,6 +703,63 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, f return nothing end +# # 
Kernel for calculating symmetric and nonconservative volume integrals +# function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, noncons_flux_arr) +# i = (blockIdx().x - 1) * blockDim().x + threadIdx().x +# j = (blockIdx().y - 1) * blockDim().y + threadIdx().y +# k = (blockIdx().z - 1) * blockDim().z + threadIdx().z + +# if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3)) +# @inbounds begin +# integral_contribution = 0.0 # change back to `Float32` + +# for ii in axes(du, 2) +# du[i, j, k] += derivative_split[j, ii] * symmetric_flux_arr[i, j, ii, k] +# integral_contribution += noncons_flux_arr[i, j, ii, k] +# end + +# du[i, j, k] += 0.5 * integral_contribution # change back to `Float32` +# end +# end + +# return nothing +# end + +# # Kernel for calculating DG volume integral contribution +# function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, +# volume_flux_arr, noncons_flux_arr) +# i = (blockIdx().x - 1) * blockDim().x + threadIdx().x +# j = (blockIdx().y - 1) * blockDim().y + threadIdx().y +# k = (blockIdx().z - 1) * blockDim().z + threadIdx().z + +# if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3)) +# # length(element_ids_dg) == size(du, 3) +# # length(element_ids_dgfv) == size(du, 3) + +# element_dg = element_ids_dg[k] # check if `element_dg` is zero +# element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero +# alpha_element = alpha[k] + +# @inbounds begin +# if element_dg !== 0 # bad +# for ii in axes(du, 2) +# du[i, j, element_dg] += derivative_split[j, ii] * +# volume_flux_arr[i, j, ii, element_dg] +# end +# end + +# if element_dgfv !== 0 # bad +# for ii in axes(du, 2) +# du[i, j, element_dgfv] += (1 - alpha_element) * derivative_split[j, ii] * +# volume_flux_arr[i, j, ii, element_dgfv] +# end +# end +# end +# end + +# return nothing +# end + # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) @@ -750,9 +808,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: volume_flux_fv, nonconservative_flux_fv) volume_flux_dgfv_kernel(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u, - element_ids_dgfv, - derivative_split, equations, volume_flux_dg, nonconservative_flux_dg, - volume_flux_fv, nonconservative_flux_fv; + element_ids_dgfv, derivative_split, equations, volume_flux_dg, + nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv; configurator_2d(volume_flux_dgfv_kernel, size_arr)...) 
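+
+    # At this point `volume_flux_arr` / `noncons_flux_arr` hold the two-point DG fluxes for
+    # every node pair of each element, and `fstar1_L` / `fstar1_R` hold the FV subcell
+    # fluxes of the DG-FV elements.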
return nothing diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 5f3cb98..08a5f40 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -945,7 +945,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) volume_flux = volume_integral.volume_flux - derivative_split = CuArray{Float64}(dg.basis.derivative_split) + derivative_split = dg.basis.derivative_split + set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` + + derivative_split = CuArray{Float64}(derivative_split) volume_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) volume_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), diff --git a/test/test_shallowwater_nonperiodic.jl b/test/test_shallowwater_dirichlet.jl similarity index 96% rename from test/test_shallowwater_nonperiodic.jl rename to test/test_shallowwater_dirichlet.jl index 93ef131..4469892 100644 --- a/test/test_shallowwater_nonperiodic.jl +++ b/test/test_shallowwater_dirichlet.jl @@ -11,19 +11,9 @@ isdir(outdir) && rm(outdir, recursive = true) # Test precision of the semidiscretization process @testset "Test Shallow Water" begin @testset "Shallow Water 1D" begin - equations = ShallowWaterEquations1D(gravity_constant = 1.0, H0 = 3.0) + equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.0) - function initial_condition_well_balancedness(x, t, equations::ShallowWaterEquations1D) - # Set the background values - H = equations.H0 - v = 0.0 - - b = (1.5 / exp(0.5 * ((x[1] - 1.0)^2)) + 0.75 / exp(0.5 * ((x[1] + 1.0)^2))) - - return prim2cons(SVector(H, v, b), equations) - end - - initial_condition = initial_condition_well_balancedness + initial_condition = initial_condition_convergence_test boundary_condition = BoundaryConditionDirichlet(initial_condition) @@ -43,7 +33,7 @@ isdir(outdir) && rm(outdir, recursive = true) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, boundary_conditions = boundary_condition) - tspan = (0.0, 100.0) + tspan = (0.0, 1.0) # Get CPU data (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi From 9838536ef92fcd7bfe40b7d370c9674f0ed28906 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Tue, 10 Sep 2024 18:20:57 -1000 Subject: [PATCH 4/5] Reformat --- examples/shallowwater_dirichlet_1d.jl | 17 +++-------------- test/test_shallowwater_dirichlet.jl | 1 - 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/examples/shallowwater_dirichlet_1d.jl b/examples/shallowwater_dirichlet_1d.jl index 276f002..97159c4 100644 --- a/examples/shallowwater_dirichlet_1d.jl +++ b/examples/shallowwater_dirichlet_1d.jl @@ -6,20 +6,9 @@ using OrdinaryDiffEq ############################################################################### # Semidiscretization of the shallow water equations -equations = ShallowWaterEquations1D(gravity_constant = 1.0, H0 = 3.0) +equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.0) -# An initial condition with constant total water height and zero velocities to test well-balancedness. 
-function initial_condition_well_balancedness(x, t, equations::ShallowWaterEquations1D)
-    # Set the background values
-    H = equations.H0
-    v = 0.0
-
-    b = (1.5 / exp(0.5 * ((x[1] - 1.0)^2)) + 0.75 / exp(0.5 * ((x[1] + 1.0)^2)))
-
-    return prim2cons(SVector(H, v, b), equations)
-end
-
-initial_condition = initial_condition_well_balancedness
+initial_condition = initial_condition_convergence_test
 
 boundary_condition = BoundaryConditionDirichlet(initial_condition)
 
@@ -49,7 +38,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
 ###############################################################################
 # ODE solvers, callbacks etc.
 
-tspan = (0.0, 100.0)
+tspan = (0.0, 1.0)
 ode = semidiscretize_gpu(semi, tspan) # from TrixiGPU.jl
 
 summary_callback = SummaryCallback()
diff --git a/test/test_shallowwater_dirichlet.jl b/test/test_shallowwater_dirichlet.jl
index 7ddfab0..4469892 100644
--- a/test/test_shallowwater_dirichlet.jl
+++ b/test/test_shallowwater_dirichlet.jl
@@ -11,7 +11,6 @@ isdir(outdir) && rm(outdir, recursive = true)
 # Test precision of the semidiscretization process
 @testset "Test Shallow Water" begin
     @testset "Shallow Water 1D" begin
-
         equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.0)
 
         initial_condition = initial_condition_convergence_test

From fb45900979ea16548d9aaba5f08bb22df085cda1 Mon Sep 17 00:00:00 2001
From: huiyuxie
Date: Wed, 11 Sep 2024 22:40:53 -1000
Subject: [PATCH 5/5] Complete 1D

---
 src/solvers/dg_1d.jl                     | 242 ++++++++++++-----------
 src/solvers/dg_2d.jl                     |   8 +-
 src/solvers/dg_3d.jl                     |   8 +-
 test/test_script.jl                      |  85 +--------
 test/test_shallowwater_shockcapturing.jl | 118 +++++++++++
 5 files changed, 258 insertions(+), 203 deletions(-)
 create mode 100644 test/test_shallowwater_shockcapturing.jl

diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
index b6689f9..26f8b7c 100644
--- a/src/solvers/dg_1d.jl
+++ b/src/solvers/dg_1d.jl
@@ -181,9 +181,63 @@ function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
     return nothing
 end
 
+# Kernel for calculating pure DG and DG-FV volume fluxes
+function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
+                                  element_ids_dgfv, derivative_split,
+                                  equations::AbstractEquations{1},
+                                  volume_flux_dg::Any, nonconservative_flux_dg::Any,
+                                  volume_flux_fv::Any, nonconservative_flux_fv::Any)
+    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (j <= size(u, 2)^2 && k <= size(u, 3))
+        # length(element_ids_dgfv) == size(u, 3)
+
+        j1 = div(j - 1, size(u, 2)) + 1
+        j2 = rem(j - 1, size(u, 2)) + 1
+
+        element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
+
+        # The sets of `get_node_vars` operations may be combined
+        # into a single set of operations for better performance (to be explored)
+
+        u_node = get_node_vars(u, equations, j1, k)
+        u_node1 = get_node_vars(u, equations, j2, k)
+
+        volume_flux_node = volume_flux_dg(u_node, u_node1, 1, equations)
+        noncons_flux_node = nonconservative_flux_dg(u_node, u_node1, 1, equations)
+
+        @inbounds begin
+            for ii in axes(u, 1)
+                volume_flux_arr[ii, j1, j2, k] = derivative_split[j1, j2] * volume_flux_node[ii]
+                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii]
+            end
+        end
+
+        if j1 !== 1 && j2 === 1 && element_dgfv !== 0 # bad
+            u_ll = get_node_vars(u, equations, j1 - 1, element_dgfv)
+            u_rr = get_node_vars(u, equations, j1, element_dgfv)
+
+            f1_node = volume_flux_fv(u_ll, u_rr, 1, equations)
+
+            f1_L_node =
nonconservative_flux_fv(u_ll, u_rr, 1, equations) + f1_R_node = nonconservative_flux_fv(u_rr, u_ll, 1, equations) + + @inbounds begin + for ii in axes(u, 1) + fstar1_L[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_L_node[ii] + fstar1_R[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_R_node[ii] + end + end + end + end + + return nothing +end + # Kernel for calculating DG volume integral contribution function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, - volume_flux_arr) + volume_flux_arr, equations::AbstractEquations{1}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z @@ -216,6 +270,53 @@ function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, return nothing end +# Kernel for calculating DG volume integral contribution +function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, + volume_flux_arr, noncons_flux_arr, + equations::AbstractEquations{1}) + i = (blockIdx().x - 1) * blockDim().x + threadIdx().x + j = (blockIdx().y - 1) * blockDim().y + threadIdx().y + k = (blockIdx().z - 1) * blockDim().z + threadIdx().z + + if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3)) + # length(element_ids_dg) == size(du, 3) + # length(element_ids_dgfv) == size(du, 3) + + element_dg = element_ids_dg[k] # check if `element_dg` is zero + element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero + alpha_element = alpha[k] + + @inbounds begin + if element_dg !== 0 # bad + integral_contribution = 0.0 + + for ii in axes(du, 2) + du[i, j, element_dg] += volume_flux_arr[i, j, ii, element_dg] + integral_contribution += derivative_split[j, ii] * + noncons_flux_arr[i, j, ii, element_dg] + end + + du[i, j, element_dg] += 0.5 * integral_contribution + end + + if element_dgfv !== 0 # bad + integral_contribution = 0.0 + + for ii in axes(du, 2) + du[i, j, element_dgfv] += (1 - alpha_element) * + volume_flux_arr[i, j, ii, element_dgfv] + integral_contribution += derivative_split[j, ii] * + noncons_flux_arr[i, j, ii, element_dgfv] + end + + du[i, j, element_dgfv] += 0.5 * (1 - alpha_element) * integral_contribution + end + end + end + + return nothing +end + # Kernel for calculating FV volume integral contribution function volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha) @@ -477,7 +578,7 @@ end # Kernel for calculating source terms function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEquations{1}, - source_terms::Function) + source_terms::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -631,9 +732,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: element_ids_dgfv, alpha, derivative_split, - volume_flux_arr) + volume_flux_arr, + equations) volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, - volume_flux_arr; + volume_flux_arr, equations; configurator_3d(volume_integral_dg_kernel, du)...) 
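+
+    # (Passing `equations` through lets Julia dispatch pick the 1D kernel methods; without
+    # it, some 1D and 2D methods of `volume_integral_dg_kernel!` would share the same name
+    # and argument count.)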
     size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3))
@@ -649,117 +751,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
     return nothing
 end
 
-# Kernel for calculating pure DG and DG-FV volume fluxes
-function volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u,
-                                  element_ids_dgfv, derivative_split,
-                                  equations::AbstractEquations{1},
-                                  volume_flux_dg::Any, nonconservative_flux_dg::Any,
-                                  volume_flux_fv::Any, nonconservative_flux_fv::Any)
-    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
-
-    if (j <= size(u, 2)^2 && k <= size(u, 3))
-        # length(element_ids_dgfv) == size(u, 3)
-
-        j1 = div(j - 1, size(u, 2)) + 1
-        j2 = rem(j - 1, size(u, 2)) + 1
-
-        element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
-
-        # The sets of `get_node_vars` operations may be combined
-        # into a single set of operations for better performance (to be explored)
-
-        u_node = get_node_vars(u, equations, j1, k)
-        u_node1 = get_node_vars(u, equations, j2, k)
-
-        volume_flux_node = volume_flux_dg(u_node, u_node1, 1, equations)
-        noncons_flux_node = nonconservative_flux_dg(u_node, u_node1, 1, equations)
-
-        @inbounds begin
-            for ii in axes(u, 1)
-                volume_flux_arr[ii, j1, j2, k] = volume_flux_node[ii]
-                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] * derivative_split[j1, j2]
-            end
-        end
-
-        if j1 !== 1 && j2 === 1 && element_dgfv !== 0 # bad
-            u_ll = get_node_vars(u, equations, j1 - 1, element_dgfv)
-            u_rr = get_node_vars(u, equations, j1, element_dgfv)
-
-            f1_node = volume_flux_fv(u_ll, u_rr, 1, equations)
-
-            f1_L_node = nonconservative_flux_fv(u_ll, u_rr, 1, equations)
-            f1_R_node = nonconservative_flux_fv(u_rr, u_ll, 1, equations)
-
-            @inbounds begin
-                for ii in axes(u, 1)
-                    fstar1_L[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_L_node[ii]
-                    fstar1_R[ii, j1, element_dgfv] = f1_node[ii] + 0.5 * f1_R_node[ii]
-                end
-            end
-        end
-    end
-
-    return nothing
-end
-
-# # Kernel for calculating symmetric and nonconservative volume integrals
-# function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, noncons_flux_arr)
-#     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-#     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
-#     k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
-
-#     if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
-#         @inbounds begin
-#             integral_contribution = 0.0 # change back to `Float32`
-
-#             for ii in axes(du, 2)
-#                 du[i, j, k] += derivative_split[j, ii] * symmetric_flux_arr[i, j, ii, k]
-#                 integral_contribution += noncons_flux_arr[i, j, ii, k]
-#             end
-
-#             du[i, j, k] += 0.5 * integral_contribution # change back to `Float32`
-#         end
-#     end
-
-#     return nothing
-# end
-
-# # Kernel for calculating DG volume integral contribution
-# function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
-#                                     volume_flux_arr, noncons_flux_arr)
-#     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
-#     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
-#     k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
-
-#     if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
-#         # length(element_ids_dg) == size(du, 3)
-#         # length(element_ids_dgfv) == size(du, 3)
-
-#         element_dg = element_ids_dg[k] # check if `element_dg` is zero
-#         element_dgfv = element_ids_dgfv[k] # check if `element_dgfv` is zero
-#         alpha_element = alpha[k]
-
-#         @inbounds begin
-#             if element_dg !== 0 # bad
-#                 for ii in axes(du, 2)
-# du[i, j, element_dg] += derivative_split[j, ii] * -# volume_flux_arr[i, j, ii, element_dg] -# end -# end - -# if element_dgfv !== 0 # bad -# for ii in axes(du, 2) -# du[i, j, element_dgfv] += (1 - alpha_element) * derivative_split[j, ii] * -# volume_flux_arr[i, j, ii, element_dgfv] -# end -# end -# end -# end - -# return nothing -# end - # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) @@ -812,6 +803,27 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv; configurator_2d(volume_flux_dgfv_kernel, size_arr)...) + volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, + element_ids_dgfv, + alpha, + derivative_split, + volume_flux_arr, + noncons_flux_arr, + equations) + volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, + volume_flux_arr, noncons_flux_arr, equations; + configurator_3d(volume_integral_dg_kernel, du)...) + + size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3)) + + volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L, + fstar1_R, + inverse_weights, + element_ids_dgfv, + alpha) + volume_integral_fv_kernel(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha; + configurator_2d(volume_integral_fv_kernel, size_arr)...) + return nothing end diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index c8cc424..965fab8 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -240,7 +240,8 @@ end # Kernel for calculating DG volume integral contribution function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, - volume_flux_arr1, volume_flux_arr2) + volume_flux_arr1, volume_flux_arr2, + equations::AbstractEquations{2}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z @@ -979,9 +980,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: alpha, derivative_split, volume_flux_arr1, - volume_flux_arr2) + volume_flux_arr2, + equations) volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, - volume_flux_arr1, volume_flux_arr2; + volume_flux_arr1, volume_flux_arr2, equations; configurator_3d(volume_integral_dg_kernel, size_arr)...) 
size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 4)) diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 08a5f40..8b106e2 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -208,7 +208,8 @@ end # Kernel for calculating DG volume integral contribution function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, - volume_flux_arr1, volume_flux_arr2, volume_flux_arr3) + volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, + equations::AbstractEquations{3}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z @@ -1061,9 +1062,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: derivative_split, volume_flux_arr1, volume_flux_arr2, - volume_flux_arr3) + volume_flux_arr3, + equations) volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, - volume_flux_arr1, volume_flux_arr2, volume_flux_arr3; + volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, equations; configurator_3d(volume_integral_dg_kernel, size_arr)...) size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 5)) diff --git a/test/test_script.jl b/test/test_script.jl index 08e14df..38a575b 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -108,85 +108,6 @@ TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, solver_gpu.volume_integral, solver_gpu, cache_gpu) -# Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), -# equations, solver.volume_integral, solver, cache) -# @test CUDA.@allowscalar du_gpu ≈ du - -# # Test `cuda_prolong2interfaces!` -# TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu) -# Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver) -# interfaces_u_gpu = replace(cache_gpu.interfaces.u, NaN => 0.0) -# interfaces_u = replace(cache.interfaces.u, NaN => 0.0) -# @test interfaces_u_gpu ≈ interfaces_u - -# # Test `cuda_interface_flux!` -# TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), -# equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, -# Trixi.have_nonconservative_terms(equations), equations, -# solver.surface_integral, solver, cache) -# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) -# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) -# @test surface_flux_values_gpu ≈ surface_flux_values - -# # Test `cuda_prolong2boundaries!` -# TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, -# cache_gpu) -# Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) -# boundaries_u_gpu = replace(cache_gpu.boundaries.u, NaN => 0.0) -# boundaries_u = replace(cache.boundaries.u, NaN => 0.0) -# @test boundaries_u_gpu ≈ boundaries_u - -# # Test `cuda_boundary_flux!` -# TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, -# solver.surface_integral, solver) -# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) -# surface_flux_values = replace(cache.elements.surface_flux_values, 
NaN => 0.0) -# @test surface_flux_values_gpu ≈ surface_flux_values - -# # Test `cuda_prolong2mortars!` -# TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), -# solver_gpu, cache_gpu) -# Trixi.prolong2mortars!(cache, u, mesh, equations, -# solver.mortar, solver.surface_integral, solver) -# u_upper_left_gpu = replace(cache_gpu.mortars.u_upper_left, NaN => 0.0) -# u_upper_right_gpu = replace(cache_gpu.mortars.u_upper_right, NaN => 0.0) -# u_lower_left_gpu = replace(cache_gpu.mortars.u_lower_left, NaN => 0.0) -# u_lower_right_gpu = replace(cache_gpu.mortars.u_lower_right, NaN => 0.0) -# u_upper_left = replace(cache.mortars.u_upper_left, NaN => 0.0) -# u_upper_right = replace(cache.mortars.u_upper_right, NaN => 0.0) -# u_lower_left = replace(cache.mortars.u_lower_left, NaN => 0.0) -# u_lower_right = replace(cache.mortars.u_lower_right, NaN => 0.0) -# @test u_upper_left_gpu ≈ u_upper_left -# @test u_upper_right_gpu ≈ u_upper_right -# @test u_lower_left_gpu ≈ u_lower_left -# @test u_lower_right_gpu ≈ u_lower_right - -# # Test `cuda_mortar_flux!` -# TrixiGPU.cuda_mortar_flux!(mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu), -# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, -# solver_gpu, cache_gpu) -# Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, -# Trixi.have_nonconservative_terms(equations), equations, -# solver.mortar, solver.surface_integral, solver, cache) -# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0) -# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0) -# @test surface_flux_values_gpu ≈ surface_flux_values - -# # Test `cuda_surface_integral!` -# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) -# @test CUDA.@allowscalar du_gpu ≈ du - -# # Test `cuda_jacobian!` -# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) -# Trixi.apply_jacobian!(du, mesh, equations, solver, cache) -# @test CUDA.@allowscalar du_gpu ≈ du - -# # Test `cuda_sources!` -# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -# @test CUDA.@allowscalar du_gpu ≈ du +Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), + equations, solver.volume_integral, solver, cache) +@test CUDA.@allowscalar du_gpu ≈ du diff --git a/test/test_shallowwater_shockcapturing.jl b/test/test_shallowwater_shockcapturing.jl new file mode 100644 index 0000000..8bb3e0b --- /dev/null +++ b/test/test_shallowwater_shockcapturing.jl @@ -0,0 +1,118 @@ +module TestShallowWaterShock # with `nonconservative_terms::True` + +using Trixi, TrixiGPU +using OrdinaryDiffEq +using Test, CUDA + +# Start testing with a clean environment +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +# Test precision of the semidiscretization process +@testset "Test Shallow Water" begin + @testset "Shallow Water 1D" begin + equations = ShallowWaterEquations1D(gravity_constant = 9.812, H0 = 1.75) + + function initial_condition_stone_throw_discontinuous_bottom(x, t, + equations::ShallowWaterEquations1D) + + # Calculate primitive variables + + # Flat lake + H = equations.H0 + + # Discontinuous velocity + v = 0.0 + if x[1] >= -0.75 && x[1] <= 0.0 + v = -1.0 + elseif x[1] >= 0.0 && x[1] <= 0.75 + v = 1.0 + end + + b = (1.5 / exp(0.5 * 
((x[1] - 1.0)^2)) + + 0.75 / exp(0.5 * ((x[1] + 1.0)^2))) + + # Force a discontinuous bottom topography + if x[1] >= -1.5 && x[1] <= 0.0 + b = 0.5 + end + + return prim2cons(SVector(H, v, b), equations) + end + + initial_condition = initial_condition_stone_throw_discontinuous_bottom + + boundary_condition = boundary_condition_slip_wall + + volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) + surface_flux = (FluxHydrostaticReconstruction(flux_lax_friedrichs, + hydrostatic_reconstruction_audusse_etal), + flux_nonconservative_audusse_etal) + basis = LobattoLegendreBasis(4) + + indicator_sc = IndicatorHennemannGassner(equations, basis, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, + variable = waterheight_pressure) + volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; + volume_flux_dg = volume_flux, + volume_flux_fv = surface_flux) + + solver = DGSEM(basis, surface_flux, volume_integral) + + coordinates_min = -3.0 + coordinates_max = 3.0 + mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 3, + n_cells_max = 10_000, + periodicity = false) + + semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + boundary_conditions = boundary_condition) + + tspan = (0.0, 3.0) + + # Get CPU data + (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi + + # Get GPU data + equations_gpu = deepcopy(equations) + mesh_gpu, solver_gpu, cache_gpu = deepcopy(mesh), deepcopy(solver), deepcopy(cache) + boundary_conditions_gpu, source_terms_gpu = deepcopy(boundary_conditions), + deepcopy(source_terms) + + # Set initial time + t = t_gpu = 0.0 + + # Get initial data + ode = semidiscretize(semi, tspan) + u_ode = copy(ode.u0) + du_ode = similar(u_ode) + u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache) + du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache) + + # Copy data to device + du_gpu, u_gpu = TrixiGPU.copy_to_device!(du, u) + # Reset data on host + Trixi.reset_du!(du, solver, cache) + + # Test `cuda_volume_integral!` + TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, + Trixi.have_nonconservative_terms(equations_gpu), + equations_gpu, solver_gpu.volume_integral, solver_gpu, + cache_gpu) + Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), + equations, solver.volume_integral, solver, cache) + @test CUDA.@allowscalar du_gpu ≈ du + + # Wait for fix of boundary flux dispatches + + # Copy data back to host + du_cpu, u_cpu = TrixiGPU.copy_to_host!(du_gpu, u_gpu) + end + + @testset "Shallow Water 2D" begin end +end + +end # module