From 648ce8d0a7494a12b178525774ce352803b59f03 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Mon, 23 Sep 2024 21:39:01 -1000 Subject: [PATCH] Complete 2D --- src/TrixiCUDA.jl | 5 +-- src/solvers/cache.jl | 83 +++++++++++++++++++++++++++++++++++++------- src/solvers/dg_2d.jl | 24 ++++++------- test/test_script.jl | 49 +++++++++++++++----------- 4 files changed, 114 insertions(+), 47 deletions(-) diff --git a/src/TrixiCUDA.jl b/src/TrixiCUDA.jl index ad75d43..57c87ef 100644 --- a/src/TrixiCUDA.jl +++ b/src/TrixiCUDA.jl @@ -11,8 +11,9 @@ using Trixi: AbstractEquations, True, False, TreeMesh, DGSEM, BoundaryConditionPeriodic, SemidiscretizationHyperbolic, VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG, - flux, ntuple, nvariables, nnodes, nelements, - local_leaf_cells, init_elements, init_interfaces, init_boundaries, + LobattoLegendreMortarL2, + flux, ntuple, nvariables, nnodes, nelements, nmortars, + local_leaf_cells, init_elements, init_interfaces, init_boundaries, init_mortars, wrap_array, compute_coefficients, have_nonconservative_terms, boundary_condition_periodic, digest_boundary_conditions, check_periodicity_mesh_boundary_conditions, diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl index e09c6d5..7e4b244 100644 --- a/src/solvers/cache.jl +++ b/src/solvers/cache.jl @@ -1,11 +1,19 @@ # Rewrite `create_cache` in Trixi.jl to add the specialized parts of the arrays # required to copy the arrays from CPU to GPU. -# See also `create_cache` in Trixi.jl +# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently +# experimental in Trixi.jl and it is not implemented here. + +# Create cache for general tree mesh +function create_cache_gpu(mesh, equations, + volume_integral::VolumeIntegralWeakForm, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +# Create cache specialized for 1D tree mesh function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltype) # Get cells for which an element needs to be created (i.e. all leaf cells) - - ### ALL COPY TO GPU!!! leaf_cell_ids = local_leaf_cells(mesh.tree) elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) @@ -23,16 +31,6 @@ function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltyp return cache end -# Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently experimental -# in Trixi.jl and it is not implemented here. - -# Create cache for 1D tree mesh -function create_cache_gpu(mesh::TreeMesh{1}, equations, - volume_integral::VolumeIntegralWeakForm, dg::DGSEM, - uEltype, cache) - NamedTuple() -end - function create_cache_gpu(mesh::TreeMesh{1}, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, uEltype, cache) @@ -52,3 +50,62 @@ function create_cache_gpu(mesh::TreeMesh{1}, equations, # Remove `element_ids_dg` and `element_ids_dgfv` here return (; cache..., fstar1_L, fstar1_R) end + +# Create cache specialized for 2D tree mesh +function create_cache_gpu(mesh::TreeMesh{2}, equations, + dg::DGSEM, RealT, uEltype) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) + + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + + cache = (; elements, interfaces, boundaries, mortars) + + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...) + cache = (; cache..., create_cache_gpu(mesh, equations, dg.mortar, uEltype, cache)...) + + return cache +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + uEltype, cache) + NamedTuple() +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + uEltype, cache) + fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nelements(cache.elements)) + fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg), + nelements(cache.elements)) + fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nelements(cache.elements)) + fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1, + nelements(cache.elements)) + + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype, cache) + + return (; cache..., fstar1_L, fstar1_R, fstar2_L, fstar2_R) +end + +function create_cache_gpu(mesh::TreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, uEltype, cache) + fstar_upper = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nmortars(cache.mortars)) + fstar_lower = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2), + nmortars(cache.mortars)) + + (; fstar_upper, fstar_lower) +end diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 9c35f76..bb3eb90 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -1144,10 +1144,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: size(u, 4)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4)) @@ -1231,10 +1231,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: size(u, 4)) inverse_weights = CuArray{Float64}(dg.basis.inverse_weights) - fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 2), size(u, 4))) - fstar2_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) - fstar2_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2) + 1, size(u, 4))) + fstar1_L = cache.fstar1_L + fstar1_R = cache.fstar1_R + fstar2_L = cache.fstar2_L + fstar2_R = cache.fstar2_R 
size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4)) @@ -1594,8 +1594,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) - fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) - fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_upper = cache.fstar_upper + fstar_lower = cache.fstar_lower size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) @@ -1645,8 +1645,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) tmp_surface_flux_values = zero(similar(surface_flux_values)) - fstar_upper = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) - fstar_lower = CuArray{Float64}(undef, size(u_upper, 2), size(u_upper, 3), length(orientations)) + fstar_upper = cache.fstar_upper + fstar_lower = cache.fstar_lower size_arr = CuArray{Float64}(undef, size(u_upper, 3), length(orientations)) diff --git a/test/test_script.jl b/test/test_script.jl index 478d612..b58eb55 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -1,26 +1,18 @@ include("test_trixicuda.jl") -equations = CompressibleEulerEquations1D(1.4) - -initial_condition = initial_condition_weak_blast_wave - -surface_flux = flux_lax_friedrichs -volume_flux = flux_shima_etal -basis = LobattoLegendreBasis(3) -indicator_sc = IndicatorHennemannGassner(equations, basis, - alpha_max = 0.5, - alpha_min = 0.001, - alpha_smooth = true, - variable = density_pressure) -volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; - volume_flux_dg = volume_flux, - volume_flux_fv = surface_flux) -solver = DGSEM(basis, surface_flux, volume_integral) - -coordinates_min = -2.0 -coordinates_max = 2.0 +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +initial_condition = initial_condition_convergence_test +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0) +coordinates_max = (1.0, 1.0) +refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0), + coordinates_max = (1.0, 1.0)),) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level = 5, + initial_refinement_level = 2, + refinement_patches = refinement_patches, n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -89,6 +81,23 @@ Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, solver.surface_integral, solver) @test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values +# Test `cuda_prolong2mortars!` +TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), + solver_gpu, cache_gpu) +Trixi.prolong2mortars!(cache, u, mesh, equations, + solver.mortar, solver.surface_integral, solver) +@test_approx cache.mortars.u_upper ≈ cache_gpu.mortars.u_upper +@test_approx cache.mortars.u_lower ≈ cache_gpu.mortars.u_lower + +# Test `cuda_mortar_flux!` +TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu), + Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, + solver_gpu, cache_gpu) +Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + Trixi.have_nonconservative_terms(equations), 
equations, + solver.mortar, solver.surface_integral, solver, cache) +@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values + # Test `cuda_surface_integral!` TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
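
Note on the cache.jl changes above: `create_cache_gpu` follows a dispatch-and-merge pattern. Each method is specialized on the volume-integral (or mortar) type, returns a NamedTuple of extra GPU buffers (or an empty NamedTuple for `VolumeIntegralWeakForm`), and the result is spliced into the existing cache. A minimal, dependency-free sketch of that pattern; the names `WeakFormLike`, `ShockCapturingLike`, and `extra_cache` are placeholders, not Trixi or TrixiCUDA types:

# Placeholder types standing in for Trixi's volume integral structs.
struct WeakFormLike end
struct ShockCapturingLike end

# The weak-form method contributes nothing; the shock-capturing method returns
# extra work buffers (a CPU `zeros` stands in for `CUDA.zeros` here).
extra_cache(::WeakFormLike) = NamedTuple()
extra_cache(::ShockCapturingLike) = (; fstar1_L = zeros(2, 3, 2, 4))

cache = (; elements = :elements, interfaces = :interfaces)
cache = (; cache..., extra_cache(ShockCapturingLike())...)

@assert haskey(cache, :fstar1_L)   # the buffer was spliced into the cache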
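
Note on the dg_2d.jl hunks: they replace per-call `zero(CuArray{Float64}(undef, ...))` allocations of the `fstar*` and mortar buffers with reads from the cache created above. A rough sketch of the allocation pattern this presumably avoids, using CUDA.jl only; `nvars`, `nnod`, `nelem`, `rhs_alloc`, and `rhs_reuse!` are made-up names and the timings depend on hardware:

using CUDA

# Hypothetical sizes: 4 variables, 4 nodes per direction, 1024 elements.
nvars, nnod, nelem = 4, 4, 1024

# What the patch now does once, in `create_cache_gpu`:
fstar1_L = CUDA.zeros(Float64, nvars, nnod + 1, nnod, nelem)

# Old per-call pattern: allocate and zero a fresh buffer on every RHS evaluation.
rhs_alloc() = zero(CuArray{Float64}(undef, nvars, nnod + 1, nnod, nelem))

# New per-call pattern: reset and reuse the cached buffer.
rhs_reuse!(buf) = fill!(buf, zero(eltype(buf)))

CUDA.@time for _ in 1:100
    rhs_alloc()
end
CUDA.@time for _ in 1:100
    rhs_reuse!(fstar1_L)
end

Keeping these buffers in the cache also ties their lifetime to the semidiscretization rather than to each right-hand-side call.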
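
Note on the test_script.jl changes: switching from the uniformly refined 1D shock-capturing case to a 2D advection setup with a `refinement_patches` box is what makes the new mortar kernels reachable; a uniformly refined `TreeMesh` has no mortars, so the `cuda_prolong2mortars!` and `cuda_mortar_flux!` comparisons would have nothing to exercise. A minimal CPU-only sketch (Trixi.jl only; the GPU-side objects such as `cache_gpu` come from the unchanged parts of the test harness) confirming that the patched mesh actually contains mortars:

using Trixi

advection_velocity = (0.2, -0.7)
equations = LinearScalarAdvectionEquation2D(advection_velocity)
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

# The refined box on the right half introduces hanging nodes, hence L2 mortars.
refinement_patches = ((type = "box", coordinates_min = (0.0, -1.0),
                       coordinates_max = (1.0, 1.0)),)
mesh = TreeMesh((-1.0, -1.0), (1.0, 1.0),
                initial_refinement_level = 2,
                refinement_patches = refinement_patches,
                n_cells_max = 10_000)

semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_convergence_test, solver)

# Zero here would mean the mortar kernels are never exercised by the tests.
@show Trixi.nmortars(semi.cache.mortars)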