diff --git a/src/TrixiCUDA.jl b/src/TrixiCUDA.jl
index dc98e55..ad75d43 100644
--- a/src/TrixiCUDA.jl
+++ b/src/TrixiCUDA.jl
@@ -3,19 +3,19 @@ module TrixiCUDA
 # Include other packages that are used in TrixiCUDA.jl
 # using Reexport: @reexport
 
+using CUDA
 using CUDA: @cuda, CuArray, HostKernel,
             threadIdx, blockIdx, blockDim, similar, launch_configuration
 
 using Trixi: AbstractEquations, True, False,
-             TreeMesh,
-             DGSEM,
+             TreeMesh, DGSEM,
              BoundaryConditionPeriodic, SemidiscretizationHyperbolic,
              VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing,
              VolumeIntegralShockCapturingHG,
-             flux,
-             ntuple, nvariables,
+             flux, ntuple, nvariables,
              nnodes, nelements,
              local_leaf_cells, init_elements, init_interfaces, init_boundaries,
              wrap_array, compute_coefficients, have_nonconservative_terms,
              boundary_condition_periodic,
+             digest_boundary_conditions, check_periodicity_mesh_boundary_conditions,
              set_log_type!, set_sqrt_type!
 
 import Trixi: get_node_vars, get_node_coords, get_surface_node_vars
diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl
index 6cbfd11..a924b43 100644
--- a/src/semidiscretization/semidiscretization_hyperbolic.jl
+++ b/src/semidiscretization/semidiscretization_hyperbolic.jl
@@ -11,17 +11,17 @@ function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, sol
     cache = (; create_cache_gpu(mesh, equations, solver, RealT, uEltype)...,
              initial_cache...)
 
-    # _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver,
-    #                                                   cache)
+    _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver,
+                                                      cache)
 
-    # check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions)
+    check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions)
 
-    # SemidiscretizationHyperbolic{typeof(mesh), typeof(equations),
-    #                              typeof(initial_condition),
-    #                              typeof(_boundary_conditions), typeof(source_terms),
-    #                              typeof(solver), typeof(cache)}(mesh, equations,
-    #                                                             initial_condition,
-    #                                                             _boundary_conditions,
-    #                                                             source_terms, solver,
-    #                                                             cache)
+    SemidiscretizationHyperbolic{typeof(mesh), typeof(equations),
+                                 typeof(initial_condition),
+                                 typeof(_boundary_conditions), typeof(source_terms),
+                                 typeof(solver), typeof(cache)}(mesh, equations,
+                                                                initial_condition,
+                                                                _boundary_conditions,
+                                                                source_terms, solver,
+                                                                cache)
 end
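With digestion enabled, `SemidiscretizationHyperbolicGPU` should now accept the same boundary-condition inputs that Trixi.jl's CPU constructor normalizes via `digest_boundary_conditions`. A minimal sketch, assuming the GPU constructor mirrors Trixi.jl's keyword interface (the Dirichlet setup below is illustrative, not part of this diff):

```julia
using Trixi, TrixiCUDA

equations = CompressibleEulerEquations1D(1.4)
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
# Non-periodic mesh, so explicit boundary conditions are required
mesh = TreeMesh(-2.0, 2.0, initial_refinement_level = 5, n_cells_max = 10_000,
                periodicity = false)

# `digest_boundary_conditions` normalizes this into a NamedTuple keyed by
# boundary name (`x_neg`/`x_pos` for a 1D TreeMesh); a mismatch between mesh
# periodicity and the boundary conditions is caught by
# `check_periodicity_mesh_boundary_conditions`
boundary_conditions = (; x_neg = BoundaryConditionDirichlet(initial_condition_constant),
                       x_pos = BoundaryConditionDirichlet(initial_condition_constant))

semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_constant,
                                           solver; boundary_conditions)
```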
diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl
index 3cd45fa..e09c6d5 100644
--- a/src/solvers/cache.jl
+++ b/src/solvers/cache.jl
@@ -18,7 +18,7 @@ function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltyp
     # Add specialized parts of the cache required to compute the volume integral etc.
     cache = (; cache...,
-             create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype)...)
+             create_cache_gpu(mesh, equations, dg.volume_integral, dg, uEltype, cache)...)
 
     return cache
 end
@@ -26,31 +26,29 @@
 # Note that `volume_integral::VolumeIntegralPureLGLFiniteVolume` is currently experimental
 # in Trixi.jl and it is not implemented here.
+# Create cache for 1D tree mesh
 function create_cache_gpu(mesh::TreeMesh{1}, equations,
-                          volume_integral::VolumeIntegralWeakForm, dg::DGSEM, uEltype)
+                          volume_integral::VolumeIntegralWeakForm, dg::DGSEM,
+                          uEltype, cache)
     NamedTuple()
 end
 
 function create_cache_gpu(mesh::TreeMesh{1}, equations,
-                          volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, uEltype)
+                          volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM,
+                          uEltype, cache)
     NamedTuple()
 end
 
-# function create_cache_gpu(mesh::TreeMesh{1}, equations,
-#                           volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, uEltype)
-#     element_ids_dg = Int[]
-#     element_ids_dgfv = Int[]
-
-#     cache = create_cache(mesh, equations,
-#                          VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
-#                          dg, uEltype)
+function create_cache_gpu(mesh::TreeMesh{1}, equations,
+                          volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM,
+                          uEltype, cache)
+    fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements))
+    fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements))
 
-#     A2dp1_x = Array{uEltype, 2}
-#     fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1)
-#                                 for _ in 1:Threads.nthreads()]
-#     fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1)
-#                                 for _ in 1:Threads.nthreads()]
+    cache = create_cache_gpu(mesh, equations,
+                             VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
+                             dg, uEltype, cache)
 
-#     return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded,
-#             fstar1_R_threaded)
-# end
+    # Remove `element_ids_dg` and `element_ids_dgfv` here
+    return (; cache..., fstar1_L, fstar1_R)
+end
diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
index 53e506b..ebbc075 100644
--- a/src/solvers/dg_1d.jl
+++ b/src/solvers/dg_1d.jl
@@ -719,8 +719,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
     volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2),
                                        size(u, 3))
     inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
-    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
-    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
+    fstar1_L = cache.fstar1_L
+    fstar1_R = cache.fstar1_R
 
     size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))
@@ -789,8 +789,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
     noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2),
                                         size(u, 3))
     inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
-    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
-    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
+    fstar1_L = cache.fstar1_L
+    fstar1_R = cache.fstar1_R
 
     size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))
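The two changes above go together: the shock-capturing buffers `fstar1_L`/`fstar1_R` are now allocated once in `create_cache_gpu` and merely referenced in `cuda_volume_integral!`, instead of being freshly allocated and zeroed on every right-hand-side evaluation. A minimal sketch of the pattern (sizes are illustrative); note that a reused buffer carries values from the previous call, so the FV kernels must fully overwrite it, or it must be reset explicitly:

```julia
using CUDA

nvars, n_nodes, nelems = 3, 4, 32  # illustrative sizes

# Before: allocated (and zeroed) on every right-hand-side evaluation
fstar1_L_old = zero(CuArray{Float64}(undef, nvars, n_nodes + 1, nelems))

# After: allocated once at cache creation, then reused across evaluations
fstar1_L = CUDA.zeros(Float64, nvars, n_nodes + 1, nelems)

# Inside the kernel driver, only a reference is taken:
buf = fstar1_L
# If the kernels do not overwrite every entry, an explicit reset is needed:
fill!(buf, 0.0)
```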
diff --git a/test/test_script.jl b/test/test_script.jl
index f76e43e..478d612 100644
--- a/test/test_script.jl
+++ b/test/test_script.jl
@@ -1,15 +1,105 @@
 include("test_trixicuda.jl")
 
-advection_velocity = 1.0
-equations = LinearScalarAdvectionEquation1D(advection_velocity)
+equations = CompressibleEulerEquations1D(1.4)
 
-solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+initial_condition = initial_condition_weak_blast_wave
 
-coordinates_min = -1.0
-coordinates_max = 1.0
+surface_flux = flux_lax_friedrichs
+volume_flux = flux_shima_etal
+basis = LobattoLegendreBasis(3)
+indicator_sc = IndicatorHennemannGassner(equations, basis,
+                                         alpha_max = 0.5,
+                                         alpha_min = 0.001,
+                                         alpha_smooth = true,
+                                         variable = density_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg = volume_flux,
+                                                 volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
 
-mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 4,
-                n_cells_max = 30_000)
+coordinates_min = -2.0
+coordinates_max = 2.0
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 5,
+                n_cells_max = 10_000)
 
-semi = SemidiscretizationHyperbolicGPU(mesh, equations,
-                                       initial_condition_convergence_test, solver)
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+
+tspan = (0.0, 1.0)
+
+# Get CPU data
+(; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi
+
+# Get GPU data
+mesh_gpu, solver_gpu, cache_gpu = semi_gpu.mesh, semi_gpu.solver, semi_gpu.cache
+equations_gpu, source_terms_gpu = semi_gpu.equations, semi_gpu.source_terms
+initial_condition_gpu, boundary_conditions_gpu = semi_gpu.initial_condition,
+                                                 semi_gpu.boundary_conditions
+
+# Set initial time
+t = t_gpu = 0.0
+
+# Get initial data
+ode = semidiscretize(semi, tspan)
+u_ode = copy(ode.u0)
+du_ode = similar(u_ode)
+u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache)
+du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache)
+
+# Copy data to device
+du_gpu, u_gpu = TrixiCUDA.copy_to_device!(du, u)
+# Reset data on host
+Trixi.reset_du!(du, solver, cache)
+
+# Test `cuda_volume_integral!`
+TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
+                                Trixi.have_nonconservative_terms(equations_gpu),
+                                equations_gpu, solver_gpu.volume_integral, solver_gpu,
+                                cache_gpu)
+Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
+                            equations, solver.volume_integral, solver, cache)
+@test_approx du_gpu ≈ du
+
+# Test `cuda_prolong2interfaces!`
+TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
+Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
+@test_approx cache_gpu.interfaces.u ≈ cache.interfaces.u
+
+# Test `cuda_interface_flux!`
+TrixiCUDA.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu),
+                               equations_gpu, solver_gpu, cache_gpu)
+Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
+                           Trixi.have_nonconservative_terms(equations), equations,
+                           solver.surface_integral, solver, cache)
+@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values
+
+# Test `cuda_prolong2boundaries!`
+TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                                   cache_gpu)
+Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
+@test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u
+
+# Test `cuda_boundary_flux!`
+TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
+                              Trixi.have_nonconservative_terms(equations_gpu),
+                              equations_gpu,
+                              solver_gpu, cache_gpu)
+Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
+                          solver.surface_integral, solver)
+@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values
+
+# Test `cuda_surface_integral!`
+TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
+Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
+@test_approx du_gpu ≈ du
+
+# Test `cuda_jacobian!`
+TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
+Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
+@test_approx du_gpu ≈ du
+
+# Test `cuda_sources!`
+TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
+Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
+@test_approx du_gpu ≈ du
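For reference, the stage calls exercised above compose into one full device-side RHS evaluation in the same order as Trixi.jl's `rhs!`. The sketch below is built only from the calls already in this script; the helper name `rhs_gpu_sketch!` is hypothetical, not a TrixiCUDA API:

```julia
# Sketch: one device-side RHS evaluation assembled from the tested kernels,
# in the order volume integral -> interfaces -> boundaries -> surface
# integral -> Jacobian -> sources
function rhs_gpu_sketch!(du_gpu, u_gpu, t_gpu, mesh_gpu, equations_gpu,
                         boundary_conditions_gpu, source_terms_gpu,
                         solver_gpu, cache_gpu)
    nonconservative = Trixi.have_nonconservative_terms(equations_gpu)
    TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, nonconservative,
                                    equations_gpu, solver_gpu.volume_integral,
                                    solver_gpu, cache_gpu)
    TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
    TrixiCUDA.cuda_interface_flux!(mesh_gpu, nonconservative, equations_gpu,
                                   solver_gpu, cache_gpu)
    TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu,
                                       equations_gpu, cache_gpu)
    TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
                                  nonconservative, equations_gpu,
                                  solver_gpu, cache_gpu)
    TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu,
                                     cache_gpu)
    TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
    TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu,
                            cache_gpu)
    return nothing
end
```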