From 4c379097af8176256398beabdd3b0ba7a53cb982 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 3 Oct 2024 22:22:07 -1000 Subject: [PATCH] Complete 1D --- src/auxiliary/configurators.jl | 59 ++++++++++++++++-- src/solvers/dg_1d.jl | 109 +++++++++++++++------------------ test/runtests.jl | 2 +- test/test_script.jl | 107 ++++++++++---------------------- 4 files changed, 136 insertions(+), 141 deletions(-) diff --git a/src/auxiliary/configurators.jl b/src/auxiliary/configurators.jl index cec9b24..d831053 100644 --- a/src/auxiliary/configurators.jl +++ b/src/auxiliary/configurators.jl @@ -1,9 +1,62 @@ # Kernel configurators are used for determining the number of threads and # blocks to be used in the kernel, which optimizes the use of GPU resources. -# Start implementation of kernel configurators with 32, 32 x 32, and 32 x 32 x 1 +# 1D kernel configurator +# We hardcode 32 threads per block for 1D kernels +function kernel_configurator_1d(kernel::HostKernel, x::Int) + # config = launch_configuration(kernel.fun) # not used in this case -# Kernel configurator for 1D CUDA array + threads = 32 # warp size is 32, if block size is less than 32, it will be padded to 32 + blocks = cld(x, threads[1]) + + return (threads = threads, blocks = blocks) +end + +# 2D kernel configurator +# We hardcode 32 threads for x dimension per block, and y dimension is determined +# by the number of threads returned by the launch configuration +function kernel_configurator_2d(kernel::HostKernel, x::Int, y::Int) + config = launch_configuration(kernel.fun) # get the number of threads + + # y dimension + dims_y1 = cld(x * y, 32) + dims_y2 = max(fld(config.threads, 32), 1) + + dims_y = min(dims_y1, dims_y2) + + # x dimension is hardcoded to warp size 32 + threads = (32, dims_y) + blocks = (cld(x, threads[1]), cld(y, threads[2])) + + return (threads = threads, blocks = blocks) +end + +# 3D kernel configurator +# We hardcode 32 threads for x dimension per block, y and z dimensions are determined +# by the number of threads returned by the launch configuration +function kernel_configurator_3d(kernel::HostKernel, x::Int, y::Int, z::Int) + config = launch_configuration(kernel.fun) # get the number of threads + + # y dimension + dims_y1 = cld(x * y, 32) + dims_y2 = max(fld(config.threads, 32), 1) + + dims_y = min(dims_y1, dims_y2) + + # z dimension + dims_z1 = cld(x * y * z, 32 * dims_y) + dims_z2 = max(fld(config.threads, 32 * dims_y), 1) + + dims_z = min(dims_z1, dims_z2) + + # x dimension is hardcoded to warp size 32 + threads = (32, dims_y, dims_z) + blocks = (cld(x, threads[1]), cld(y, threads[2]), cld(z, threads[3])) + + return (threads = threads, blocks = blocks) +end + +# Deprecated old kernel configurators below function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1}) config = launch_configuration(kernel.fun) @@ -13,7 +66,6 @@ function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1}) return (threads = threads, blocks = blocks) end -# Kernel configurator for 2D CUDA array function configurator_2d(kernel::HostKernel, array::CuArray{<:Any, 2}) config = launch_configuration(kernel.fun) @@ -23,7 +75,6 @@ function configurator_2d(kernel::HostKernel, array::CuArray{<:Any, 2}) return (threads = threads, blocks = blocks) end -# Kernel configurator for 3D CUDA array function configurator_3d(kernel::HostKernel, array::CuArray{<:Any, 3}) config = launch_configuration(kernel.fun) diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index cefff53..f9be04f 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -613,13 +613,13 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, derivative_dhat = CuArray{Float64}(dg.basis.derivative_dhat) flux_arr = similar(u) - size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3)) - flux_kernel = @cuda launch=false flux_kernel!(flux_arr, u, equations, flux) - flux_kernel(flux_arr, u, equations, flux; configurator_2d(flux_kernel, size_arr)...) + flux_kernel(flux_arr, u, equations, flux; + kernel_configurator_2d(flux_kernel, size(u, 2), size(u, 3))...) weak_form_kernel = @cuda launch=false weak_form_kernel!(du, derivative_dhat, flux_arr) - weak_form_kernel(du, derivative_dhat, flux_arr; configurator_3d(weak_form_kernel, du)...) + weak_form_kernel(du, derivative_dhat, flux_arr; + kernel_configurator_3d(weak_form_kernel, size(du)...)...) return nothing end @@ -636,17 +636,15 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: derivative_split = CuArray{Float64}(derivative_split) volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) - size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3)) - volume_flux_kernel = @cuda launch=false volume_flux_kernel!(volume_flux_arr, u, equations, volume_flux) volume_flux_kernel(volume_flux_arr, u, equations, volume_flux; - configurator_2d(volume_flux_kernel, size_arr)...) + kernel_configurator_2d(volume_flux_kernel, size(u, 2)^2, size(u, 3))...) volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split, volume_flux_arr, equations) volume_integral_kernel(du, derivative_split, volume_flux_arr, equations; - configurator_3d(volume_integral_kernel, du)...) + kernel_configurator_3d(volume_integral_kernel, size(du)...)...) return nothing end @@ -663,8 +661,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: symmetric_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) - size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3)) - symmetric_noncons_flux_kernel = @cuda launch=false symmetric_noncons_flux_kernel!(symmetric_flux_arr, noncons_flux_arr, u, @@ -674,7 +670,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: nonconservative_flux) symmetric_noncons_flux_kernel(symmetric_flux_arr, noncons_flux_arr, u, derivative_split, equations, symmetric_flux, nonconservative_flux; - configurator_2d(symmetric_noncons_flux_kernel, size_arr)...) + kernel_configurator_2d(symmetric_noncons_flux_kernel, + size(u, 2)^2, size(u, 3))...) derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split` @@ -682,7 +679,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: symmetric_flux_arr, noncons_flux_arr) volume_integral_kernel(du, derivative_split, symmetric_flux_arr, noncons_flux_arr; - configurator_3d(volume_integral_kernel, du)...) + kernel_configurator_3d(volume_integral_kernel, size(du)...)...) return nothing end @@ -702,7 +699,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl - element_ids_dg = CUDA.zeros(Int, length(alpha)) element_ids_dgfv = CUDA.zeros(Int, length(alpha)) @@ -711,7 +707,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: alpha, atol) pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol; - configurator_1d(pure_blended_element_count_kernel, alpha)...) + kernel_configurator_1d(pure_blended_element_count_kernel, + length(alpha))...) derivative_split = dg.basis.derivative_split set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` @@ -723,8 +720,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: fstar1_L = cache.fstar1_L fstar1_R = cache.fstar1_R - size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3)) - volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u, element_ids_dgfv, @@ -732,7 +727,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: volume_flux_fv) volume_flux_dgfv_kernel(volume_flux_arr, fstar1_L, fstar1_R, u, element_ids_dgfv, equations, volume_flux_dg, volume_flux_fv; - configurator_2d(volume_flux_dgfv_kernel, size_arr)...) + kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^2, + size(u, 3))...) volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, @@ -742,9 +738,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: equations) volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, volume_flux_arr, equations; - configurator_3d(volume_integral_dg_kernel, du)...) - - size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3)) + kernel_configurator_3d(volume_integral_dg_kernel, size(du)...)...) volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, @@ -752,7 +746,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: element_ids_dgfv, alpha) volume_integral_fv_kernel(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha; - configurator_2d(volume_integral_fv_kernel, size_arr)...) + kernel_configurator_2d(volume_integral_fv_kernel, size(u, 2), + size(u, 3))...) return nothing end @@ -772,7 +767,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl - element_ids_dg = CUDA.zeros(Int, length(alpha)) element_ids_dgfv = CUDA.zeros(Int, length(alpha)) @@ -781,7 +775,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: alpha, atol) pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol; - configurator_1d(pure_blended_element_count_kernel, alpha)...) + kernel_configurator_1d(pure_blended_element_count_kernel, + length(alpha))...) derivative_split = dg.basis.derivative_split set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` @@ -794,8 +789,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: fstar1_L = cache.fstar1_L fstar1_R = cache.fstar1_R - size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3)) - volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u, @@ -808,7 +801,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: volume_flux_dgfv_kernel(volume_flux_arr, noncons_flux_arr, fstar1_L, fstar1_R, u, element_ids_dgfv, derivative_split, equations, volume_flux_dg, nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv; - configurator_2d(volume_flux_dgfv_kernel, size_arr)...) + kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^2, + size(u, 3))...) derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split` @@ -821,9 +815,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: equations) volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, volume_flux_arr, noncons_flux_arr, equations; - configurator_3d(volume_integral_dg_kernel, du)...) - - size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3)) + kernel_configurator_3d(volume_integral_dg_kernel, size(du)...)...) volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, @@ -831,7 +823,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: element_ids_dgfv, alpha) volume_integral_fv_kernel(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha; - configurator_2d(volume_integral_fv_kernel, size_arr)...) + kernel_configurator_2d(volume_integral_fv_kernel, size(u, 2), + size(u, 3))...) return nothing end @@ -841,12 +834,12 @@ function cuda_prolong2interfaces!(u, mesh::TreeMesh{1}, equations, cache) neighbor_ids = cache.interfaces.neighbor_ids interfaces_u = cache.interfaces.u - size_arr = CuArray{Float64}(undef, size(interfaces_u, 2), size(interfaces_u, 3)) - prolong_interfaces_kernel = @cuda launch=false prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids) prolong_interfaces_kernel(interfaces_u, u, neighbor_ids; - configurator_2d(prolong_interfaces_kernel, size_arr)...) + kernel_configurator_2d(prolong_interfaces_kernel, + size(interfaces_u, 2), + size(interfaces_u, 3))...) return nothing end @@ -859,22 +852,20 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::False, e neighbor_ids = cache.interfaces.neighbor_ids interfaces_u = cache.interfaces.u surface_flux_values = cache.elements.surface_flux_values - surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) - size_arr = CuArray{Float64}(undef, size(interfaces_u, 3)) surface_flux_kernel = @cuda launch=false surface_flux_kernel!(surface_flux_arr, interfaces_u, equations, surface_flux) surface_flux_kernel(surface_flux_arr, interfaces_u, equations, surface_flux; - configurator_1d(surface_flux_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3)) + kernel_configurator_1d(surface_flux_kernel, size(interfaces_u, 3))...) interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids) interface_flux_kernel(surface_flux_values, surface_flux_arr, neighbor_ids; - configurator_2d(interface_flux_kernel, size_arr)...) + kernel_configurator_2d(interface_flux_kernel, + size(surface_flux_values, 1), + size(interfaces_u, 3))...) return nothing end @@ -892,8 +883,6 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::True, eq noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) - size_arr = CuArray{Float64}(undef, size(interfaces_u, 3)) - surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr, noncons_left_arr, noncons_right_arr, @@ -903,9 +892,8 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::True, eq nonconservative_flux) surface_noncons_flux_kernel(surface_flux_arr, noncons_left_arr, noncons_right_arr, interfaces_u, equations, surface_flux, nonconservative_flux; - configurator_1d(surface_noncons_flux_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3)) + kernel_configurator_1d(surface_noncons_flux_kernel, + size(interfaces_u, 3))...) interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, surface_flux_arr, @@ -914,7 +902,9 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::True, eq neighbor_ids) interface_flux_kernel(surface_flux_values, surface_flux_arr, noncons_left_arr, noncons_right_arr, neighbor_ids; - configurator_2d(interface_flux_kernel, size_arr)...) + kernel_configurator_2d(interface_flux_kernel, + size(surface_flux_values, 1), + size(interfaces_u, 3))...) return nothing end @@ -932,13 +922,13 @@ function cuda_prolong2boundaries!(u, mesh::TreeMesh{1}, boundary_conditions::Nam neighbor_sides = cache.boundaries.neighbor_sides boundaries_u = cache.boundaries.u - size_arr = CuArray{Float64}(undef, size(boundaries_u, 2), size(boundaries_u, 3)) - prolong_boundaries_kernel = @cuda launch=false prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_sides) prolong_boundaries_kernel(boundaries_u, u, neighbor_ids, neighbor_sides; - configurator_2d(prolong_boundaries_kernel, size_arr)...) + kernel_configurator_2d(prolong_boundaries_kernel, + size(boundaries_u, 2), + size(boundaries_u, 3))...) return nothing end @@ -970,11 +960,10 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts, n_boundaries_per_direction) last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction; - configurator_1d(last_first_indices_kernel, lasts)...) + kernel_configurator_1d(last_first_indices_kernel, length(lasts))...) - indices_arr = firsts boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end]) - + indices_arr = firsts boundary_conditions_callable = replace_boundary_conditions(boundary_conditions) boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values, @@ -988,7 +977,7 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup boundary_flux_kernel(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr, indices_arr, neighbor_ids, neighbor_sides, orientations, boundary_conditions_callable, equations, surface_flux; - configurator_1d(boundary_flux_kernel, boundary_arr)...) + kernel_configurator_1d(boundary_flux_kernel, length(boundary_arr))...) return nothing end @@ -1014,11 +1003,10 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts, n_boundaries_per_direction) last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction; - configurator_1d(last_first_indices_kernel, lasts)...) + kernel_configurator_1d(last_first_indices_kernel, length(lasts))...) - indices_arr = firsts boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end]) - + indices_arr = firsts boundary_conditions_callable = replace_boundary_conditions(boundary_conditions) boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values, @@ -1034,7 +1022,7 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup indices_arr, neighbor_ids, neighbor_sides, orientations, boundary_conditions_callable, equations, surface_flux, nonconservative_flux; - configurator_1d(boundary_flux_kernel, boundary_arr)...) + kernel_configurator_1d(boundary_flux_kernel, length(boundary_arr))...) return nothing end @@ -1051,7 +1039,7 @@ function cuda_surface_integral!(du, mesh::TreeMesh{1}, equations, dg::DGSEM, cac surface_flux_values, equations) surface_integral_kernel(du, factor_arr, surface_flux_values, equations; - configurator_3d(surface_integral_kernel, du)...) + kernel_configurator_3d(surface_integral_kernel, size(du)...)...) return nothing end @@ -1061,7 +1049,8 @@ function cuda_jacobian!(du, mesh::TreeMesh{1}, equations, cache) inverse_jacobian = cache.elements.inverse_jacobian jacobian_kernel = @cuda launch=false jacobian_kernel!(du, inverse_jacobian, equations) - jacobian_kernel(du, inverse_jacobian, equations; configurator_3d(jacobian_kernel, du)...) + jacobian_kernel(du, inverse_jacobian, equations; + kernel_configurator_3d(jacobian_kernel, size(du)...)...) return nothing end @@ -1075,12 +1064,10 @@ end function cuda_sources!(du, u, t, source_terms, equations::AbstractEquations{1}, cache) node_coordinates = cache.elements.node_coordinates - size_arr = CuArray{Float64}(undef, size(du, 2), size(du, 3)) - source_terms_kernel = @cuda launch=false source_terms_kernel!(du, u, node_coordinates, t, equations, source_terms) source_terms_kernel(du, u, node_coordinates, t, equations, source_terms; - configurator_2d(source_terms_kernel, size_arr)...) + kernel_configurator_2d(source_terms_kernel, size(du, 2), size(du, 3))...) return nothing end diff --git a/test/runtests.jl b/test/runtests.jl index aa306ac..af33d57 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,7 @@ module TestTrixiCUDA using Test: @testset @testset "TrixiCUDA.jl" begin - # include("./tree_dgsem_1d/tree_dgsem_1d.jl") + include("./tree_dgsem_1d/tree_dgsem_1d.jl") # include("./tree_dgsem_2d/tree_dgsem_2d.jl") # include("./tree_dgsem_3d/tree_dgsem_3d.jl") end diff --git a/test/test_script.jl b/test/test_script.jl index 5ce6563..613e744 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -1,101 +1,58 @@ -include("test_trixicuda.jl") +include("test_macros.jl") -equations = HyperbolicDiffusionEquations1D() +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) -initial_condition = initial_condition_poisson_nonperiodic +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) -boundary_conditions = boundary_condition_poisson_nonperiodic +coordinates_min = (-1.0, -1.0, -1.0) +coordinates_max = (1.0, 1.0, 1.0) -solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs) - -coordinates_min = 0.0 -coordinates_max = 1.0 mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 3, - n_cells_max = 30_000, - periodicity = false) + n_cells_max = 30_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver) +semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test, + solver) -semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, - boundary_conditions = boundary_conditions, - source_terms = source_terms_poisson_nonperiodic) -semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver, - boundary_conditions = boundary_conditions, - source_terms = source_terms_poisson_nonperiodic) +tspan = (0.0, 1.0) -tspan = (0.0, 5.0) +ode = semidiscretize(semi, tspan) +u_ode = copy(ode.u0) +du_ode = similar(u_ode) # Get CPU data +t = 0.0 (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi +u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache) +du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache) # Get GPU data -mesh_gpu, solver_gpu, cache_gpu = semi_gpu.mesh, semi_gpu.solver, semi_gpu.cache +t_gpu = 0.0 equations_gpu = semi_gpu.equations +mesh_gpu, solver_gpu, cache_gpu = semi_gpu.mesh, semi_gpu.solver, semi_gpu.cache initial_condition_gpu = semi_gpu.initial_condition boundary_conditions_gpu = semi_gpu.boundary_conditions source_terms_gpu = semi_gpu.source_terms - -# Set initial time -t = t_gpu = 0.0 - -# Get initial data -ode = semidiscretize(semi, tspan) -u_ode = copy(ode.u0) -du_ode = similar(u_ode) -u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache) -du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache) +u_gpu = CuArray(u) +du_gpu = CuArray(du) # Copy data to device du_gpu, u_gpu = TrixiCUDA.copy_to_gpu!(du, u) # Reset data on host Trixi.reset_du!(du, solver, cache) -# Test `cuda_volume_integral!` -TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, - Trixi.have_nonconservative_terms(equations_gpu), - equations_gpu, solver_gpu.volume_integral, solver_gpu, - cache_gpu) -Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), - equations, solver.volume_integral, solver, cache) -@test_approx du_gpu ≈ du - -# Test `cuda_prolong2interfaces!` -TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu) -Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver) -@test_approx cache_gpu.interfaces.u ≈ cache.interfaces.u - -# Test `cuda_interface_flux!` -TrixiCUDA.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu), - equations_gpu, solver_gpu, cache_gpu) -Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh, - Trixi.have_nonconservative_terms(equations), equations, - solver.surface_integral, solver, cache) -@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values - -# Test `cuda_prolong2boundaries!` -TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu, - cache_gpu) -Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver) -@test_approx cache_gpu.boundaries.u ≈ cache.boundaries.u - -# Test `cuda_boundary_flux!` -TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, - Trixi.have_nonconservative_terms(equations_gpu), equations_gpu, - solver_gpu, cache_gpu) -Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, - solver.surface_integral, solver) -@test_approx cache_gpu.elements.surface_flux_values ≈ cache.elements.surface_flux_values +derivative_dhat = CuArray{Float64}(solver_gpu.basis.derivative_dhat) +flux_arr1 = similar(u_gpu) +flux_arr2 = similar(u_gpu) +flux_arr3 = similar(u_gpu) -# Test `cuda_surface_integral!` -TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) -Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache) -@test_approx du_gpu ≈ du +size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 5)) -# Test `cuda_jacobian!` -TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu) -Trixi.apply_jacobian!(du, mesh, equations, solver, cache) -@test_approx du_gpu ≈ du +flux_kernel = @cuda launch=false TrixiCUDA.flux_kernel!(flux_arr1, flux_arr2, flux_arr3, u_gpu, + equations, + flux) -# Test `cuda_sources!` -TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu) -Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache) -@test_approx du_gpu ≈ du +config = launch_configuration(flux_kernel.fun)