From 774b556601707bc0303651993558b233365a4499 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 3 Oct 2024 23:46:52 -1000 Subject: [PATCH] Complete 3D --- src/auxiliary/configurators.jl | 37 +++--- src/solvers/dg_2d.jl | 2 +- src/solvers/dg_3d.jl | 201 ++++++++++++++++----------------- test/runtests.jl | 4 +- 4 files changed, 117 insertions(+), 127 deletions(-) diff --git a/src/auxiliary/configurators.jl b/src/auxiliary/configurators.jl index d831053..c2e04a7 100644 --- a/src/auxiliary/configurators.jl +++ b/src/auxiliary/configurators.jl @@ -57,29 +57,30 @@ function kernel_configurator_3d(kernel::HostKernel, x::Int, y::Int, z::Int) end # Deprecated old kernel configurators below -function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1}) - config = launch_configuration(kernel.fun) - threads = min(length(array), config.threads) - blocks = cld(length(array), threads) +# function configurator_1d(kernel::HostKernel, array::CuArray{<:Any, 1}) +# config = launch_configuration(kernel.fun) - return (threads = threads, blocks = blocks) -end +# threads = min(length(array), config.threads) +# blocks = cld(length(array), threads) -function configurator_2d(kernel::HostKernel, array::CuArray{<:Any, 2}) - config = launch_configuration(kernel.fun) +# return (threads = threads, blocks = blocks) +# end - threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 2))), 2)) - blocks = map(cld, size(array), threads) +# function configurator_2d(kernel::HostKernel, array::CuArray{<:Any, 2}) +# config = launch_configuration(kernel.fun) - return (threads = threads, blocks = blocks) -end +# threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 2))), 2)) +# blocks = map(cld, size(array), threads) -function configurator_3d(kernel::HostKernel, array::CuArray{<:Any, 3}) - config = launch_configuration(kernel.fun) +# return (threads = threads, blocks = blocks) +# end - threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 3))), 3)) - blocks = map(cld, size(array), threads) +# function configurator_3d(kernel::HostKernel, array::CuArray{<:Any, 3}) +# config = launch_configuration(kernel.fun) - return (threads = threads, blocks = blocks) -end +# threads = Tuple(fill(Int(floor((min(maximum(size(array)), config.threads))^(1 / 3))), 3)) +# blocks = map(cld, size(array), threads) + +# return (threads = threads, blocks = blocks) +# end diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 679683c..c01c251 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -1536,7 +1536,7 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::True, dg::DG orientations) prolong_mortars_large2small_kernel(u_upper, u_lower, u, forward_upper, forward_lower, neighbor_ids, large_sides, orientations; - kernel_configurator_3d(prolong_mortars_small2small_kernel, + kernel_configurator_3d(prolong_mortars_large2small_kernel, size(u_upper, 2), size(u_upper, 3), size(u_upper, 4))...) diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 869e378..7fbf3b1 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -1386,19 +1386,16 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms, flux_arr2 = similar(u) flux_arr3 = similar(u) - size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 5)) - flux_kernel = @cuda launch=false flux_kernel!(flux_arr1, flux_arr2, flux_arr3, u, equations, flux) flux_kernel(flux_arr1, flux_arr2, flux_arr3, u, equations, flux; - configurator_2d(flux_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) + kernel_configurator_2d(flux_kernel, size(u, 2)^3, size(u, 5))...) weak_form_kernel = @cuda launch=false weak_form_kernel!(du, derivative_dhat, flux_arr1, flux_arr2, flux_arr3) weak_form_kernel(du, derivative_dhat, flux_arr1, flux_arr2, flux_arr3; - configurator_3d(weak_form_kernel, size_arr)...) + kernel_configurator_3d(weak_form_kernel, size(du, 1), size(du, 2)^3, + size(du, 5))...) return nothing end @@ -1419,16 +1416,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_flux_arr3 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) - size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5)) - volume_flux_kernel = @cuda launch=false volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, u, equations, volume_flux) volume_flux_kernel(volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, u, equations, volume_flux; - configurator_2d(volume_flux_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) + kernel_configurator_2d(volume_flux_kernel, size(u, 2)^4, size(u, 5))...) volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split, volume_flux_arr1, @@ -1436,7 +1429,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_flux_arr3, equations) volume_integral_kernel(du, derivative_split, volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, equations; - configurator_3d(volume_integral_kernel, size_arr)...) + kernel_configurator_3d(volume_integral_kernel, size(du, 1), + size(du, 2)^3, size(du, 5))...) return nothing end @@ -1463,8 +1457,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: noncons_flux_arr3 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) - size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5)) - symmetric_noncons_flux_kernel = @cuda launch=false symmetric_noncons_flux_kernel!(symmetric_flux_arr1, symmetric_flux_arr2, symmetric_flux_arr3, @@ -1479,10 +1471,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: symmetric_noncons_flux_kernel(symmetric_flux_arr1, symmetric_flux_arr2, symmetric_flux_arr3, noncons_flux_arr1, noncons_flux_arr2, noncons_flux_arr3, u, derivative_split, equations, symmetric_flux, nonconservative_flux; - configurator_2d(symmetric_noncons_flux_kernel, size_arr)...) + kernel_configurator_2d(symmetric_noncons_flux_kernel, + size(u, 2)^4, size(u, 5))...) derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split` - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split, symmetric_flux_arr1, @@ -1494,7 +1486,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_integral_kernel(du, derivative_split, symmetric_flux_arr1, symmetric_flux_arr2, symmetric_flux_arr3, noncons_flux_arr1, noncons_flux_arr2, noncons_flux_arr3; - configurator_3d(volume_integral_kernel, size_arr)...) + kernel_configurator_3d(volume_integral_kernel, size(du, 1), + size(du, 2)^3, size(du, 5))...) return nothing end @@ -1514,7 +1507,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl - element_ids_dg = CUDA.zeros(Int, length(alpha)) element_ids_dgfv = CUDA.zeros(Int, length(alpha)) @@ -1523,7 +1515,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: alpha, atol) pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol; - configurator_1d(pure_blended_element_count_kernel, alpha)...) + kernel_configurator_1d(pure_blended_element_count_kernel, + length(alpha))...) derivative_split = dg.basis.derivative_split set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` @@ -1544,8 +1537,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: fstar3_L = cache.fstar3_L fstar3_R = cache.fstar3_R - size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5)) - volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, @@ -1560,9 +1551,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_flux_dgfv_kernel(volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, element_ids_dgfv, equations, volume_flux_dg, volume_flux_fv; - configurator_2d(volume_flux_dgfv_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) + kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^4, + size(u, 5))...) volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, @@ -1574,9 +1564,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: equations) volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, equations; - configurator_3d(volume_integral_dg_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 5)) + kernel_configurator_3d(volume_integral_dg_kernel, size(du, 1), + size(du, 2)^3, size(du, 5))...) volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, @@ -1587,7 +1576,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: alpha) volume_integral_fv_kernel(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, inverse_weights, element_ids_dgfv, alpha; - configurator_2d(volume_integral_fv_kernel, size_arr)...) + kernel_configurator_2d(volume_integral_fv_kernel, size(u, 2)^3, + size(u, 5))...) return nothing end @@ -1607,7 +1597,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: # For `Float64`, this gives 1.8189894035458565e-12 # For `Float32`, this gives 1.1920929f-5 atol = 1.8189894035458565e-12 # see also `pure_and_blended_element_ids!` in Trixi.jl - element_ids_dg = CUDA.zeros(Int, length(alpha)) element_ids_dgfv = CUDA.zeros(Int, length(alpha)) @@ -1616,7 +1605,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: alpha, atol) pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol; - configurator_1d(pure_blended_element_count_kernel, alpha)...) + kernel_configurator_1d(pure_blended_element_count_kernel, + length(alpha))...) derivative_split = dg.basis.derivative_split set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` @@ -1643,8 +1633,6 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: fstar3_L = cache.fstar3_L fstar3_R = cache.fstar3_R - size_arr = CuArray{Float64}(undef, size(u, 2)^4, size(u, 5)) - volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, @@ -1666,12 +1654,11 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, element_ids_dgfv, derivative_split, equations, volume_flux_dg, nonconservative_flux_dg, volume_flux_fv, nonconservative_flux_fv; - configurator_2d(volume_flux_dgfv_kernel, size_arr)...) + kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^4, + size(u, 5))...) derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split` - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) - volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, @@ -1686,9 +1673,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split, volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, noncons_flux_arr1, noncons_flux_arr2, noncons_flux_arr3, equations; - configurator_3d(volume_integral_dg_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 5)) + kernel_configurator_3d(volume_integral_dg_kernel, size(du, 1), + size(du, 2)^3, size(du, 5))...) volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, @@ -1699,7 +1685,8 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: alpha) volume_integral_fv_kernel(du, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, inverse_weights, element_ids_dgfv, alpha; - configurator_2d(volume_integral_fv_kernel, size_arr)...) + kernel_configurator_2d(volume_integral_fv_kernel, size(u, 2)^3, + size(u, 5))...) return nothing end @@ -1710,15 +1697,15 @@ function cuda_prolong2interfaces!(u, mesh::TreeMesh{3}, equations, cache) orientations = cache.interfaces.orientations interfaces_u = cache.interfaces.u - size_arr = CuArray{Float64}(undef, size(interfaces_u, 2) * size(interfaces_u, 3)^2, - size(interfaces_u, 5)) - prolong_interfaces_kernel = @cuda launch=false prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, orientations, equations) prolong_interfaces_kernel(interfaces_u, u, neighbor_ids, orientations, equations; - configurator_2d(prolong_interfaces_kernel, size_arr)...) + kernel_configurator_2d(prolong_interfaces_kernel, + size(interfaces_u, 2) * + size(interfaces_u, 3)^2, + size(interfaces_u, 5))...) return nothing end @@ -1732,19 +1719,15 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::False, e orientations = cache.interfaces.orientations interfaces_u = cache.interfaces.u surface_flux_values = cache.elements.surface_flux_values - surface_flux_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) - size_arr = CuArray{Float64}(undef, size(interfaces_u, 3), size(interfaces_u, 4), - size(interfaces_u, 5)) surface_flux_kernel = @cuda launch=false surface_flux_kernel!(surface_flux_arr, interfaces_u, orientations, equations, surface_flux) surface_flux_kernel(surface_flux_arr, interfaces_u, orientations, equations, surface_flux; - configurator_3d(surface_flux_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3)^2, - size(interfaces_u, 5)) + kernel_configurator_3d(surface_flux_kernel, size(interfaces_u, 3), + size(interfaces_u, 4), + size(interfaces_u, 5))...) interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, surface_flux_arr, @@ -1752,7 +1735,10 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::False, e equations) interface_flux_kernel(surface_flux_values, surface_flux_arr, neighbor_ids, orientations, equations; - configurator_3d(interface_flux_kernel, size_arr)...) + kernel_configurator_3d(interface_flux_kernel, + size(surface_flux_values, 1), + size(interfaces_u, 3)^2, + size(interfaces_u, 5))...) return nothing end @@ -1771,9 +1757,6 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::True, eq noncons_left_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) noncons_right_arr = CuArray{Float64}(undef, size(interfaces_u)[2:end]...) - size_arr = CuArray{Float64}(undef, size(interfaces_u, 3), size(interfaces_u, 4), - size(interfaces_u, 5)) - surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr, noncons_left_arr, noncons_right_arr, @@ -1784,10 +1767,10 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::True, eq nonconservative_flux) surface_noncons_flux_kernel(surface_flux_arr, noncons_left_arr, noncons_right_arr, interfaces_u, orientations, equations, surface_flux, nonconservative_flux; - configurator_3d(surface_noncons_flux_kernel, size_arr)...) - - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3)^2, - size(interfaces_u, 5)) + kernel_configurator_3d(surface_noncons_flux_kernel, + size(interfaces_u, 3), + size(interfaces_u, 4), + size(interfaces_u, 5))...) interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, surface_flux_arr, @@ -1798,7 +1781,10 @@ function cuda_interface_flux!(mesh::TreeMesh{3}, nonconservative_terms::True, eq interface_flux_kernel(surface_flux_values, surface_flux_arr, noncons_left_arr, noncons_right_arr, neighbor_ids, orientations, equations; - configurator_3d(interface_flux_kernel, size_arr)...) + kernel_configurator_3d(interface_flux_kernel, + size(surface_flux_values, 1), + size(interfaces_u, 3)^2, + size(interfaces_u, 5))...) return nothing end @@ -1817,9 +1803,6 @@ function cuda_prolong2boundaries!(u, mesh::TreeMesh{3}, boundary_conditions::Nam orientations = cache.boundaries.orientations boundaries_u = cache.boundaries.u - size_arr = CuArray{Float64}(undef, size(boundaries_u, 2) * size(boundaries_u, 3)^2, - size(boundaries_u, 5)) - prolong_boundaries_kernel = @cuda launch=false prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_sides, @@ -1827,7 +1810,10 @@ function cuda_prolong2boundaries!(u, mesh::TreeMesh{3}, boundary_conditions::Nam equations) prolong_boundaries_kernel(boundaries_u, u, neighbor_ids, neighbor_sides, orientations, equations; - configurator_2d(prolong_boundaries_kernel, size_arr)...) + kernel_configurator_2d(prolong_boundaries_kernel, + size(boundaries_u, 2) * + size(boundaries_u, 3)^2, + size(boundaries_u, 5))...) return nothing end @@ -1859,13 +1845,11 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{3}, boundary_conditions::NamedTup last_first_indices_kernel = @cuda launch=false last_first_indices_kernel!(lasts, firsts, n_boundaries_per_direction) last_first_indices_kernel(lasts, firsts, n_boundaries_per_direction; - configurator_1d(last_first_indices_kernel, lasts)...) + kernel_configurator_1d(last_first_indices_kernel, length(lasts))...) - indices_arr = firsts boundary_arr = CuArray{Int}(Array(firsts)[1]:Array(lasts)[end]) - + indices_arr = firsts boundary_conditions_callable = replace_boundary_conditions(boundary_conditions) - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 2)^2, length(boundary_arr)) boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinates, @@ -1877,7 +1861,9 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{3}, boundary_conditions::NamedTup boundary_flux_kernel(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr, indices_arr, neighbor_ids, neighbor_sides, orientations, boundary_conditions_callable, equations, surface_flux; - configurator_2d(boundary_flux_kernel, size_arr)...) + kernel_configurator_2d(boundary_flux_kernel, + size(surface_flux_values, 2)^2, + length(boundary_arr))...) return nothing end @@ -1898,13 +1884,9 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DG u_upper_right = cache.mortars.u_upper_right u_lower_left = cache.mortars.u_lower_left u_lower_right = cache.mortars.u_lower_right - forward_upper = CuArray{Float64}(dg.mortar.forward_upper) forward_lower = CuArray{Float64}(dg.mortar.forward_lower) - size_arr = CuArray{Float64}(undef, size(u_upper_left, 2), size(u_upper_left, 3)^2, - size(u_upper_left, 5)) - prolong_mortars_small2small_kernel = @cuda launch=false prolong_mortars_small2small_kernel!(u_upper_left, u_upper_right, u_lower_left, @@ -1915,8 +1897,11 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DG orientations) prolong_mortars_small2small_kernel(u_upper_left, u_upper_right, u_lower_left, u_lower_right, u, neighbor_ids, large_sides, orientations; - configurator_3d(prolong_mortars_small2small_kernel, - size_arr)...) + kernel_configurator_3d(prolong_mortars_small2small_kernel, + size(u_upper_left, 2), + size(u_upper_left, 3)^2, + size(u_upper_left, 5))...) + tmp_upper_left = zero(similar(u_upper_left)) # undef to zero tmp_upper_right = zero(similar(u_upper_right)) # undef to zero tmp_lower_left = zero(similar(u_lower_left)) # undef to zero @@ -1936,8 +1921,10 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DG prolong_mortars_large2small_kernel(tmp_upper_left, tmp_upper_right, tmp_lower_left, tmp_lower_right, u, forward_upper, forward_lower, neighbor_ids, large_sides, orientations; - configurator_3d(prolong_mortars_large2small_kernel, - size_arr)...) + kernel_configurator_3d(prolong_mortars_large2small_kernel, + size(u_upper_left, 2), + size(u_upper_left, 3)^2, + size(u_upper_left, 5))...) prolong_mortars_large2small_kernel = @cuda launch=false prolong_mortars_large2small_kernel!(u_upper_left, u_upper_right, @@ -1954,8 +1941,10 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DG tmp_upper_left, tmp_upper_right, tmp_lower_left, tmp_lower_right, forward_upper, forward_lower, large_sides; - configurator_3d(prolong_mortars_large2small_kernel, - size_arr)...) + kernel_configurator_3d(prolong_mortars_large2small_kernel, + size(u_upper_left, 2), + size(u_upper_left, 3)^2, + size(u_upper_left, 5))...) return nothing end @@ -1991,9 +1980,6 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati fstar_lower_left = cache.fstar_lower_left fstar_lower_right = cache.fstar_lower_right - size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4), - length(orientations)) - mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, u_upper_left, u_upper_right, @@ -2003,16 +1989,15 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati mortar_flux_kernel(fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, u_upper_left, u_upper_right, u_lower_left, u_lower_right, orientations, equations, surface_flux; - configurator_3d(mortar_flux_kernel, size_arr)...) + kernel_configurator_3d(mortar_flux_kernel, size(u_upper_left, 3), + size(u_upper_left, 4), + length(orientations))...) tmp_upper_left = zero(similar(surface_flux_values)) # undef to zero tmp_upper_right = zero(similar(surface_flux_values)) # undef to zero tmp_lower_left = zero(similar(surface_flux_values)) # undef to zero tmp_lower_right = zero(similar(surface_flux_values)) # undef to zero - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2)^2, - length(orientations)) - # TODO: Combine these two kernels into one (synchronization) mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, tmp_upper_left, @@ -2032,7 +2017,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati tmp_lower_right, fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, reverse_upper, reverse_lower, neighbor_ids, large_sides, orientations; - configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + kernel_configurator_3d(mortar_flux_copy_to_kernel, + size(surface_flux_values, 1), + size(surface_flux_values, 2)^2, + length(orientations))...) mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, tmp_surface_flux_values, @@ -2048,7 +2036,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, tmp_upper_left, tmp_upper_right, tmp_lower_left, tmp_lower_right, reverse_upper, reverse_lower, neighbor_ids, large_sides, orientations; - configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + kernel_configurator_3d(mortar_flux_copy_to_kernel, + size(surface_flux_values, 1), + size(surface_flux_values, 2)^2, + length(orientations))...) return nothing end @@ -2072,15 +2063,11 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati surface_flux_values = cache.elements.surface_flux_values tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero - fstar_upper_left = cache.fstar_upper_left fstar_upper_right = cache.fstar_upper_right fstar_lower_left = cache.fstar_lower_left fstar_lower_right = cache.fstar_lower_right - size_arr = CuArray{Float64}(undef, size(u_upper_left, 3), size(u_upper_left, 4), - length(orientations)) - mortar_flux_kernel = @cuda launch=false mortar_flux_kernel!(fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, u_upper_left, u_upper_right, @@ -2091,16 +2078,15 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati mortar_flux_kernel(fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, u_upper_left, u_upper_right, u_lower_left, u_lower_right, orientations, large_sides, equations, surface_flux, nonconservative_flux; - configurator_3d(mortar_flux_kernel, size_arr)...) + kernel_configurator_3d(mortar_flux_kernel, size(u_upper_left, 3), + size(u_upper_left, 4), + length(orientations))...) tmp_upper_left = zero(similar(surface_flux_values)) # undef to zero tmp_upper_right = zero(similar(surface_flux_values)) # undef to zero tmp_lower_left = zero(similar(surface_flux_values)) # undef to zero tmp_lower_right = zero(similar(surface_flux_values)) # undef to zero - size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(surface_flux_values, 2)^2, - length(orientations)) - # TODO: Combine these two kernels into one (synchronization) mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, tmp_upper_left, @@ -2120,7 +2106,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati tmp_lower_right, fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, reverse_upper, reverse_lower, neighbor_ids, large_sides, orientations; - configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + kernel_configurator_3d(mortar_flux_copy_to_kernel, + size(surface_flux_values, 1), + size(surface_flux_values, 2)^2, + length(orientations))...) mortar_flux_copy_to_kernel = @cuda launch=false mortar_flux_copy_to_kernel!(surface_flux_values, tmp_surface_flux_values, @@ -2136,7 +2125,10 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati mortar_flux_copy_to_kernel(surface_flux_values, tmp_surface_flux_values, tmp_upper_left, tmp_upper_right, tmp_lower_left, tmp_lower_right, reverse_upper, reverse_lower, neighbor_ids, large_sides, orientations; - configurator_3d(mortar_flux_copy_to_kernel, size_arr)...) + kernel_configurator_3d(mortar_flux_copy_to_kernel, + size(surface_flux_values, 1), + size(surface_flux_values, 2)^2, + length(orientations))...) return nothing end @@ -2149,13 +2141,12 @@ function cuda_surface_integral!(du, mesh::TreeMesh{3}, equations, dg::DGSEM, cac ]) surface_flux_values = cache.elements.surface_flux_values - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) - surface_integral_kernel = @cuda launch=false surface_integral_kernel!(du, factor_arr, surface_flux_values, equations) surface_integral_kernel(du, factor_arr, surface_flux_values, equations; - configurator_3d(surface_integral_kernel, size_arr)...) + kernel_configurator_3d(surface_integral_kernel, size(du, 1), + size(du, 2)^3, size(du, 5))...) return nothing end @@ -2164,10 +2155,10 @@ end function cuda_jacobian!(du, mesh::TreeMesh{3}, equations, cache) inverse_jacobian = cache.elements.inverse_jacobian - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^3, size(du, 5)) - jacobian_kernel = @cuda launch=false jacobian_kernel!(du, inverse_jacobian, equations) - jacobian_kernel(du, inverse_jacobian, equations; configurator_3d(jacobian_kernel, size_arr)...) + jacobian_kernel(du, inverse_jacobian, equations; + kernel_configurator_3d(jacobian_kernel, size(du, 1), size(du, 2)^3, + size(du, 5))...) return nothing end @@ -2181,12 +2172,10 @@ end function cuda_sources!(du, u, t, source_terms, equations::AbstractEquations{3}, cache) node_coordinates = cache.elements.node_coordinates - size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 5)) - source_terms_kernel = @cuda launch=false source_terms_kernel!(du, u, node_coordinates, t, equations, source_terms) source_terms_kernel(du, u, node_coordinates, t, equations, source_terms; - configurator_2d(source_terms_kernel, size_arr)...) + kernel_configurator_2d(source_terms_kernel, size(u, 2)^3, size(u, 5))...) return nothing end diff --git a/test/runtests.jl b/test/runtests.jl index bae96cd..424abfe 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,8 +4,8 @@ using Test: @testset @testset "TrixiCUDA.jl" begin # include("./tree_dgsem_1d/tree_dgsem_1d.jl") - include("./tree_dgsem_2d/tree_dgsem_2d.jl") - # include("./tree_dgsem_3d/tree_dgsem_3d.jl") + # include("./tree_dgsem_2d/tree_dgsem_2d.jl") + include("./tree_dgsem_3d/tree_dgsem_3d.jl") end end # module