diff --git a/Project.toml b/Project.toml
index 1fb325b..725773e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,6 +15,7 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
 BenchmarkTools = "1"
 CUDA = "5"
 StrideArrays = "0.1"
+StaticArrays = "1"
 Trixi = "0.8"
 TrixiBase = "0.1"
 julia = "1.10"
diff --git a/src/TrixiGPU.jl b/src/TrixiGPU.jl
index 0cac011..0028b05 100644
--- a/src/TrixiGPU.jl
+++ b/src/TrixiGPU.jl
@@ -7,7 +7,8 @@
 using CUDA: @cuda, CuArray, HostKernel,
             threadIdx, blockIdx, blockDim, similar, launch_configuration
 
 using Trixi: AbstractEquations, TreeMesh, VolumeIntegralWeakForm, DGSEM,
-             flux, ntuple, nvariables
+             flux, ntuple, nvariables,
+             True, False
 
 import Trixi: get_node_vars, get_node_coords, get_surface_node_vars
diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
index 5c055e6..a313b39 100644
--- a/src/solvers/dg_1d.jl
+++ b/src/solvers/dg_1d.jl
@@ -1,9 +1,10 @@
 # Everything related to a DG semidiscretization in 1D
+# TODO: Please check whether `equations::AbstractEquations{1}` is needed for each function here!!
 # Functions ending with `_kernel` are CUDA kernels that are launched by the `@cuda` macro.
 
-# Kernel for calculating flux along normal direction
-function flux_kernel!(flux_arr, u, flux::Function, equations::AbstractEquations{1})
+# Kernel for calculating fluxes along normal direction
+function flux_kernel!(flux_arr, u, equations::AbstractEquations{1}, flux::Function)
     j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
 
@@ -22,8 +23,8 @@ function flux_kernel!(flux_arr, u, flux::Function, equations::AbstractEquations{
     return nothing
 end
 
-# kernel for calculating weak form
-function weak_form_kernel!(du, derivative_dhat, flux_arr, equations::AbstractEquations{1})
+# Kernel for calculating weak form
+function weak_form_kernel!(du, derivative_dhat, flux_arr)
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
     k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
 
@@ -39,6 +40,62 @@ function weak_form_kernel!(du, derivative_dhat, flux_arr, equations::AbstractEqu
     return nothing
 end
 
+# Kernel for prolonging solution to interfaces
+function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids)
+    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (j <= size(interfaces_u, 2) && k <= size(interfaces_u, 3))
+        left_element = neighbor_ids[1, k]
+        right_element = neighbor_ids[2, k]
+
+        @inbounds begin
+            interfaces_u[1, j, k] = u[j, size(u, 2), left_element]
+            interfaces_u[2, j, k] = u[j, 1, right_element]
+        end
+    end
+
+    return nothing
+end
+
+# Kernel for calculating surface fluxes
+function surface_flux_kernel!(surface_flux_arr, interfaces_u,
+                              equations::AbstractEquations{1}, surface_flux::Any)
+    k = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+
+    if (k <= size(surface_flux_arr, 3))
+        u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, k)
+
+        surface_flux_node = surface_flux(u_ll, u_rr, 1, equations)
+
+        @inbounds begin
+            for jj in axes(surface_flux_arr, 2)
+                surface_flux_arr[1, jj, k] = surface_flux_node[jj]
+            end
+        end
+    end
+
+    return nothing
+end
+
+# Kernel for setting interface fluxes
+function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids)
+    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (i <= size(surface_flux_values, 1) && k <= size(surface_flux_arr, 3))
+        left_id = neighbor_ids[1, k]
+        right_id = neighbor_ids[2, k]
+
+        @inbounds begin
+            surface_flux_values[i, 2, left_id] = surface_flux_arr[1, i, k]
+            surface_flux_values[i, 1, right_id] = surface_flux_arr[1, i, k]
+        end
+    end
+
+    return nothing
+end
+
 # Functions beginning with `cuda_` pack CUDA kernels together,
 # calling them from the host (i.e., CPU) and running them on the device (i.e., GPU).
 
@@ -50,13 +107,61 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms,
     size_arr = CuArray{Float32}(undef, size(u, 2), size(u, 3))
 
-    flux_kernel = @cuda launch=false flux_kernel!(flux_arr, u, flux, equations)
-    flux_kernel(flux_arr, u, flux, equations; configurator_2d(flux_kernel, size_arr)...)
+    flux_kernel = @cuda launch=false flux_kernel!(flux_arr, u, equations, flux)
+    flux_kernel(flux_arr, u, equations, flux; configurator_2d(flux_kernel, size_arr)...)
 
-    weak_form_kernel = @cuda launch=false weak_form_kernel!(du, derivative_dhat, flux_arr,
-                                                            equations)
-    weak_form_kernel(du, derivative_dhat, flux_arr, equations;
+    weak_form_kernel = @cuda launch=false weak_form_kernel!(du, derivative_dhat, flux_arr)
+    weak_form_kernel(du, derivative_dhat, flux_arr;
                      configurator_3d(weak_form_kernel, du)...,)
 
     return nothing
 end
+
+# Pack kernels for prolonging solution to interfaces
+function cuda_prolong2interfaces!(u, mesh::TreeMesh{1}, cache)
+    neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
+    interfaces_u = CuArray{Float32}(cache.interfaces.u)
+
+    size_arr = CuArray{Float32}(undef, size(interfaces_u, 2), size(interfaces_u, 3))
+
+    prolong_interfaces_kernel = @cuda launch=false prolong_interfaces_kernel!(interfaces_u,
+                                                                              u,
+                                                                              neighbor_ids)
+    prolong_interfaces_kernel(interfaces_u, u, neighbor_ids;
+                              configurator_2d(prolong_interfaces_kernel, size_arr)...,)
+
+    cache.interfaces.u = interfaces_u # copy back to host automatically
+
+    return nothing
+end
+
+# Pack kernels for calculating interface fluxes
+function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::False, equations,
+                              dg::DGSEM, cache)
+    surface_flux = dg.surface_integral.surface_flux
+
+    neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
+    interfaces_u = CuArray{Float32}(cache.interfaces.u)
+    surface_flux_arr = CuArray{Float32}(undef, 1, size(interfaces_u)[2:end]...)
+    surface_flux_values = CuArray{Float32}(cache.elements.surface_flux_values)
+
+    size_arr = CuArray{Float32}(undef, size(interfaces_u, 3))
+
+    surface_flux_kernel = @cuda launch=false surface_flux_kernel!(surface_flux_arr,
+                                                                  interfaces_u, equations,
+                                                                  surface_flux)
+    surface_flux_kernel(surface_flux_arr, interfaces_u, equations, surface_flux;
+                        configurator_1d(surface_flux_kernel, size_arr)...,)
+
+    size_arr = CuArray{Float32}(undef, size(surface_flux_values, 1), size(interfaces_u, 3))
+
+    interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values,
+                                                                      surface_flux_arr,
+                                                                      neighbor_ids)
+    interface_flux_kernel(surface_flux_values, surface_flux_arr, neighbor_ids;
+                          configurator_2d(interface_flux_kernel, size_arr)...,)
+
+    cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically
+
+    return nothing
+end
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index 9ecd84a..e276cfe 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -37,3 +37,8 @@ du, u = TrixiGPU.copy_to_device!(du, u)
 
 TrixiGPU.cuda_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
                                equations, solver.volume_integral, solver)
+
+TrixiGPU.cuda_prolong2interfaces!(u, mesh, cache)
+
+TrixiGPU.cuda_interface_flux!(mesh, Trixi.have_nonconservative_terms(equations), equations,
+                              solver, cache)
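
Note for reviewers: the `configurator_1d`, `configurator_2d`, and `configurator_3d` helpers splatted into the kernel launches above are defined elsewhere in TrixiGPU.jl and are not part of this diff. Below is a minimal sketch of their assumed shape, built on CUDA.jl's occupancy API `launch_configuration`; the names match the calls above, but the exact heuristics are an assumption, not the package's verbatim code.

using CUDA: CuArray, HostKernel, launch_configuration

# 1D: use as many threads per block as occupancy allows, then cover the array.
function configurator_1d(kernel::HostKernel, array::CuArray)
    config = launch_configuration(kernel.fun)

    threads = min(length(array), config.threads)
    blocks = cld(length(array), threads)

    return (threads = threads, blocks = blocks)
end

# 2D: split the per-block thread budget evenly across both axes.
function configurator_2d(kernel::HostKernel, array::CuArray)
    config = launch_configuration(kernel.fun)

    threads = Tuple(fill(Int(floor(config.threads^(1 / 2))), 2))
    blocks = map(cld, size(array), threads)

    return (threads = threads, blocks = blocks)
end

# 3D: same idea with a cube root.
function configurator_3d(kernel::HostKernel, array::CuArray)
    config = launch_configuration(kernel.fun)

    threads = Tuple(fill(Int(floor(config.threads^(1 / 3))), 3))
    blocks = map(cld, size(array), threads)

    return (threads = threads, blocks = blocks)
end

The returned named tuple is splatted as keyword arguments, which is why the launches read `kernel(args...; configurator_2d(kernel, size_arr)...)`.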
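
Relatedly, the new StaticArrays compat entry in Project.toml and the `import Trixi: get_surface_node_vars` line go hand in hand: `surface_flux_kernel!` needs a GPU-compatible method that gathers both sides of an interface into stack-allocated `SVector`s, since device code cannot heap-allocate. A hypothetical sketch of such a method follows, assuming the `(side, variable, interface)` layout that `prolong_interfaces_kernel!` writes into `interfaces_u`; this is an illustration only, not necessarily the method this PR relies on.

using StaticArrays: SVector
using Trixi: nvariables

# Read the left/right states at a given interface as SVectors so the surface
# flux can be evaluated on the device without allocation.
@inline function get_surface_node_vars(u, equations, interface)
    u_ll = SVector(ntuple(v -> u[1, v, interface], Val(nvariables(equations))))
    u_rr = SVector(ntuple(v -> u[2, v, interface], Val(nvariables(equations))))

    return u_ll, u_rr
end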