From 766b38fcbe2b31eb17d13ca7c385cd858e6672d3 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Fri, 9 Aug 2024 01:27:47 +0000 Subject: [PATCH 1/3] CompatHelper: add new compat entry for StaticArrays at version 1, (keep existing compat) --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 3248cf5..9c54acb 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284" [compat] BenchmarkTools = "1" CUDA = "5" +StaticArrays = "1" Trixi = "0.8" TrixiBase = "0.1" julia = "1.10" From 3593a737a0d1cfad4c6dd373ac2bc17a1f96471b Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 8 Aug 2024 15:48:58 -1000 Subject: [PATCH 2/3] Partial test passed for simple solver --- src/TrixiGPU.jl | 3 +- src/solvers/dg_1d.jl | 123 +++++++++++++++++++++++++++++++++++++++---- test/test_solvers.jl | 5 ++ 3 files changed, 121 insertions(+), 10 deletions(-) diff --git a/src/TrixiGPU.jl b/src/TrixiGPU.jl index 0cac011..0028b05 100644 --- a/src/TrixiGPU.jl +++ b/src/TrixiGPU.jl @@ -7,7 +7,8 @@ using CUDA: @cuda, CuArray, HostKernel, threadIdx, blockIdx, blockDim, similar, launch_configuration using Trixi: AbstractEquations, TreeMesh, VolumeIntegralWeakForm, DGSEM, - flux, ntuple, nvariables + flux, ntuple, nvariables, + True, False import Trixi: get_node_vars, get_node_coords, get_surface_node_vars diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index 5c055e6..db08600 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -1,9 +1,10 @@ # Everything related to a DG semidiscretization in 1D +# TODO: Please check whether `equations::AbstractEquations{1}` is needed for each funtion here!! # Functions end with `_kernel` are CUDA kernels that are going to be launed by the `@cuda` macro. -# Kernel for calculating flux along normal direction -function flux_kernel!(flux_arr, u, flux::Function, equations::AbstractEquations{1}) +# Kernel for calculating fluxes along normal direction +function flux_kernel!(flux_arr, u, equations::AbstractEquations{1}, flux::Function) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -22,8 +23,8 @@ function flux_kernel!(flux_arr, u, flux::Function, equations::AbstractEquations{ return nothing end -# kernel for calculating weak form -function weak_form_kernel!(du, derivative_dhat, flux_arr, equations::AbstractEquations{1}) +# Kernel for calculating weak form +function weak_form_kernel!(du, derivative_dhat, flux_arr) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z @@ -39,6 +40,62 @@ function weak_form_kernel!(du, derivative_dhat, flux_arr, equations::AbstractEqu return nothing end +# Kernel for prolonging two interfaces +function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids) + j = (blockIdx().x - 1) * blockDim().x + threadIdx().x + k = (blockIdx().y - 1) * blockDim().y + threadIdx().y + + if (j <= size(interfaces_u, 2) && k <= size(interfaces_u, 3)) + left_element = neighbor_ids[1, k] + right_element = neighbor_ids[2, k] + + @inbounds begin + interfaces_u[1, j, k] = u[j, size(u, 2), left_element] + interfaces_u[2, j, k] = u[j, 1, right_element] + end + end + + return nothing +end + +# Kernel for calculating surface fluxes +function surface_flux_kernel!(surface_flux_arr, interfaces_u, + equations::AbstractEquations{1}, surface_flux::Any) + k = (blockIdx().x - 1) * blockDim().x + threadIdx().x + + if (k <= size(surface_flux_arr, 3)) + u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, k) + + surface_flux_node = surface_flux(u_ll, u_rr, 1, equations) + + @inbounds begin + for jj in axes(surface_flux_arr, 2) + surface_flux_arr[1, jj, k] = surface_flux_node[jj] + end + end + end + + return nothing +end + +# Kernel for setting interface fluxes +function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids) + i = (blockIdx().x - 1) * blockDim().x + threadIdx().x + k = (blockIdx().y - 1) * blockDim().y + threadIdx().y + + if (i <= size(surface_flux_values, 1) && k <= size(surface_flux_arr, 3)) + left_id = neighbor_ids[1, k] + right_id = neighbor_ids[2, k] + + @inbounds begin + surface_flux_values[i, 2, left_id] = surface_flux_arr[1, i, k] + surface_flux_values[i, 1, right_id] = surface_flux_arr[1, i, k] + end + end + + return nothing +end + # Functions begin with `cuda_` are the functions that pack CUDA kernels together, # calling Tthem from the host (i.e., CPU) and running them on the device (i.e., GPU). @@ -50,13 +107,61 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, size_arr = CuArray{Float32}(undef, size(u, 2), size(u, 3)) - flux_kernel = @cuda launch=false flux_kernel!(flux_arr, u, flux, equations) - flux_kernel(flux_arr, u, flux, equations; configurator_2d(flux_kernel, size_arr)...) + flux_kernel = @cuda launch=false flux_kernel!(flux_arr, u, equations, flux) + flux_kernel(flux_arr, u, equations, flux; configurator_2d(flux_kernel, size_arr)...) - weak_form_kernel = @cuda launch=false weak_form_kernel!(du, derivative_dhat, flux_arr, - equations) - weak_form_kernel(du, derivative_dhat, flux_arr, equations; + weak_form_kernel = @cuda launch=false weak_form_kernel!(du, derivative_dhat, flux_arr) + weak_form_kernel(du, derivative_dhat, flux_arr; configurator_3d(weak_form_kernel, du)...,) return nothing end + +# Pack kernels for prolonging solution to interfaces +function cuda_prolong2interfaces!(u, mesh::TreeMesh{1}, cache) + neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids) + interfaces_u = CuArray{Float32}(cache.interfaces.u) + + size_arr = CuArray{Float32}(undef, size(interfaces_u, 2), size(interfaces_u, 3)) + + prolong_interfaces_kernel = @cuda launch=false prolong_interfaces_kernel!(interfaces_u, + u, + neighbor_ids) + prolong_interfaces_kernel(interfaces_u, u, neighbor_ids; + configurator_2d(prolong_interfaces_kernel, size_arr)...,) + + cache.interfaces.u = interfaces_u # copy back to host automatically + + return nothing +end + +# Pack kernels for calculating interface fluxes +function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::False, equations, + dg::DGSEM, cache) + surface_flux = dg.surface_integral.surface_flux + + neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids) + interfaces_u = CuArray{Float32}(cache.interfaces.u) + surface_flux_arr = CuArray{Float32}(undef, 1, size(interfaces_u)[2:end]...) + surface_flux_values = CuArray{Float32}(cache.elements.surface_flux_values) + + size_arr = CuArray{Float32}(undef, size(interfaces_u, 3)) + + surface_flux_kernel = @cuda launch=false surface_flux_kernel!(surface_flux_arr, + interfaces_u, equations, + surface_flux) + surface_flux_kernel(surface_flux_arr, interfaces_u, equations, surface_flux; + configurator_1d(surface_flux_kernel, size_arr)...,) + + size_arr = CuArray{Float32}(undef, size(surface_flux_values, 1), size(interfaces_u, 3)) + + interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, + surface_flux_arr, + neighbor_ids) + interface_flux_kernel(surface_flux_values, surface_flux_arr, neighbor_ids; + configurator_2d(interface_flux_kernel, size_arr)...,) + + cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically + + return nothing +end diff --git a/test/test_solvers.jl b/test/test_solvers.jl index 9ecd84a..e276cfe 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -37,3 +37,8 @@ du, u = TrixiGPU.copy_to_device!(du, u) TrixiGPU.cuda_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), equations, solver.volume_integral, solver) + +TrixiGPU.cuda_prolong2interfaces!(u, mesh, cache) + +TrixiGPU.cuda_interface_flux!(mesh, Trixi.have_nonconservative_terms(equations), equations, + solver, cache) From 814983150108ce4f66f11a2d9d2b2103ea35dec0 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Thu, 8 Aug 2024 15:51:04 -1000 Subject: [PATCH 3/3] Fix small typo --- src/solvers/dg_1d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index db08600..a313b39 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -1,6 +1,6 @@ # Everything related to a DG semidiscretization in 1D -# TODO: Please check whether `equations::AbstractEquations{1}` is needed for each funtion here!! +# TODO: Please check whether `equations::AbstractEquations{1}` is needed for each function here!! # Functions end with `_kernel` are CUDA kernels that are going to be launed by the `@cuda` macro. # Kernel for calculating fluxes along normal direction