From d86f561bcec3b84df6797bc59aa35487c48da9d7 Mon Sep 17 00:00:00 2001
From: huiyuxie
Date: Sun, 25 Aug 2024 15:44:18 -1000
Subject: [PATCH] Add tests for Shallow Water 1D and 2D

---
 src/solvers/common.jl                   |  11 +-
 src/solvers/dg_1d.jl                    | 186 +++++++++++++++++-
 src/solvers/dg_2d.jl                    | 230 +++++++++++++++++++++-
 src/solvers/dg_3d.jl                    |   9 +-
 test/test_script.jl                     | 136 +++++++++----
 test/test_shallowwater_well_balanced.jl | 243 ++++++++++++++++++++++++
 6 files changed, 757 insertions(+), 58 deletions(-)
 create mode 100644 test/test_shallowwater_well_balanced.jl

diff --git a/src/solvers/common.jl b/src/solvers/common.jl
index 72fe92e..c4b3fef 100644
--- a/src/solvers/common.jl
+++ b/src/solvers/common.jl
@@ -18,13 +18,22 @@ end
 
 # Copy data from device to host
 function copy_to_host!(du::CuArray, u::CuArray)
-    # FIXME: Maybe direct CuArray to PtrArray conversion is possible (in the future)
+    # TODO: Direct CuArray to PtrArray conversion
    du = PtrArray(Array{Float64}(du))
    u = PtrArray(Array{Float64}(u))

    return (du, u)
end

+# Set the diagonal entries of a matrix to zero
+function set_diagonal_to_zero!(A::Array{Float64})
+    n = min(size(A)...)
+    for i in 1:n
+        A[i, i] = 0.0 # change back to `Float32`
+    end
+    return nothing
+end
+
 # Kernel for getting last and first indices
 function last_first_indices_kernel!(lasts, firsts, n_boundaries_per_direction)
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl
index 9fbc525..d107074 100644
--- a/src/solvers/dg_1d.jl
+++ b/src/solvers/dg_1d.jl
@@ -66,8 +66,37 @@ function volume_flux_kernel!(volume_flux_arr, u, equations::AbstractEquations{1}
     return nothing
 end
 
+# Kernel for calculating symmetric and nonconservative fluxes
+function symmetric_noncons_flux_kernel!(symmetric_flux_arr, noncons_flux_arr, u, derivative_split,
+                                        equations::AbstractEquations{1}, symmetric_flux::Function,
+                                        nonconservative_flux::Function)
+    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (j <= size(u, 2)^2 && k <= size(u, 3))
+        j1 = div(j - 1, size(u, 2)) + 1
+        j2 = rem(j - 1, size(u, 2)) + 1
+
+        u_node = get_node_vars(u, equations, j1, k)
+        u_node1 = get_node_vars(u, equations, j2, k)
+
+        symmetric_flux_node = symmetric_flux(u_node, u_node1, 1, equations)
+        noncons_flux_node = nonconservative_flux(u_node, u_node1, 1, equations)
+
+        @inbounds begin
+            for ii in axes(u, 1)
+                symmetric_flux_arr[ii, j1, j2, k] = symmetric_flux_node[ii]
+                noncons_flux_arr[ii, j1, j2, k] = noncons_flux_node[ii] * derivative_split[j1, j2]
+            end
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for calculating volume integrals
-function volume_integral_kernel!(du, derivative_split, volume_flux_arr)
+function volume_integral_kernel!(du, derivative_split, volume_flux_arr,
+                                 equations::AbstractEquations{1})
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
     k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
@@ -83,6 +112,28 @@
     return nothing
 end
 
+# Kernel for calculating symmetric and nonconservative volume integrals
+function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, noncons_flux_arr)
+    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
+
+    if (i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3))
+        @inbounds begin
+            integral_contribution = 0.0 # change back to `Float32`
+
+            for ii in axes(du, 2)
+                du[i, j, k] += derivative_split[j, ii] * symmetric_flux_arr[i, j, ii, k]
+                integral_contribution += noncons_flux_arr[i, j, ii, k]
+            end
+
+            du[i, j, k] += 0.5 * integral_contribution # change back to `Float32`
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for prolonging two interfaces
 function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids)
     j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
@@ -121,6 +172,31 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, equations::Abstrac
     return nothing
 end
 
+# Kernel for calculating surface fluxes and both left and right nonconservative fluxes
+function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_left_arr,
+                                      noncons_right_arr, equations::AbstractEquations{1},
+                                      surface_flux::Any, nonconservative_flux::Any)
+    k = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+
+    if (k <= size(surface_flux_arr, 3))
+        u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, k)
+
+        surface_flux_node = surface_flux(u_ll, u_rr, 1, equations)
+        noncons_left_node = nonconservative_flux(u_ll, u_rr, 1, equations)
+        noncons_right_node = nonconservative_flux(u_rr, u_ll, 1, equations)
+
+        @inbounds begin
+            for jj in axes(surface_flux_arr, 2)
+                surface_flux_arr[1, jj, k] = surface_flux_node[jj]
+                noncons_left_arr[1, jj, k] = noncons_left_node[jj]
+                noncons_right_arr[1, jj, k] = noncons_right_node[jj]
+            end
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for setting interface fluxes
 function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids)
     i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
@@ -139,6 +215,26 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_
     return nothing
 end
 
+function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_left_arr,
+                                noncons_right_arr, neighbor_ids)
+    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (i <= size(surface_flux_values, 1) && k <= size(surface_flux_arr, 3))
+        left_id = neighbor_ids[1, k]
+        right_id = neighbor_ids[2, k]
+
+        @inbounds begin
+            surface_flux_values[i, 2, left_id] = surface_flux_arr[1, i, k] +
+                                                 0.5 * noncons_left_arr[1, i, k] # change back to `Float32`
+            surface_flux_values[i, 1, right_id] = surface_flux_arr[1, i, k] +
+                                                  0.5 * noncons_right_arr[1, i, k] # change back to `Float32`
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for prolonging two boundaries
 function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_sides)
     j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
@@ -176,7 +272,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat
         u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr
         x = get_node_coords(node_coordinates, equations, boundary)
 
-        # TODO: Optimize flow controls as they are not recommended on GPUs
+        # TODO: Improve this part
         if direction == 1
             boundary_flux_node = boundary_conditions[1](u_inner, orientation, direction, x, t,
                                                         surface_flux, equations)
@@ -276,7 +372,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
                                dg::DGSEM)
     volume_flux = volume_integral.volume_flux
 
-    derivative_split = CuArray{Float64}(dg.basis.derivative_split)
+    derivative_split = dg.basis.derivative_split
+    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`
+
+    derivative_split = CuArray{Float64}(derivative_split)
     volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
 
     size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))
 
@@ -287,8 +386,44 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::
                        configurator_2d(volume_flux_kernel, size_arr)...)
 
     volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split,
-                                                                        volume_flux_arr)
-    volume_integral_kernel(du, derivative_split, volume_flux_arr;
+                                                                        volume_flux_arr, equations)
+    volume_integral_kernel(du, derivative_split, volume_flux_arr, equations;
                            configurator_3d(volume_integral_kernel, du)...)
 
     return nothing
 end
 
+# Pack kernels to calculate volume integrals
+function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM)
+    symmetric_flux, nonconservative_flux = dg.volume_integral.volume_flux
+
+    derivative_split = dg.basis.derivative_split
+    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`
+
+    derivative_split = CuArray{Float64}(derivative_split)
+    symmetric_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
+    noncons_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))
+
+    size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))
+
+    symmetric_noncons_flux_kernel = @cuda launch=false symmetric_noncons_flux_kernel!(symmetric_flux_arr,
+                                                                                      noncons_flux_arr,
+                                                                                      u,
+                                                                                      derivative_split,
+                                                                                      equations,
+                                                                                      symmetric_flux,
+                                                                                      nonconservative_flux)
+    symmetric_noncons_flux_kernel(symmetric_flux_arr, noncons_flux_arr, u, derivative_split,
+                                  equations, symmetric_flux, nonconservative_flux;
+                                  configurator_2d(symmetric_noncons_flux_kernel, size_arr)...)
+
+    derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split`
+
+    volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split,
+                                                                        symmetric_flux_arr,
+                                                                        noncons_flux_arr)
+    volume_integral_kernel(du, derivative_split, symmetric_flux_arr, noncons_flux_arr;
+                           configurator_3d(volume_integral_kernel, du)...)
+
+    return nothing
+end
+
@@ -341,6 +476,46 @@ function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::False, e
     return nothing
 end
 
+function cuda_interface_flux!(mesh::TreeMesh{1}, nonconservative_terms::True, equations, dg::DGSEM,
+                              cache)
+    surface_flux, nonconservative_flux = dg.surface_integral.surface_flux
+
+    neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids)
+    interfaces_u = CuArray{Float64}(cache.interfaces.u)
+    surface_flux_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
+    noncons_left_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
+    noncons_right_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...)
+    surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values)
+
+    size_arr = CuArray{Float64}(undef, size(interfaces_u, 3))
+
+    surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr,
+                                                                                  interfaces_u,
+                                                                                  noncons_left_arr,
+                                                                                  noncons_right_arr,
+                                                                                  equations,
+                                                                                  surface_flux,
+                                                                                  nonconservative_flux)
+    surface_noncons_flux_kernel(surface_flux_arr, interfaces_u, noncons_left_arr, noncons_right_arr,
+                                equations, surface_flux, nonconservative_flux;
+                                configurator_1d(surface_noncons_flux_kernel, size_arr)...)
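+
+    # The three arrays just filled are combined in `interface_flux_kernel!` below as
+    #     F_left  = F*(u_ll, u_rr) + 0.5 * Φ(u_ll, u_rr)
+    #     F_right = F*(u_ll, u_rr) + 0.5 * Φ(u_rr, u_ll)
+    # i.e., each neighboring element receives the symmetric surface flux plus half of its own
+    # one-sided nonconservative term (a restatement of the kernel above, for reference).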
+ + size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3)) + + interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, + surface_flux_arr, + noncons_left_arr, + noncons_right_arr, + neighbor_ids) + interface_flux_kernel(surface_flux_values, surface_flux_arr, noncons_left_arr, + noncons_right_arr, neighbor_ids; + configurator_2d(interface_flux_kernel, size_arr)...) + + cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically + + return nothing +end + # Dummy function for asserting boundaries function cuda_prolong2boundaries!(u, mesh::TreeMesh{1}, boundary_condition::BoundaryConditionPeriodic, equations, cache) @@ -421,7 +596,6 @@ end # Pack kernels for calculating surface integrals function cuda_surface_integral!(du, mesh::TreeMesh{1}, equations, dg::DGSEM, cache) - # FIXME: Check `surface_integral` factor_arr = CuArray{Float64}([ dg.basis.boundary_interpolation[1, 1], dg.basis.boundary_interpolation[size(du, 2), 2] diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 90ad0f2..04fd9bd 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -79,8 +79,47 @@ function volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, u, equations::A return nothing end +# Kernel for calculating symmetric and nonconservative fluxes +function symmetric_noncons_flux_kernel!(symmetric_flux_arr1, symmetric_flux_arr2, noncons_flux_arr1, + noncons_flux_arr2, u, derivative_split, + equations::AbstractEquations{2}, symmetric_flux::Function, + nonconservative_flux::Function) + j = (blockIdx().x - 1) * blockDim().x + threadIdx().x + k = (blockIdx().y - 1) * blockDim().y + threadIdx().y + + if (j <= size(u, 2)^3 && k <= size(u, 4)) + j1 = div(j - 1, size(u, 2)^2) + 1 + j2 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 + j3 = rem(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 + + u_node = get_node_vars(u, equations, j1, j2, k) + u_node1 = get_node_vars(u, equations, j3, j2, k) + u_node2 = get_node_vars(u, equations, j1, j3, k) + + symmetric_flux_node1 = symmetric_flux(u_node, u_node1, 1, equations) + symmetric_flux_node2 = symmetric_flux(u_node, u_node2, 2, equations) + + noncons_flux_node1 = nonconservative_flux(u_node, u_node1, 1, equations) + noncons_flux_node2 = nonconservative_flux(u_node, u_node2, 2, equations) + + @inbounds begin + for ii in axes(u, 1) + symmetric_flux_arr1[ii, j1, j3, j2, k] = derivative_split[j1, j3] * + symmetric_flux_node1[ii] + symmetric_flux_arr2[ii, j1, j2, j3, k] = derivative_split[j2, j3] * + symmetric_flux_node2[ii] + noncons_flux_arr1[ii, j1, j3, j2, k] = noncons_flux_node1[ii] + noncons_flux_arr2[ii, j1, j2, j3, k] = noncons_flux_node2[ii] + end + end + end + + return nothing +end + # Kernel for calculating volume integrals -function volume_integral_kernel!(du, derivative_split, volume_flux_arr1, volume_flux_arr2) +function volume_integral_kernel!(du, derivative_split, volume_flux_arr1, volume_flux_arr2, + equations::AbstractEquations{2}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z @@ -100,6 +139,36 @@ function volume_integral_kernel!(du, derivative_split, volume_flux_arr1, volume_ return nothing end +# Kernel for calculating symmetric and nonconservative volume integrals +function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr1, symmetric_flux_arr2, + noncons_flux_arr1, noncons_flux_arr2) + i = (blockIdx().x - 1) * blockDim().x + 
threadIdx().x
+    j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
+
+    if (i <= size(du, 1) && j <= size(du, 2)^2 && k <= size(du, 4))
+        j1 = div(j - 1, size(du, 2)) + 1
+        j2 = rem(j - 1, size(du, 2)) + 1
+
+        @inbounds begin
+            integral_contribution = 0.0 # change back to `Float32`
+
+            for ii in axes(du, 2)
+                du[i, j1, j2, k] += symmetric_flux_arr1[i, j1, ii, j2, k]
+                du[i, j1, j2, k] += symmetric_flux_arr2[i, j1, j2, ii, k]
+                integral_contribution += derivative_split[j1, ii] *
+                                         noncons_flux_arr1[i, j1, ii, j2, k]
+                integral_contribution += derivative_split[j2, ii] *
+                                         noncons_flux_arr2[i, j1, j2, ii, k]
+            end
+
+            du[i, j1, j2, k] += 0.5 * integral_contribution # change back to `Float32`
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for prolonging two interfaces
 function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, orientations,
                                     equations::AbstractEquations{2})
@@ -153,6 +222,34 @@ function surface_flux_kernel!(surface_flux_arr, interfaces_u, orientations,
     return nothing
 end
 
+# Kernel for calculating surface fluxes and both left and right nonconservative fluxes
+function surface_noncons_flux_kernel!(surface_flux_arr, interfaces_u, noncons_left_arr,
+                                      noncons_right_arr, orientations,
+                                      equations::AbstractEquations{2}, surface_flux::Any,
+                                      nonconservative_flux::Any)
+    j2 = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+
+    if (j2 <= size(surface_flux_arr, 3) && k <= size(surface_flux_arr, 4))
+        u_ll, u_rr = get_surface_node_vars(interfaces_u, equations, j2, k)
+        orientation = orientations[k]
+
+        surface_flux_node = surface_flux(u_ll, u_rr, orientation, equations)
+        noncons_left_node = nonconservative_flux(u_ll, u_rr, orientation, equations)
+        noncons_right_node = nonconservative_flux(u_rr, u_ll, orientation, equations)
+
+        @inbounds begin
+            for j1j1 in axes(surface_flux_arr, 2)
+                surface_flux_arr[1, j1j1, j2, k] = surface_flux_node[j1j1]
+                noncons_left_arr[1, j1j1, j2, k] = noncons_left_node[j1j1]
+                noncons_right_arr[1, j1j1, j2, k] = noncons_right_node[j1j1]
+            end
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for setting interface fluxes
 function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_ids, orientations,
                                 equations::AbstractEquations{2})
@@ -178,6 +275,34 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_
     return nothing
 end
 
+function interface_flux_kernel!(surface_flux_values, surface_flux_arr, noncons_left_arr,
+                                noncons_right_arr, neighbor_ids, orientations)
+    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
+
+    if (i <= size(surface_flux_values, 1) &&
+        j <= size(surface_flux_arr, 3) &&
+        k <= size(surface_flux_arr, 4))
+        left_id = neighbor_ids[1, k]
+        right_id = neighbor_ids[2, k]
+
+        left_direction = 2 * orientations[k]
+        right_direction = 2 * orientations[k] - 1
+
+        @inbounds begin
+            surface_flux_values[i, j, left_direction, left_id] = surface_flux_arr[1, i, j, k] +
                                                                  0.5 * # change back to `Float32`
+                                                                 noncons_left_arr[1, i, j, k]
+            surface_flux_values[i, j, right_direction, right_id] = surface_flux_arr[1, i, j, k] +
+                                                                   0.5 * # change back to `Float32`
+                                                                   noncons_right_arr[1, i, j, k]
+        end
+    end
+
+    return nothing
+end
+
 # Kernel for prolonging two boundaries
 function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_sides, orientations,
                                     equations::AbstractEquations{2})
@@ -230,7 +355,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat
         u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr
         x = get_node_coords(node_coordinates, equations, j, boundary)
 
-        # TODO: Optimize flow controls as they are not recommended on GPUs
+        # TODO: Improve this part
         if direction == 1
             boundary_flux_node = boundary_conditions[1](u_inner, orientation, direction, x, t,
                                                         surface_flux, equations)
@@ -353,7 +478,10 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
                                volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM)
     volume_flux = volume_integral.volume_flux
 
-    derivative_split = CuArray{Float64}(dg.basis.derivative_split)
+    derivative_split = dg.basis.derivative_split
+    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`
+
+    derivative_split = CuArray{Float64}(derivative_split)
     volume_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
                                         size(u, 4))
     volume_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
@@ -370,8 +498,56 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::
 
     volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split,
                                                                         volume_flux_arr1,
-                                                                        volume_flux_arr2)
-    volume_integral_kernel(du, derivative_split, volume_flux_arr1, volume_flux_arr2;
+                                                                        volume_flux_arr2, equations)
+    volume_integral_kernel(du, derivative_split, volume_flux_arr1, volume_flux_arr2, equations;
                            configurator_3d(volume_integral_kernel, size_arr)...)
 
     return nothing
 end
 
+function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::True, equations,
+                               volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM)
+    symmetric_flux, nonconservative_flux = dg.volume_integral.volume_flux
+
+    derivative_split = dg.basis.derivative_split
+    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`
+
+    derivative_split = CuArray{Float64}(derivative_split)
+    symmetric_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
+                                           size(u, 4))
+    symmetric_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
+                                           size(u, 4))
+    noncons_flux_arr1 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
+                                         size(u, 4))
+    noncons_flux_arr2 = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2),
+                                         size(u, 4))
+
+    size_arr = CuArray{Float64}(undef, size(u, 2)^3, size(u, 4))
+
+    symmetric_noncons_flux_kernel = @cuda launch=false symmetric_noncons_flux_kernel!(symmetric_flux_arr1,
+                                                                                      symmetric_flux_arr2,
+                                                                                      noncons_flux_arr1,
+                                                                                      noncons_flux_arr2,
+                                                                                      u,
+                                                                                      derivative_split,
+                                                                                      equations,
+                                                                                      symmetric_flux,
+                                                                                      nonconservative_flux)
+    symmetric_noncons_flux_kernel(symmetric_flux_arr1, symmetric_flux_arr2, noncons_flux_arr1,
+                                  noncons_flux_arr2, u, derivative_split, equations, symmetric_flux,
+                                  nonconservative_flux;
+                                  configurator_2d(symmetric_noncons_flux_kernel, size_arr)...)
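+
+    # The flat thread index j in `symmetric_noncons_flux_kernel!` above encodes the node
+    # triple (j1, j2, j3); an equivalent host-side decode (illustrative sketch, not part of
+    # the kernels) can be sanity-checked via:
+    #     decode(j, n) = (div(j - 1, n^2) + 1, div(rem(j - 1, n^2), n) + 1,
+    #                     rem(rem(j - 1, n^2), n) + 1)
+    #     for j in 1:n^3
+    #         j1, j2, j3 = decode(j, n)
+    #         @assert (j1 - 1) * n^2 + (j2 - 1) * n + j3 == j
+    #     end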
+ + derivative_split = CuArray{Float64}(dg.basis.derivative_split) # use original `derivative_split` + size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2)^2, size(du, 4)) + + volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split, + symmetric_flux_arr1, + symmetric_flux_arr2, + noncons_flux_arr1, + noncons_flux_arr2) + volume_integral_kernel(du, derivative_split, symmetric_flux_arr1, symmetric_flux_arr2, + noncons_flux_arr1, noncons_flux_arr2; configurator_3d(volume_integral_kernel, size_arr)...) return nothing @@ -433,6 +609,49 @@ function cuda_interface_flux!(mesh::TreeMesh{2}, nonconservative_terms::False, e return nothing end +function cuda_interface_flux!(mesh::TreeMesh{2}, nonconservative_terms::True, equations, dg::DGSEM, + cache) + surface_flux, nonconservative_flux = dg.surface_integral.surface_flux + + neighbor_ids = CuArray{Int64}(cache.interfaces.neighbor_ids) + orientations = CuArray{Int64}(cache.interfaces.orientations) + interfaces_u = CuArray{Float64}(cache.interfaces.u) + surface_flux_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) + noncons_left_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) + noncons_right_arr = CuArray{Float64}(undef, 1, size(interfaces_u)[2:end]...) + surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) + + size_arr = CuArray{Float64}(undef, size(interfaces_u, 3), size(interfaces_u, 4)) + + surface_noncons_flux_kernel = @cuda launch=false surface_noncons_flux_kernel!(surface_flux_arr, + interfaces_u, + noncons_left_arr, + noncons_right_arr, + orientations, + equations, + surface_flux, + nonconservative_flux) + surface_noncons_flux_kernel(surface_flux_arr, interfaces_u, noncons_left_arr, noncons_right_arr, + orientations, equations, surface_flux, nonconservative_flux; + configurator_2d(surface_noncons_flux_kernel, size_arr)...) + + size_arr = CuArray{Float64}(undef, size(surface_flux_values, 1), size(interfaces_u, 3), + size(interfaces_u, 4)) + + interface_flux_kernel = @cuda launch=false interface_flux_kernel!(surface_flux_values, + surface_flux_arr, + noncons_left_arr, + noncons_right_arr, + neighbor_ids, orientations) + interface_flux_kernel(surface_flux_values, surface_flux_arr, noncons_left_arr, + noncons_right_arr, neighbor_ids, orientations; + configurator_3d(interface_flux_kernel, size_arr)...) 
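+
+    # Direction bookkeeping in `interface_flux_kernel!` follows Trixi's TreeMesh convention
+    # (1 = -x, 2 = +x, 3 = -y, 4 = +y): an x-interface (orientation 1) is face 2 of its left
+    # neighbor and face 1 of its right neighbor; a y-interface (orientation 2) gives faces 4
+    # and 3. Illustrative host-side check:
+    #     for orientation in (1, 2)
+    #         @assert (2 * orientation, 2 * orientation - 1) in ((2, 1), (4, 3))
+    #     end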
+ + cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically + + return nothing +end + # Dummy function for asserting boundaries function cuda_prolong2boundaries!(u, mesh::TreeMesh{2}, boundary_condition::BoundaryConditionPeriodic, equations, cache) @@ -518,7 +737,6 @@ end # Pack kernels for calculating surface integrals function cuda_surface_integral!(du, mesh::TreeMesh{2}, equations, dg::DGSEM, cache) - # FIXME: `surface_integral::SurfaceIntegralWeakForm` factor_arr = CuArray{Float64}([ dg.basis.boundary_interpolation[1, 1], dg.basis.boundary_interpolation[size(du, 2), 2] diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index a2d309d..eb92b52 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -91,7 +91,7 @@ end # Kernel for calculating volume integrals function volume_integral_kernel!(du, derivative_split, volume_flux_arr1, volume_flux_arr2, - volume_flux_arr3) + volume_flux_arr3, equations::AbstractEquations{3}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x j = (blockIdx().y - 1) * blockDim().y + threadIdx().y k = (blockIdx().z - 1) * blockDim().z + threadIdx().z @@ -262,7 +262,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr x = get_node_coords(node_coordinates, equations, j1, j2, boundary) - # TODO: Optimize flow controls as they are not recommended on GPUs + # TODO: Improve this part if direction == 1 boundary_flux_node = boundary_conditions[1](u_inner, orientation, direction, x, t, surface_flux, equations) @@ -427,9 +427,9 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_integral_kernel = @cuda launch=false volume_integral_kernel!(du, derivative_split, volume_flux_arr1, volume_flux_arr2, - volume_flux_arr3) + volume_flux_arr3, equations) volume_integral_kernel(du, derivative_split, volume_flux_arr1, volume_flux_arr2, - volume_flux_arr3; + volume_flux_arr3, equations; configurator_3d(volume_integral_kernel, size_arr)...) 
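+
+    # The `configurator_1d/2d/3d` helpers used for every launch are defined elsewhere in
+    # TrixiGPU. A typical occupancy-based implementation on top of CUDA.jl's
+    # `launch_configuration` might look like the following sketch (an assumption about the
+    # helpers' structure, not the package's actual code):
+    #     function configurator_3d(kernel, array)
+    #         config = CUDA.launch_configuration(kernel.fun)
+    #         dim = max(floor(Int, cbrt(config.threads)), 1)
+    #         threads = (dim, dim, dim)
+    #         blocks = ntuple(i -> cld(size(array, i), threads[i]), 3)
+    #         return (threads = threads, blocks = blocks)
+    #     end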
return nothing @@ -576,7 +576,6 @@ end # Pack kernels for calculating surface integrals function cuda_surface_integral!(du, mesh::TreeMesh{3}, equations, dg::DGSEM, cache) - # FIXME: Check `surface_integral` factor_arr = CuArray{Float64}([ dg.basis.boundary_interpolation[1, 1], dg.basis.boundary_interpolation[size(du, 2), 2] diff --git a/test/test_script.jl b/test/test_script.jl index 284fe46..53b4cbd 100644 --- a/test/test_script.jl +++ b/test/test_script.jl @@ -1,38 +1,45 @@ using Trixi, TrixiGPU using OrdinaryDiffEq using CUDA +using Test -equations = HyperbolicDiffusionEquations3D() - -initial_condition = initial_condition_poisson_nonperiodic -boundary_conditions = (x_neg = boundary_condition_poisson_nonperiodic, - x_pos = boundary_condition_poisson_nonperiodic, - y_neg = boundary_condition_periodic, - y_pos = boundary_condition_periodic, - z_neg = boundary_condition_periodic, - z_pos = boundary_condition_periodic) - -solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs) - -coordinates_min = (0.0, 0.0, 0.0) -coordinates_max = (1.0, 1.0, 1.0) -mesh = TreeMesh(coordinates_min, - coordinates_max, - initial_refinement_level = 2, - n_cells_max = 30_000, - periodicity = (false, true, true)) - -semi = SemidiscretizationHyperbolic(mesh, - equations, - initial_condition, - solver, - source_terms = source_terms_poisson_nonperiodic, - boundary_conditions = boundary_conditions) +equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.25) +function initial_condition_discontinuous_well_balancedness(x, t, + equations::ShallowWaterEquations1D) + H = equations.H0 + v = 0.0 + b = 0.0 + + if x[1] >= 0.5 && x[1] <= 0.75 + b = 2.0 + 0.5 * sin(2.0 * pi * x[1]) + end + + return prim2cons(SVector(H, v, b), equations) +end + +initial_condition = initial_condition_discontinuous_well_balancedness + +volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) +surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal) +solver = DGSEM(polydeg = 4, surface_flux = surface_flux, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = -1.0 +coordinates_max = 1.0 +mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 3, + n_cells_max = 10_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi -t = 0.0 -tspan = (0.0, 5.0) +# Get copy for GPU to avoid overwriting during tests +mesh_gpu, equations_gpu = mesh, equations +initial_condition_gpu, boundary_conditions_gpu = initial_condition, boundary_conditions +source_terms_gpu, solver_gpu, cache_gpu = source_terms, solver, cache + +t = t_gpu = 0.0 +tspan = (0.0, 100.0) ode = semidiscretize(semi, tspan) u_ode = copy(ode.u0) @@ -40,15 +47,64 @@ du_ode = similar(u_ode) u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache) du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache) -du, u = TrixiGPU.copy_to_device!(du, u) -TrixiGPU.cuda_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations), equations, - solver.volume_integral, solver) -TrixiGPU.cuda_prolong2interfaces!(u, mesh, equations, cache) -TrixiGPU.cuda_interface_flux!(mesh, Trixi.have_nonconservative_terms(equations), equations, solver, - cache) -TrixiGPU.cuda_prolong2boundaries!(u, mesh, boundary_conditions, equations, cache) - -TrixiGPU.cuda_boundary_flux!(t, mesh, boundary_conditions, equations, solver, cache) -TrixiGPU.cuda_surface_integral!(du, mesh, 
equations, solver, cache)
-TrixiGPU.cuda_jacobian!(du, mesh, equations, cache)
-TrixiGPU.cuda_sources!(du, u, t, source_terms, equations, cache)
+# Copy data to device
+du_gpu, u_gpu = TrixiGPU.copy_to_device!(du, u)
+# Reset data on host
+Trixi.reset_du!(du, solver, cache)
+
+# Test `cuda_volume_integral!`
+TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
+                               Trixi.have_nonconservative_terms(equations_gpu),
+                               equations_gpu, solver_gpu.volume_integral, solver_gpu)
+Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
+                            equations, solver.volume_integral, solver, cache)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_prolong2interfaces!`
+TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
+Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_interface_flux!`
+TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu),
+                              equations_gpu, solver_gpu, cache_gpu)
+Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
+                           Trixi.have_nonconservative_terms(equations), equations,
+                           solver.surface_integral, solver, cache)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_prolong2boundaries!`
+TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                                  cache_gpu)
+Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_boundary_flux!`
+TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                             solver_gpu, cache_gpu)
+Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
+                          solver.surface_integral, solver)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_surface_integral!`
+TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
+Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_jacobian!`
+TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
+Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
+
+# Test `cuda_sources!`
+TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
+Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
+@test CUDA.@allowscalar du ≈ du_gpu
+@test CUDA.@allowscalar u ≈ u_gpu
diff --git a/test/test_shallowwater_well_balanced.jl b/test/test_shallowwater_well_balanced.jl
new file mode 100644
index 0000000..f2ca76e
--- /dev/null
+++ b/test/test_shallowwater_well_balanced.jl
@@ -0,0 +1,243 @@
+module TestShallowWaterWellBalanced
+
+using Trixi, TrixiGPU
+using OrdinaryDiffEq
+using Test, CUDA
+
+# Start testing with a clean environment
+outdir = "out"
+isdir(outdir) && rm(outdir, recursive = true)
+
+# Note that it is complicated to get tight error bounds for GPU kernels. Here we use `isapprox`
+# with its default tolerances to validate the precision by comparing the results from the GPU
+# kernels against the CPU kernels, which corresponds to requiring agreement in about half of the
+# significant digits (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox).
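+
+# For example, with the default relative tolerance `rtol = sqrt(eps(Float64)) ≈ 1.5e-8`, two
+# values that agree in roughly their first eight significant digits compare as approximately
+# equal (illustration only, not part of the test suite):
+#     isapprox(1.0 + 1.0e-9, 1.0) == true    # within the default rtol
+#     isapprox(1.0 + 1.0e-7, 1.0) == false   # differs in the 8th significant digit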
+
+# In short, this heuristic first checks whether the error bound above (sometimes further
+# relaxed) is satisfied. Any new methods and optimizations introduced later should satisfy at
+# least this error bound.
+
+# Test precision of the semidiscretization process
+@testset "Test Shallow Water Well Balanced" begin
+    @testset "Shallow Water 1D" begin
+        equations = ShallowWaterEquations1D(gravity_constant = 9.81, H0 = 3.25)
+
+        function initial_condition_discontinuous_well_balancedness(x, t,
+                                                                   equations::ShallowWaterEquations1D)
+            H = equations.H0
+            v = 0.0
+            b = 0.0
+
+            if x[1] >= 0.5 && x[1] <= 0.75
+                b = 2.0 + 0.5 * sin(2.0 * pi * x[1])
+            end
+
+            return prim2cons(SVector(H, v, b), equations)
+        end
+
+        initial_condition = initial_condition_discontinuous_well_balancedness
+
+        volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+        surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal)
+        solver = DGSEM(polydeg = 4, surface_flux = surface_flux,
+                       volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+
+        coordinates_min = -1.0
+        coordinates_max = 1.0
+        mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 3,
+                        n_cells_max = 10_000)
+
+        semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+        (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi
+
+        # Get copy for GPU to avoid overwriting during tests
+        mesh_gpu, equations_gpu = mesh, equations
+        initial_condition_gpu, boundary_conditions_gpu = initial_condition, boundary_conditions
+        source_terms_gpu, solver_gpu, cache_gpu = source_terms, solver, cache
+
+        t = t_gpu = 0.0
+        tspan = (0.0, 100.0)
+
+        ode = semidiscretize(semi, tspan)
+        u_ode = copy(ode.u0)
+        du_ode = similar(u_ode)
+        u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache)
+        du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache)
+
+        # Copy data to device
+        du_gpu, u_gpu = TrixiGPU.copy_to_device!(du, u)
+        # Reset data on host
+        Trixi.reset_du!(du, solver, cache)
+
+        # Test `cuda_volume_integral!`
+        TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
+                                       Trixi.have_nonconservative_terms(equations_gpu),
+                                       equations_gpu, solver_gpu.volume_integral, solver_gpu)
+        Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
+                                    equations, solver.volume_integral, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_prolong2interfaces!`
+        TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
+        Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_interface_flux!`
+        TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu),
+                                      equations_gpu, solver_gpu, cache_gpu)
+        Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
+                                   Trixi.have_nonconservative_terms(equations), equations,
+                                   solver.surface_integral, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_prolong2boundaries!`
+        TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                                          cache_gpu)
+        Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_boundary_flux!`
+        TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                                     solver_gpu, cache_gpu)
+        Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
+                                  solver.surface_integral, solver)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_surface_integral!`
+        TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
+        Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_jacobian!`
+        TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
+        Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_sources!`
+        TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
+        Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Copy data back to host
+        du_cpu, u_cpu = TrixiGPU.copy_to_host!(du_gpu, u_gpu)
+    end
+
+    @testset "Shallow Water 2D" begin
+        equations = ShallowWaterEquations2D(gravity_constant = 9.81, H0 = 3.25)
+
+        function initial_condition_well_balancedness(x, t, equations::ShallowWaterEquations2D)
+            H = equations.H0
+            v1 = 0.0
+            v2 = 0.0
+
+            x1, x2 = x
+            b = (1.5 / exp(0.5 * ((x1 - 1.0)^2 + (x2 - 1.0)^2)) +
+                 0.75 / exp(0.5 * ((x1 + 1.0)^2 + (x2 + 1.0)^2)))
+            return prim2cons(SVector(H, v1, v2, b), equations)
+        end
+
+        initial_condition = initial_condition_well_balancedness
+
+        volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+        surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal)
+        solver = DGSEM(polydeg = 4, surface_flux = surface_flux,
+                       volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+
+        coordinates_min = (-1.0, -1.0)
+        coordinates_max = (1.0, 1.0)
+        mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 2,
+                        n_cells_max = 10_000)
+
+        semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+        (; mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) = semi
+
+        # Get copy for GPU to avoid overwriting during tests
+        mesh_gpu, equations_gpu = mesh, equations
+        initial_condition_gpu, boundary_conditions_gpu = initial_condition, boundary_conditions
+        source_terms_gpu, solver_gpu, cache_gpu = source_terms, solver, cache
+
+        t = t_gpu = 0.0
+        tspan = (0.0, 100.0)
+
+        ode = semidiscretize(semi, tspan)
+        u_ode = copy(ode.u0)
+        du_ode = similar(u_ode)
+        u = Trixi.wrap_array(u_ode, mesh, equations, solver, cache)
+        du = Trixi.wrap_array(du_ode, mesh, equations, solver, cache)
+
+        # Copy data to device
+        du_gpu, u_gpu = TrixiGPU.copy_to_device!(du, u)
+        # Reset data on host
+        Trixi.reset_du!(du, solver, cache)
+
+        # Test `cuda_volume_integral!`
+        TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
+                                       Trixi.have_nonconservative_terms(equations_gpu),
+                                       equations_gpu, solver_gpu.volume_integral, solver_gpu)
+        Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
+                                    equations, solver.volume_integral, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_prolong2interfaces!`
+        TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
+        Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_interface_flux!`
+        TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu),
+                                      equations_gpu, solver_gpu, cache_gpu)
+        Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
+                                   Trixi.have_nonconservative_terms(equations), equations,
+                                   solver.surface_integral, solver, cache)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_prolong2boundaries!`
+        TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                                          cache_gpu)
+        Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Test `cuda_boundary_flux!`
+        TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
+                                     solver_gpu, cache_gpu)
+        Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
+                                  solver.surface_integral, solver)
+        @test CUDA.@allowscalar du ≈ du_gpu
+        @test CUDA.@allowscalar u ≈ u_gpu
+
+        # # Test `cuda_surface_integral!`
+        # TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
+        # Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
+        # @test CUDA.@allowscalar du ≈ du_gpu
+        # @test CUDA.@allowscalar u ≈ u_gpu
+
+        # # Test `cuda_jacobian!`
+        # TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
+        # Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
+        # @test CUDA.@allowscalar du ≈ du_gpu
+        # @test CUDA.@allowscalar u ≈ u_gpu
+
+        # # Test `cuda_sources!`
+        # TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
+        # Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
+        # @test CUDA.@allowscalar du ≈ du_gpu
+        # @test CUDA.@allowscalar u ≈ u_gpu
+
+        # Copy data back to host
+        du_cpu, u_cpu = TrixiGPU.copy_to_host!(du_gpu, u_gpu)
+    end
+end
+
+end # module
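
# To run the new test file locally, one can include it directly (assuming a CUDA-capable GPU
# and an instantiated project environment; the invocation is illustrative, not taken from the
# repository's CI configuration):
#
#     julia --project=. -e 'include("test/test_shallowwater_well_balanced.jl")'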