Skip to content

Commit

Permalink
Complete 1D
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Sep 9, 2024
1 parent 11e42fb commit a9a783f
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 62 deletions.
180 changes: 176 additions & 4 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,127 @@ function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr, nonco
return nothing
end

# Kernel for counting elements for DG-only and blended DG-FV volume integral
# Kernel for counting elements for DG-only and blended DG-FV volume integral.
# Marks element `i` in exactly one of the two id arrays (which are assumed to be
# zero-initialized): `element_ids_dg[i] = i` when the indicator `alpha[i]` is
# zero up to `atol`, otherwise `element_ids_dgfv[i] = i`.
function pure_blended_element_count_kernel!(element_ids_dg, element_ids_dgfv, alpha, atol)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

    if i <= length(alpha)
        if isapprox(alpha[i], 0, atol = atol)
            element_ids_dg[i] = i
        else
            element_ids_dgfv[i] = i
        end
    end

    return nothing
end

# Kernel for calculating pure DG and DG-FV volume fluxes
# Kernel for calculating pure DG and DG-FV volume fluxes.
# One thread handles one `(j1, j2)` node pair of one element `k`: it always
# stores the symmetric DG volume flux in `volume_flux_arr`, and for blended
# elements (nonzero entry in `element_ids_dgfv`) additionally stores the FV
# flux at interior subcell interface `j1` (computed once per interface via the
# `j2 == 1` guard) into both `fstar1_L` and `fstar1_R`.
function volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L, fstar1_R, u,
                                  element_ids_dgfv, equations::AbstractEquations{1},
                                  volume_flux_dg::Any, volume_flux_fv::Any)
    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

    if j <= size(u, 2)^2 && k <= size(u, 3)
        # `element_ids_dgfv` has one entry per element, i.e. length == size(u, 3)

        # Unflatten the linear index `j` into the node pair `(j1, j2)`
        q, r = divrem(j - 1, size(u, 2))
        j1 = q + 1
        j2 = r + 1

        element_dgfv = element_ids_dgfv[k] # zero means element `k` is DG-only

        u_node = get_node_vars(u, equations, j1, k)
        u_node1 = get_node_vars(u, equations, j2, k)

        flux_node = volume_flux_dg(u_node, u_node1, 1, equations)

        @inbounds for v in axes(u, 1)
            volume_flux_arr[v, j1, j2, k] = flux_node[v]
        end

        # Interior subcell interfaces only (`j1 != 1`), one thread per interface
        if j1 != 1 && j2 == 1 && element_dgfv != 0
            u_ll = get_node_vars(u, equations, j1 - 1, element_dgfv)
            u_rr = get_node_vars(u, equations, j1, element_dgfv)
            fv_flux_node = volume_flux_fv(u_ll, u_rr, 1, equations)

            @inbounds for v in axes(u, 1)
                fstar1_L[v, j1, element_dgfv] = fv_flux_node[v]
                fstar1_R[v, j1, element_dgfv] = fv_flux_node[v]
            end
        end
    end

    return nothing
end

# Kernel for calculating DG volume integral contribution
# Kernel for calculating DG volume integral contribution.
# For pure DG elements the full DG update is accumulated into `du`; for blended
# DG-FV elements the DG part is weighted by `1 - alpha`. A zero entry in the id
# arrays means element `k` is not in that category (sentinel check; the
# original author flagged this branching as something to improve).
function volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                                    volume_flux_arr, equations::AbstractEquations{1})
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

    if i <= size(du, 1) && j <= size(du, 2) && k <= size(du, 3)
        # Both id arrays have one entry per element, i.e. length == size(du, 3)
        element_dg = element_ids_dg[k]
        element_dgfv = element_ids_dgfv[k]
        alpha_element = alpha[k]

        # Pure DG element: standard split-form volume integral
        if element_dg !== 0
            @inbounds for jj in axes(du, 2)
                du[i, j, element_dg] += derivative_split[j, jj] *
                                        volume_flux_arr[i, j, jj, element_dg]
            end
        end

        # Blended element: DG contribution scaled by `1 - alpha`
        if element_dgfv !== 0
            @inbounds for jj in axes(du, 2)
                du[i, j, element_dgfv] += (1 - alpha_element) * derivative_split[j, jj] *
                                          volume_flux_arr[i, j, jj, element_dgfv]
            end
        end
    end

    return nothing
end

# Kernel for calculating FV volume integral contribution
# Kernel for calculating FV volume integral contribution.
# For each blended element (nonzero entry in `element_ids_dgfv`) the FV update
# `inverse_weights[j] * (fstar1_L[_, j + 1, _] - fstar1_R[_, j, _])`, scaled by
# the blending factor `alpha[k]`, is accumulated into `du`. Elements with a
# zero id entry are DG-only and are skipped.
# NOTE: fixed the misspelled parameter name `invserse_weights` ->
# `inverse_weights` (positional argument, so callers are unaffected).
function volume_integral_fv_kernel!(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv,
                                    alpha, equations::AbstractEquations{1})
    j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

    if (j <= size(du, 2) && k <= size(du, 3))
        # length(element_ids_dgfv) == size(du, 3)

        element_dgfv = element_ids_dgfv[k] # zero means element `k` is DG-only
        alpha_element = alpha[k]

        if element_dgfv != 0
            @inbounds begin
                for ii in axes(du, 1)
                    du[ii, j, element_dgfv] += alpha_element * inverse_weights[j] *
                                               (fstar1_L[ii, j + 1, element_dgfv] -
                                                fstar1_R[ii, j, element_dgfv])
                end
            end
        end
    end

    return nothing
end

# Kernel for prolonging two interfaces
function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids)
j = (blockIdx().x - 1) * blockDim().x + threadIdx().x
Expand Down Expand Up @@ -471,18 +592,69 @@ end

# GPU entry point for the shock-capturing (DG + FV blending) volume integral in 1D.
#
# Workflow:
#   1. Evaluate the shock indicator `alpha` (on the host, see comment below).
#   2. Partition elements into pure-DG and blended DG-FV sets on the device.
#   3. Compute DG volume fluxes and subcell FV fluxes in one kernel.
#   4. Accumulate the DG and the `alpha`-weighted FV contributions into `du`.
#
# Fixes: removed the dead assignment of `element_ids_dg`/`element_ids_dgfv`
# from `cache` (they were immediately overwritten by fresh device arrays) and
# corrected the misspelled local `invserse_weights` -> `inverse_weights`.
function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms::False, equations,
                               volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache)
    volume_flux_dg, volume_flux_fv = dg.volume_integral.volume_flux_dg,
                                     dg.volume_integral.volume_flux_fv
    indicator = dg.volume_integral.indicator

    # This will cause scalar indexing on the GPU
    # alpha = CuArray{Float64}(indicator(u, mesh, equations, dg, cache))

    # TODO: Get copies of `u` and `du` on both device and host
    alpha = indicator(Array(u), mesh, equations, dg, cache)
    alpha = CuArray{Float64}(alpha)

    # For `Float64`, this gives 1.8189894035458565e-12
    # For `Float32`, this gives 1.1920929f-5
    atol = 1.8189894035458565e-12 # Ref: `pure_and_blended_element_ids!` in Trixi.jl

    # Entry `k` is set to `k` by the counting kernel if element `k` belongs to
    # the respective category and stays zero otherwise
    element_ids_dg = zero(CuArray{Int64}(undef, length(alpha)))
    element_ids_dgfv = zero(CuArray{Int64}(undef, length(alpha)))

    pure_blended_element_count_kernel = @cuda launch=false pure_blended_element_count_kernel!(element_ids_dg,
                                                                                             element_ids_dgfv,
                                                                                             alpha,
                                                                                             atol)
    pure_blended_element_count_kernel(element_ids_dg, element_ids_dgfv, alpha, atol;
                                      configurator_1d(pure_blended_element_count_kernel, alpha)...)

    derivative_split = dg.basis.derivative_split
    set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!`

    derivative_split = CuArray{Float64}(derivative_split)
    volume_flux_arr = CuArray{Float64}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3))

    inverse_weights = CuArray{Float64}(dg.basis.inverse_weights)
    fstar1_L = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))
    fstar1_R = zero(CuArray{Float64}(undef, size(u, 1), size(u, 2) + 1, size(u, 3)))

    # Dummy array whose shape only drives the kernel launch configuration
    size_arr = CuArray{Float64}(undef, size(u, 2)^2, size(u, 3))

    volume_flux_dgfv_kernel = @cuda launch=false volume_flux_dgfv_kernel!(volume_flux_arr, fstar1_L,
                                                                          fstar1_R, u,
                                                                          element_ids_dgfv,
                                                                          equations, volume_flux_dg,
                                                                          volume_flux_fv)
    volume_flux_dgfv_kernel(volume_flux_arr, fstar1_L, fstar1_R, u, element_ids_dgfv, equations,
                            volume_flux_dg, volume_flux_fv;
                            configurator_2d(volume_flux_dgfv_kernel, size_arr)...)

    volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg,
                                                                              element_ids_dgfv,
                                                                              alpha,
                                                                              derivative_split,
                                                                              volume_flux_arr,
                                                                              equations)
    volume_integral_dg_kernel(du, element_ids_dg, element_ids_dgfv, alpha, derivative_split,
                              volume_flux_arr, equations;
                              configurator_3d(volume_integral_dg_kernel, du)...)

    size_arr = CuArray{Float64}(undef, size(u, 2), size(u, 3))

    volume_integral_fv_kernel = @cuda launch=false volume_integral_fv_kernel!(du, fstar1_L,
                                                                              fstar1_R,
                                                                              inverse_weights,
                                                                              element_ids_dgfv,
                                                                              alpha, equations)
    volume_integral_fv_kernel(du, fstar1_L, fstar1_R, inverse_weights, element_ids_dgfv, alpha,
                              equations; configurator_2d(volume_integral_fv_kernel, size_arr)...)

    return nothing
end

Expand Down
2 changes: 1 addition & 1 deletion src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function volume_integral_kernel!(du, derivative_split, symmetric_flux_arr1, symm
noncons_flux_arr2[i, j1, j2, ii, k]
end

du[i, j1, j2, k] += 0.5f0 * integral_contribution # change back to `Float32`
du[i, j1, j2, k] += 0.5 * integral_contribution # change back to `Float32`
end
end

Expand Down
107 changes: 50 additions & 57 deletions test/test_script.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,56 +57,49 @@ du_gpu, u_gpu = TrixiGPU.copy_to_device!(du, u)
# Reset data on host
Trixi.reset_du!(du, solver, cache)

# Extract the volume-integral components from the CPU solver for the reference run
element_ids_dg, element_ids_dgfv = cache.element_ids_dg, cache.element_ids_dgfv
volume_flux_dg, volume_flux_fv = solver.volume_integral.volume_flux_dg,
                                 solver.volume_integral.volume_flux_fv
indicator = solver.volume_integral.indicator

# alpha = CuArray{Float64}(indicator(u, mesh, equations, solver, cache))

# Test `cuda_volume_integral!`
TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
                               Trixi.have_nonconservative_terms(equations_gpu),
                               equations_gpu, solver_gpu.volume_integral, solver_gpu,
                               cache_gpu)
# Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
# equations, solver.volume_integral, solver, cache)
# @test CUDA.@allowscalar du_gpu ≈ du

# # Test `cuda_prolong2interfaces!`
# TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
# Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
# interfaces_u_gpu = replace(cache_gpu.interfaces.u, NaN => 0.0)
# interfaces_u = replace(cache.interfaces.u, NaN => 0.0)
# @test interfaces_u_gpu ≈ interfaces_u

# # Test `cuda_interface_flux!`
# TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu),
# equations_gpu, solver_gpu, cache_gpu)
# Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
# Trixi.have_nonconservative_terms(equations), equations,
# solver.surface_integral, solver, cache)
# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0)
# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0)
# @test surface_flux_values_gpu ≈ surface_flux_values

# # Test `cuda_prolong2boundaries!`
# TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
# cache_gpu)
# Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
# boundaries_u_gpu = replace(cache_gpu.boundaries.u, NaN => 0.0)
# boundaries_u = replace(cache.boundaries.u, NaN => 0.0)
# @test boundaries_u_gpu ≈ boundaries_u

# # Test `cuda_boundary_flux!`
# TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
# Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
# solver_gpu, cache_gpu)
# Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
# solver.surface_integral, solver)
# surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0)
# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0)
# @test surface_flux_values_gpu ≈ surface_flux_values
Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
                            equations, solver.volume_integral, solver, cache)
# Compare GPU result against the CPU reference (restored the `≈` operator,
# which was dropped from the `@test` expression)
@test CUDA.@allowscalar du_gpu ≈ du

# Test `cuda_prolong2interfaces!`
TrixiGPU.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
# Replace NaN placeholders so the elementwise comparison is well-defined
interfaces_u_gpu = replace(cache_gpu.interfaces.u, NaN => 0.0)
interfaces_u = replace(cache.interfaces.u, NaN => 0.0)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test interfaces_u_gpu ≈ interfaces_u

# Test `cuda_interface_flux!`
TrixiGPU.cuda_interface_flux!(mesh_gpu, Trixi.have_nonconservative_terms(equations_gpu),
                              equations_gpu, solver_gpu, cache_gpu)
Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
                           Trixi.have_nonconservative_terms(equations), equations,
                           solver.surface_integral, solver, cache)
# Replace NaN placeholders so the elementwise comparison is well-defined
surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0)
surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test surface_flux_values_gpu ≈ surface_flux_values

# Test `cuda_prolong2boundaries!`
TrixiGPU.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu, equations_gpu,
                                  cache_gpu)
Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
# Replace NaN placeholders so the elementwise comparison is well-defined
boundaries_u_gpu = replace(cache_gpu.boundaries.u, NaN => 0.0)
boundaries_u = replace(cache.boundaries.u, NaN => 0.0)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test boundaries_u_gpu ≈ boundaries_u

# Test `cuda_boundary_flux!`
TrixiGPU.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
                             Trixi.have_nonconservative_terms(equations_gpu), equations_gpu,
                             solver_gpu, cache_gpu)
Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
                          solver.surface_integral, solver)
# Replace NaN placeholders so the elementwise comparison is well-defined
surface_flux_values_gpu = replace(cache_gpu.elements.surface_flux_values, NaN => 0.0)
surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test surface_flux_values_gpu ≈ surface_flux_values

# # Test `cuda_prolong2mortars!`
# TrixiGPU.cuda_prolong2mortars!(u_gpu, mesh_gpu, TrixiGPU.check_cache_mortars(cache_gpu),
Expand All @@ -131,20 +124,20 @@ TrixiGPU.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
# surface_flux_values = replace(cache.elements.surface_flux_values, NaN => 0.0)
# @test surface_flux_values_gpu ≈ surface_flux_values

# # Test `cuda_surface_integral!`
# TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
# Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
# @test CUDA.@allowscalar du_gpu ≈ du
# Test `cuda_surface_integral!`
TrixiGPU.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral, solver, cache)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test CUDA.@allowscalar du_gpu ≈ du

# # Test `cuda_jacobian!`
# TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
# Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
# @test CUDA.@allowscalar du_gpu ≈ du
# Test `cuda_jacobian!`
TrixiGPU.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test CUDA.@allowscalar du_gpu ≈ du

# # Test `cuda_sources!`
# TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
# Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
# @test CUDA.@allowscalar du_gpu ≈ du
# Test `cuda_sources!`
TrixiGPU.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu, equations_gpu, cache_gpu)
Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
# Restored the `≈` operator, which was dropped from the `@test` expression
@test CUDA.@allowscalar du_gpu ≈ du

# # Copy data back to host
# # du_cpu, u_cpu = TrixiGPU.copy_to_host!(du_gpu, u_gpu)

0 comments on commit a9a783f

Please sign in to comment.