Skip to content

Commit

Permalink
Complete 1D
Browse files Browse the repository at this point in the history
  • Loading branch information
huiyuxie committed Dec 14, 2024
1 parent 5c0bfde commit 8981b7a
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 84 deletions.
32 changes: 22 additions & 10 deletions benchmark/benchmark_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,41 @@ using Trixi, TrixiCUDA
using CUDA
using BenchmarkTools

# Set the precision
RealT = Float32

# Set up the problem
equations = CompressibleEulerEquations1D(1.4)
equations = CompressibleEulerEquations1D(1.4f0)

initial_condition = initial_condition_weak_blast_wave

volume_flux = flux_ranocha
solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
volume_integral = VolumeIntegralFluxDifferencing(volume_flux))

coordinates_min = (-2.0,)
coordinates_max = (2.0,)
surface_flux = flux_lax_friedrichs
volume_flux = flux_shima_etal
basis = LobattoLegendreBasis(RealT, 3)
indicator_sc = IndicatorHennemannGassner(equations, basis,
alpha_max = 0.5f0,
alpha_min = 0.001f0,
alpha_smooth = true,
variable = density_pressure)
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
volume_flux_dg = volume_flux,
volume_flux_fv = surface_flux)
solver = DGSEM(basis, surface_flux, volume_integral)

coordinates_min = -2.0f0
coordinates_max = 2.0f0
mesh = TreeMesh(coordinates_min, coordinates_max,
initial_refinement_level = 5,
n_cells_max = 10_000)
n_cells_max = 10_000, RealT = RealT)

# Cache initialization
@info "Time for cache initialization on CPU"
@time semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
@info "Time for cache initialization on GPU"
CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)

tspan = tspan_gpu = (0.0, 0.4)
t = t_gpu = 0.0
tspan = tspan_gpu = (0.0f0, 0.4f0)
t = t_gpu = 0.0f0

# Semi on CPU
(; mesh, equations, boundary_conditions, source_terms, solver, cache) = semi
Expand Down
5 changes: 2 additions & 3 deletions src/semidiscretization/semidiscretization_hyperbolic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver;
source_terms = nothing,
boundary_conditions = boundary_condition_periodic,
# `RealT` is used as real type for node locations etc.
# while `uEltype` is used as element type of solutions etc.
RealT = real(solver), uEltype = RealT,
RealT = real(solver), # `RealT` is used as real type for node locations etc.
uEltype = RealT, # `uEltype` is used as element type of solutions etc.
initial_cache = NamedTuple())
@assert ndims(mesh) == ndims(equations)

Expand Down
48 changes: 24 additions & 24 deletions src/solvers/cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ end
function create_cache_gpu(mesh::TreeMesh{1}, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM,
uEltype, cache)
fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements))
fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements))
fstar1_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements))
fstar1_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements))

cache = create_cache_gpu(mesh, equations,
VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg),
Expand Down Expand Up @@ -103,13 +103,13 @@ end
function create_cache_gpu(mesh::TreeMesh{2}, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM,
uEltype, cache)
fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
fstar1_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
nelements(cache.elements))
fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
fstar1_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
nelements(cache.elements))
fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
fstar2_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nelements(cache.elements))
fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
fstar2_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nelements(cache.elements))

cache = create_cache_gpu(mesh, equations,
Expand All @@ -121,13 +121,13 @@ end

function create_cache_gpu(mesh::TreeMesh{2}, equations,
mortar_l2::LobattoLegendreMortarL2, uEltype, cache)
fstar_primary_upper = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_primary_upper = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nmortars(cache.mortars))
fstar_primary_lower = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_primary_lower = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nmortars(cache.mortars))
fstar_secondary_upper = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_secondary_upper = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nmortars(cache.mortars))
fstar_secondary_lower = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_secondary_lower = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nmortars(cache.mortars))

(; fstar_primary_upper, fstar_primary_lower, fstar_secondary_upper, fstar_secondary_lower)
Expand Down Expand Up @@ -176,17 +176,17 @@ end
function create_cache_gpu(mesh::TreeMesh{3}, equations,
volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM,
uEltype, cache)
fstar1_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
fstar1_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
nnodes(dg), nelements(cache.elements))
fstar1_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
fstar1_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nnodes(dg),
nnodes(dg), nelements(cache.elements))
fstar2_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
fstar2_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nnodes(dg), nelements(cache.elements))
fstar2_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
fstar2_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg), nnodes(dg) + 1,
nnodes(dg), nelements(cache.elements))
fstar3_L = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg),
fstar3_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg), nnodes(dg),
nnodes(dg) + 1, nelements(cache.elements))
fstar3_R = CUDA.zeros(Float64, nvariables(equations), nnodes(dg), nnodes(dg),
fstar3_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg), nnodes(dg),
nnodes(dg) + 1, nelements(cache.elements))

cache = create_cache_gpu(mesh, equations,
Expand All @@ -198,21 +198,21 @@ end

function create_cache_gpu(mesh::TreeMesh{3}, equations,
mortar_l2::LobattoLegendreMortarL2, uEltype, cache)
fstar_primary_upper_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_primary_upper_left = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_primary_upper_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_primary_upper_right = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_primary_lower_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_primary_lower_left = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_primary_lower_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_primary_lower_right = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_secondary_upper_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_secondary_upper_left = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_secondary_upper_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_secondary_upper_right = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_secondary_lower_left = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_secondary_lower_left = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))
fstar_secondary_lower_right = CUDA.zeros(Float64, nvariables(equations), nnodes(mortar_l2),
fstar_secondary_lower_right = CUDA.zeros(uEltype, nvariables(equations), nnodes(mortar_l2),
nnodes(mortar_l2), nmortars(cache.mortars))

# Temporary arrays can also be created here
Expand Down
6 changes: 3 additions & 3 deletions src/solvers/common.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# Some common functions that are shared between the solvers.

# Copy data from CPU to GPU
function reset_du!(du::CuArray{Float64})
function reset_du!(du::CuArray)
du_zero = zero(du)
du .= du_zero # no scalar indexing

return nothing
end

# Set diagonal entries of a matrix to zeros
function set_diagonal_to_zero!(A::Array{Float64})
function set_diagonal_to_zero!(A::Array)
n = min(size(A)...)
for i in 1:n
A[i, i] = 0.0 # change back to `Float32`
A[i, i] = zero(eltype(A))
end
return nothing
end
Expand Down
Loading

0 comments on commit 8981b7a

Please sign in to comment.