
Commit

Merge branch 'main' into compathelper/new_version/2024-08-10-01-28-27-675-04036776483
huiyuxie authored Aug 10, 2024
2 parents e8309b8 + cf8836a commit 3a00ca5
Showing 14 changed files with 528 additions and 68 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/FormatCheck.yml
@@ -24,7 +24,7 @@ jobs:
- name: Install JuliaFormatter and format
run: |
julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version="1.0.56"))'
-julia -e 'using JuliaFormatter; format(["src", "test", "utils"])'
+julia -e 'using JuliaFormatter; format(["examples", "src", "test", "utils"])'
- name: Format check
run: |
julia -e '
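
To reproduce the format check locally (a sketch using the same commands and JuliaFormatter pin as this workflow):

julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version = "1.0.56"))'
julia -e 'using JuliaFormatter; format(["examples", "src", "test", "utils"])'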
1 change: 1 addition & 0 deletions Project.toml
@@ -17,6 +17,7 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
BenchmarkTools = "1"
CUDA = "5"
SimpleUnPack = "1"
+SciMLBase = "2"
StaticArrays = "1"
StrideArrays = "0.1"
Trixi = "0.8"
47 changes: 47 additions & 0 deletions examples/advection_basic_1d.jl
@@ -0,0 +1,47 @@
# 1D Linear Advection Equation
using Trixi, TrixiGPU
using OrdinaryDiffEq

advection_velocity = 1.0f0
equations = LinearScalarAdvectionEquation1D(advection_velocity)

solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

coordinates_min = -1.0f0
coordinates_max = 1.0f0

mesh = TreeMesh(coordinates_min, coordinates_max, initial_refinement_level = 4,
n_cells_max = 30_000)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
solver)

tspan = (0.0f0, 1.0f0)

# FIXME: Remember to export
ode = TrixiGPU.semidiscretize_gpu(semi, tspan)

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
extra_analysis_errors = (:l2_error_primitive,
:linf_error_primitive))

alive_callback = AliveCallback(analysis_interval = analysis_interval)

save_solution = SaveSolutionCallback(interval = 100,
save_initial_solution = true,
save_final_solution = true,
solution_variables = cons2prim)

stepsize_callback = StepsizeCallback(cfl = 0.8)

callbacks = CallbackSet(summary_callback,
analysis_callback, alive_callback,
save_solution,
stepsize_callback)
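
# NOTE: `dt = 1.0` below is only a placeholder; the StepsizeCallback in `callbacks`
# overwrites it with a CFL-based step size.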

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
dt = 1.0, save_everystep = false, callback = callbacks);
summary_callback()
39 changes: 39 additions & 0 deletions examples/advection_basic_2d.jl
@@ -0,0 +1,39 @@
# 2D Linear Advection Equation
using Trixi, TrixiGPU
using OrdinaryDiffEq

advection_velocity = (0.2f0, -0.7f0)
equations = LinearScalarAdvectionEquation2D(advection_velocity)

solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)

coordinates_min = (-1.0f0, -1.0f0)
coordinates_max = (1.0f0, 1.0f0)

mesh = TreeMesh(coordinates_min, coordinates_max,
initial_refinement_level = 4,
n_cells_max = 30_000)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
solver)

tspan = (0.0f0, 1.0f0)

# FIXME: Remember to export
ode = TrixiGPU.semidiscretize_gpu(semi, tspan)

summary_callback = SummaryCallback()

analysis_callback = AnalysisCallback(semi, interval = 100)

save_solution = SaveSolutionCallback(interval = 100,
solution_variables = cons2prim)

stepsize_callback = StepsizeCallback(cfl = 1.6)

callbacks = CallbackSet(summary_callback, analysis_callback, save_solution,
stepsize_callback)

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
dt = 1.0, save_everystep = false, callback = callbacks);
summary_callback()
Binary file added out/mesh.h5
Binary file added out/solution_000000000.h5
Binary file added out/solution_000000018.h5
Binary file added out/solution_000000040.h5
14 changes: 7 additions & 7 deletions src/TrixiGPU.jl
@@ -1,29 +1,29 @@
module TrixiGPU

-# Include other packages that are used in TrixiGPU.jl (? reorder)
+# Include other packages that are used in TrixiGPU.jl (# FIXME: Remember to reorder)
# using Reexport: @reexport

using CUDA: @cuda, CuArray, HostKernel,
threadIdx, blockIdx, blockDim, similar,
launch_configuration

-using Trixi: AbstractEquations, TreeMesh, VolumeIntegralWeakForm,
-SemidiscretizationHyperbolic,
-BoundaryConditionPeriodic, DGSEM,
+using Trixi: AbstractEquations, TreeMesh, DGSEM,
+BoundaryConditionPeriodic, SemidiscretizationHyperbolic,
+VolumeIntegralWeakForm,
flux, ntuple, nvariables,
True, False,
-compute_coefficients, wrap_array, have_nonconservative_terms
+wrap_array, compute_coefficients, have_nonconservative_terms

import Trixi: get_node_vars, get_node_coords, get_surface_node_vars

+using SciMLBase: ODEProblem, FullSpecialize

using StrideArrays: PtrArray

using StaticArrays: SVector

using SimpleUnPack: @unpack

-using SciMLBase: ODEProblem, FullSpecialize

# Include other source files
include("function.jl")
include("auxiliary/auxiliary.jl")
6 changes: 3 additions & 3 deletions src/solvers/common.jl
@@ -1,16 +1,16 @@
# Define common functions for solvers

# Copy matrices from host to device
-function copy_to_device!(du::PtrArray, u::PtrArray) # ? PtrArray{Float64}
+function copy_to_device!(du::PtrArray, u::PtrArray)
du = CuArray{Float32}(zero(du))
u = CuArray{Float32}(u)

return (du, u)
end

# Copy matrices from device to host
-function copy_to_host!(du::CuArray, u::CuArray) # ? CuArray{Float32}
-# ? direct CuArray to PtrArray conversion is impossible
+function copy_to_host!(du::CuArray, u::CuArray)
+# FIXME: Maybe direct CuArray to PtrArray conversion is possible
du = PtrArray(Array{Float64}(du))
u = PtrArray(Array{Float64}(u))

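
A minimal round-trip sketch using the two helpers above (not part of the diff; it assumes copy_to_host! returns the pair just as copy_to_device! does):

du_gpu, u_gpu = copy_to_device!(du, u)  # du zeroed as a Float32 CuArray, u transferred
# ... launch kernels that accumulate into du_gpu ...
du, u = copy_to_host!(du_gpu, u_gpu)    # back to Float64 PtrArrays on the host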
69 changes: 26 additions & 43 deletions src/solvers/dg_1d.jl
@@ -1,7 +1,10 @@
# Everything related to a DG semidiscretization in 1D

-# TODO: Please check whether `equations::AbstractEquations{1}` is needed for each function here!!
-# Functions end with `_kernel` are CUDA kernels that are going to be launed by the `@cuda` macro.
+# TODO: Check whether `equations::AbstractEquations{1}` is needed for each function here
+
+# Functions that end with `_kernel` are CUDA kernels that are going to be launched by
+# the @cuda macro with parameters from the kernel configurator. They are purely run on
+# the device (i.e., GPU).
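#
# (Aside, not part of this diff) The kernel configurators mentioned above are defined
# elsewhere in the package; assuming they wrap CUDA.jl's occupancy API, a 3D variant
# might look roughly like this sketch (the actual `configurator_3d` may differ):
#
#     function configurator_3d(kernel, array)
#         config = launch_configuration(kernel.fun)  # occupancy-suggested thread count
#         threads_per_dim = max(floor(Int, config.threads^(1 / 3)), 1)
#         threads = (threads_per_dim, threads_per_dim, threads_per_dim)
#         blocks = map(cld, size(array), threads)    # enough blocks to cover `array`
#         return (threads = threads, blocks = blocks)
#     end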

# Kernel for calculating fluxes along normal direction
function flux_kernel!(flux_arr, u, equations::AbstractEquations{1}, flux::Function)
@@ -97,7 +100,8 @@ function interface_flux_kernel!(surface_flux_values, surface_flux_arr, neighbor_
end

# Kernel for calculating surface integrals
-function surface_integral_kernel!(du, factor_arr, surface_flux_values)
+function surface_integral_kernel!(du, factor_arr, surface_flux_values,
+equations::AbstractEquations{1})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
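# Presumably one thread per (variable, node, element) entry of `du`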
@@ -114,7 +118,7 @@ function surface_integral_kernel!(du, factor_arr, surface_flux_values
end

# Kernel for applying inverse Jacobian
-function jacobian_kernel!(du, inverse_jacobian)
+function jacobian_kernel!(du, inverse_jacobian, equations::AbstractEquations{1})
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
j = (blockIdx().y - 1) * blockDim().y + threadIdx().y
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z
@@ -126,8 +130,9 @@ function jacobian_kernel!(du, inverse_jacobian)
return nothing
end

-# Functions begin with `cuda_` are the functions that pack CUDA kernels together,
-# calling them from the host (i.e., CPU) and running them on the device (i.e., GPU).
+# Functions that begin with `cuda_` are the functions that pack CUDA kernels together to do
+# partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU)
+# and run them on the device (i.e., GPU).
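#
# (Aside, not part of this diff) Each `cuda_` wrapper follows the same two-step launch
# pattern, visible in `cuda_jacobian!` below: `@cuda launch=false` compiles the kernel
# and returns it without running it, and the subsequent call launches it with the
# occupancy-derived configuration splatted in from the configurator:
#
#     kernel = @cuda launch=false jacobian_kernel!(du, inverse_jacobian, equations)
#     kernel(du, inverse_jacobian, equations; configurator_3d(kernel, du)...)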

# Pack kernels for calculating volume integrals
function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, equations,
@@ -203,17 +208,15 @@ function cuda_prolong2boundaries!(u, mesh::TreeMesh{1},
end

# Dummy function for asserting boundary fluxes
-function cuda_boundary_flux!(t,
-mesh::TreeMesh{1},
-boundary_condition::BoundaryConditionPeriodic,
-equations,
-dg::DGSEM,
-cache)
+function cuda_boundary_flux!(t, mesh::TreeMesh{1},
+boundary_condition::BoundaryConditionPeriodic, equations,
+dg::DGSEM, cache)
@assert iszero(length(cache.boundaries.orientations))
end

# Pack kernels for calculating surface integrals
-function cuda_surface_integral!(du, mesh::TreeMesh{1}, dg::DGSEM, cache)
+function cuda_surface_integral!(du, mesh::TreeMesh{1}, equations, dg::DGSEM, cache)
+# FIXME: `surface_integral`
factor_arr = CuArray{Float32}([
dg.basis.boundary_interpolation[1, 1],
dg.basis.boundary_interpolation[size(du, 2), 2]
@@ -223,21 +226,21 @@ function cuda_surface_integral!(du, mesh::TreeMesh{1}, dg::DGSEM, cache)
size_arr = CuArray{Float32}(undef, size(du, 1), size(du, 2), size(du, 3))

surface_integral_kernel = @cuda launch=false surface_integral_kernel!(du, factor_arr,
-surface_flux_values)
-surface_integral_kernel(du,
-factor_arr,
-surface_flux_values;
+surface_flux_values,
+equations)
+surface_integral_kernel(du, factor_arr, surface_flux_values, equations;
configurator_3d(surface_integral_kernel, size_arr)...,)

return nothing
end

# Pack kernels for applying Jacobian to reference element
-function cuda_jacobian!(du, mesh::TreeMesh{1}, cache)
+function cuda_jacobian!(du, mesh::TreeMesh{1}, equations, cache)
inverse_jacobian = CuArray{Float32}(cache.elements.inverse_jacobian)

-jacobian_kernel = @cuda launch=false jacobian_kernel!(du, inverse_jacobian)
-jacobian_kernel(du, inverse_jacobian; configurator_3d(jacobian_kernel, du)...)
+jacobian_kernel = @cuda launch=false jacobian_kernel!(du, inverse_jacobian, equations)
+jacobian_kernel(du, inverse_jacobian, equations;
+configurator_3d(jacobian_kernel, du)...)

return nothing
end
@@ -248,7 +251,7 @@ function cuda_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquat
return nothing
end

-# Put everything together in single function
+# Put everything together into a single function
# Ref: `rhs!` function in Trixi.jl

function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, initial_condition,
@@ -267,9 +270,9 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, initial_condit

cuda_boundary_flux!(t, mesh, boundary_conditions, equations, dg, cache)

-cuda_surface_integral!(du, mesh, dg, cache)
+cuda_surface_integral!(du, mesh, equations, dg, cache)

-cuda_jacobian!(du, mesh, cache)
+cuda_jacobian!(du, mesh, equations, cache)

cuda_sources!(du, u, t, source_terms, equations, cache)

@@ -278,23 +281,3 @@ function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, initial_condit

return nothing
end

-function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t)
-@unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi
-
-u = wrap_array(u_ode, mesh, equations, solver, cache)
-du = wrap_array(du_ode, mesh, equations, solver, cache)
-
-rhs_gpu!(du, u, t, mesh, equations, initial_condition, boundary_conditions,
-source_terms, solver, cache)
-
-return nothing
-end
-
-function semidiscretize_gpu(semi::SemidiscretizationHyperbolic, tspan)
-u0_ode = compute_coefficients(first(tspan), semi)
-
-iip = true
-specialize = FullSpecialize
-return ODEProblem{iip, specialize}(rhs_gpu!, u0_ode, tspan, semi)
-end
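
Note that the examples above still call TrixiGPU.semidiscretize_gpu, so these generic definitions presumably move to one of the changed files not shown in this truncated diff. A minimal usage sketch, mirroring examples/advection_basic_1d.jl:

ode = TrixiGPU.semidiscretize_gpu(semi, (0.0f0, 1.0f0))
sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
            dt = 1.0, save_everystep = false, callback = callbacks)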