Skip to content

Commit

Permalink
Merge branch 'trixibase-timers' of github.com:efaulhaber/Trixi.jl int…
Browse files Browse the repository at this point in the history
…o trixibase-timers
  • Loading branch information
efaulhaber committed Jun 11, 2024
2 parents fa7714d + bc365b7 commit 6a579c7
Show file tree
Hide file tree
Showing 15 changed files with 90 additions and 83 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ from the Trixi Framework ecosystem:
* [**Towards Aerodynamic Simulations in Julia with Trixi.jl**](https://pretalx.com/juliacon2024/talk/XH8KBG/),<br/>
[*Daniel Doehring*](https://github.com/danieldoehring/),
10th July 2024, 15:00pm–15:30pm, While Loop (4.2)
* [**libtrixi: serving legacy codes in earth system modeling with fresh Julia CFD**](https://pretalx.com/juliacon2024/talk/JBKVGF/),<br/>
* [**libtrixi: serving legacy codes in earth system modeling with fresh Julia CFD**](https://pretalx.com/juliacon2024/talk/SXC7LA/),<br/>
[*Benedict Geihe*](https://github.com/benegee/),
12th July 2024, 14:00pm–17:00pm, Function (4.1)

The last talk is part of the
[Julia for High-Performance Computing](https://pretalx.com/juliacon2024/talk/JBKVGF/)
[Julia for High-Performance Computing](https://juliacon.org/2024/minisymposia/hpc/)
minisymposium, which this year is hosted by our own [*Hendrik Ranocha*](https://github.com/ranocha/).

We are looking forward to seeing you there ♥️
Expand Down
4 changes: 2 additions & 2 deletions src/callbacks_step/amr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh,

if mpi_isparallel()
# Collect lambda for all elements
lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache))
lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(mesh, dg, cache))
# Use parent because n_elements_by_rank is an OffsetArray
recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank))
MPI.Allgatherv!(lambda, recvbuf, mpi_comm())
Expand Down Expand Up @@ -380,7 +380,7 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh,
error("MPI has not been verified yet for parabolic AMR")

# Collect lambda for all elements
lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache))
lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(mesh, dg, cache))
# Use parent because n_elements_by_rank is an OffsetArray
recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank))
MPI.Allgatherv!(lambda, recvbuf, mpi_comm())
Expand Down
91 changes: 28 additions & 63 deletions src/callbacks_step/analysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -308,11 +308,11 @@ function (analysis_callback::AnalysisCallback)(u_ode, du_ode, integrator, semi)
mpi_println(" " * " " *
" " *
" PID: " * @sprintf("%10.8e s", performance_index))
mpi_println(" #DOFs per field:" * @sprintf("% 14d", ndofs(semi)) *
mpi_println(" #DOFs per field:" * @sprintf("% 14d", ndofsglobal(semi)) *
" " *
" alloc'd memory: " * @sprintf("%14.3f MiB", memory_use))
mpi_println(" #elements: " *
@sprintf("% 14d", nelements(mesh, solver, cache)))
@sprintf("% 14d", nelementsglobal(mesh, solver, cache)))

# Level information (only show for AMR)
print_amr_information(integrator.opts.callback, mesh, solver, cache)
Expand Down Expand Up @@ -494,88 +494,53 @@ function print_amr_information(callbacks, mesh, solver, cache)
# Return early if there is nothing to print
uses_amr(callbacks) || return nothing

levels = Vector{Int}(undef, nelements(solver, cache))
min_level = typemax(Int)
max_level = typemin(Int)
for element in eachelement(solver, cache)
current_level = mesh.tree.levels[cache.elements.cell_ids[element]]
levels[element] = current_level
min_level = min(min_level, current_level)
max_level = max(max_level, current_level)
# Get global minimum and maximum level from the AMRController
min_level = max_level = 0
for cb in callbacks.discrete_callbacks
if cb.affect! isa AMRCallback
min_level = cb.affect!.controller.base_level
max_level = cb.affect!.controller.max_level
end
end

# Get local element count per level
elements_per_level = get_elements_per_level(min_level, max_level, mesh, solver,
cache)

# Sum up across all ranks
MPI.Reduce!(elements_per_level, +, mpi_root(), mpi_comm())

# Print
for level in max_level:-1:(min_level + 1)
mpi_println(" ├── level $level: " *
@sprintf("% 14d", count(==(level), levels)))
@sprintf("% 14d", elements_per_level[level + 1 - min_level]))
end
mpi_println(" └── level $min_level: " *
@sprintf("% 14d", count(==(min_level), levels)))
@sprintf("% 14d", elements_per_level[1]))

return nothing
end

# Print level information only if AMR is enabled
function print_amr_information(callbacks, mesh::P4estMesh, solver, cache)

# Return early if there is nothing to print
uses_amr(callbacks) || return nothing

function get_elements_per_level(min_level, max_level, mesh::P4estMesh, solver, cache)
elements_per_level = zeros(P4EST_MAXLEVEL + 1)

for tree in unsafe_wrap_sc(p4est_tree_t, mesh.p4est.trees)
elements_per_level .+= tree.quadrants_per_level
end

# levels start at zero but Julia's standard indexing starts at 1
min_level_1 = findfirst(i -> i > 0, elements_per_level)
max_level_1 = findlast(i -> i > 0, elements_per_level)

# Check if there is at least one level with an element
if isnothing(min_level_1) || isnothing(max_level_1)
return nothing
end

min_level = min_level_1 - 1
max_level = max_level_1 - 1

for level in max_level:-1:(min_level + 1)
mpi_println(" ├── level $level: " *
@sprintf("% 14d", elements_per_level[level + 1]))
end
mpi_println(" └── level $min_level: " *
@sprintf("% 14d", elements_per_level[min_level + 1]))

return nothing
return @view(elements_per_level[(min_level + 1):(max_level + 1)])
end

# Print level information only if AMR is enabled
function print_amr_information(callbacks, mesh::T8codeMesh, solver, cache)

# Return early if there is nothing to print
uses_amr(callbacks) || return nothing

# TODO: Switch to global element levels array when MPI supported or find
# another solution.
function get_elements_per_level(min_level, max_level, mesh::T8codeMesh, solver, cache)
levels = trixi_t8_get_local_element_levels(mesh.forest)

min_level = minimum(levels)
max_level = maximum(levels)

mpi_println(" minlevel = $min_level")
mpi_println(" maxlevel = $max_level")

if min_level > 0
elements_per_level = [count(==(l), levels) for l in 1:max_level]

for level in max_level:-1:(min_level + 1)
mpi_println(" ├── level $level: " *
@sprintf("% 14d", elements_per_level[level]))
end
mpi_println(" └── level $min_level: " *
@sprintf("% 14d", elements_per_level[min_level]))
end
return [count(==(l), levels) for l in min_level:max_level]
end

return nothing
function get_elements_per_level(min_level, max_level, mesh::TreeMesh, solver, cache)
levels = [mesh.tree.levels[cache.elements.cell_ids[element]]
for element in eachelement(solver, cache)]
return [count(==(l), levels) for l in min_level:max_level]
end

# Iterate over tuples of analysis integrals in a type-stable way using "lispy tuple programming".
Expand Down
7 changes: 7 additions & 0 deletions src/callbacks_step/analysis_dgmulti.jl
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ end
SolutionAnalyzer(rd::RefElemData) = rd

nelements(mesh::DGMultiMesh, ::DGMulti, other_args...) = mesh.md.num_elements
function nelementsglobal(mesh::DGMultiMesh, solver::DGMulti, cache)
if mpi_isparallel()
error("`nelementsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI")
else
return ndofs(mesh, solver, cache)
end
end
function ndofsglobal(mesh::DGMultiMesh, solver::DGMulti, cache)
if mpi_isparallel()
error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI")
Expand Down
4 changes: 2 additions & 2 deletions src/callbacks_step/save_restart_dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ function save_restart_file_parallel(u, time, dt, timestep,
attributes(file)["equations"] = get_name(equations)
attributes(file)["polydeg"] = polydeg(dg)
attributes(file)["n_vars"] = nvariables(equations)
attributes(file)["n_elements"] = nelementsglobal(dg, cache)
attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache)
attributes(file)["mesh_type"] = get_name(mesh)
attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2]
attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar
Expand Down Expand Up @@ -239,7 +239,7 @@ function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estM
if read(attributes(file)["polydeg"]) != polydeg(dg)
error("restart mismatch: polynomial degree in solver differs from value in restart file")
end
if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache)
if read(attributes(file)["n_elements"]) != nelementsglobal(mesh, dg, cache)
error("restart mismatch: number of elements in solver differs from value in restart file")
end

Expand Down
6 changes: 3 additions & 3 deletions src/callbacks_step/save_solution_dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars,
attributes(file)["equations"] = get_name(equations)
attributes(file)["polydeg"] = polydeg(dg)
attributes(file)["n_vars"] = n_vars
attributes(file)["n_elements"] = nelementsglobal(dg, cache)
attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache)
attributes(file)["mesh_type"] = get_name(mesh)
attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2]
attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar
Expand All @@ -183,7 +183,7 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars,
# Need to create dataset explicitly in parallel case
var = create_dataset(file, "/element_variables_$v",
datatype(eltype(element_variable)),
dataspace((nelementsglobal(dg, cache),)))
dataspace((nelementsglobal(mesh, dg, cache),)))

# Write data of each process in slices (ranks start with 0)
slice = (cum_element_counts[mpi_rank() + 1] + 1):cum_element_counts[mpi_rank() + 2]
Expand Down Expand Up @@ -230,7 +230,7 @@ function save_solution_file_on_root(data, time, dt, timestep, n_vars,
attributes(file)["equations"] = get_name(equations)
attributes(file)["polydeg"] = polydeg(dg)
attributes(file)["n_vars"] = n_vars
attributes(file)["n_elements"] = nelementsglobal(dg, cache)
attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache)
attributes(file)["mesh_type"] = get_name(mesh)
attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2]
attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar
Expand Down
6 changes: 2 additions & 4 deletions src/equations/compressible_euler_quasi_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -314,10 +314,8 @@ end
# 1D compressible Euler equations scaled by the channel width `a`.
@inline function entropy(u, equations::CompressibleEulerEquationsQuasi1D)
a_rho, a_rho_v1, a_e, a = u
q = a * entropy(SVector(a_rho, a_rho_v1, a_e) / a,
CompressibleEulerEquations1D(equations.gamma))

return SVector(q[1], q[2], q[3], a)
return a * entropy(SVector(a_rho, a_rho_v1, a_e) / a,
CompressibleEulerEquations1D(equations.gamma))
end

# Convert conservative variables to entropy. The entropy variables for the
Expand Down
3 changes: 2 additions & 1 deletion src/meshes/p4est_mesh.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ end
end
# returns Int32 by default which causes a weird method error when creating the cache
@inline ncells(mesh::P4estMesh) = Int(mesh.p4est.local_num_quadrants[])
@inline ncellsglobal(mesh::P4estMesh) = Int(mesh.p4est.global_num_quadrants[])

function Base.show(io::IO, mesh::P4estMesh)
print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}")
Expand All @@ -105,7 +106,7 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::P4estMesh)
else
setup = [
"#trees" => ntrees(mesh),
"current #cells" => ncells(mesh),
"current #cells" => ncellsglobal(mesh),
"polydeg" => length(mesh.nodes) - 1,
]
summary_box(io,
Expand Down
3 changes: 2 additions & 1 deletion src/meshes/t8code_mesh.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ const ParallelT8codeMesh{NDIMS} = T8codeMesh{NDIMS, <:Real, <:True}

@inline ntrees(mesh::T8codeMesh) = size(mesh.tree_node_coordinates)[end]
@inline ncells(mesh::T8codeMesh) = Int(t8_forest_get_local_num_elements(mesh.forest))
@inline ncellsglobal(mesh::T8codeMesh) = Int(t8_forest_get_global_num_elements(mesh.forest))

function Base.show(io::IO, mesh::T8codeMesh)
print(io, "T8codeMesh{", ndims(mesh), ", ", real(mesh), "}")
Expand All @@ -91,7 +92,7 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::T8codeMesh)
else
setup = [
"#trees" => ntrees(mesh),
"current #cells" => ncells(mesh),
"current #cells" => ncellsglobal(mesh),
"polydeg" => length(mesh.nodes) - 1,
]
summary_box(io,
Expand Down
14 changes: 14 additions & 0 deletions src/semidiscretization/semidiscretization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,19 @@ Return the number of degrees of freedom associated with each scalar variable.
ndofs(mesh, solver, cache)
end

"""
ndofsglobal(semi::AbstractSemidiscretization)
Return the global number of degrees of freedom associated with each scalar variable across all MPI ranks.
This is the same as [`ndofs`](@ref) for simulations running in serial or
parallelized via threads. It will in general be different for simulations
running in parallel with MPI.
"""
@inline function ndofsglobal(semi::AbstractSemidiscretization)
mesh, _, solver, cache = mesh_equations_solver_cache(semi)
ndofsglobal(mesh, solver, cache)
end

"""
integrate_via_indices(func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true)
Expand Down Expand Up @@ -397,6 +410,7 @@ end
# TODO: Taal, document interface?
# New mesh/solver combinations have to implement
# - ndofs(mesh, solver, cache)
# - ndofsgloabal(mesh, solver, cache)
# - ndims(mesh)
# - nnodes(solver)
# - real(solver)
Expand Down
14 changes: 13 additions & 1 deletion src/semidiscretization/semidiscretization_coupled.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationCoupled)
semi.semis[i].source_terms)
summary_line(increment_indent(io), "solver", solver |> typeof |> nameof)
end
summary_line(io, "total #DOFs per field", ndofs(semi))
summary_line(io, "total #DOFs per field", ndofsglobal(semi))
summary_footer(io)
end
end
Expand Down Expand Up @@ -123,6 +123,18 @@ end
sum(ndofs, semi.semis)
end

"""
ndofsglobal(semi::SemidiscretizationCoupled)
Return the global number of degrees of freedom associated with each scalar variable across all MPI ranks, and summed up over all coupled systems.
This is the same as [`ndofs`](@ref) for simulations running in serial or
parallelized via threads. It will in general be different for simulations
running in parallel with MPI.
"""
@inline function ndofsglobal(semi::SemidiscretizationCoupled)
sum(ndofsglobal, semi.semis)
end

function compute_coefficients(t, semi::SemidiscretizationCoupled)
@unpack u_indices = semi

Expand Down
2 changes: 1 addition & 1 deletion src/semidiscretization/semidiscretization_hyperbolic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperboli

summary_line(io, "source terms", semi.source_terms)
summary_line(io, "solver", semi.solver |> typeof |> nameof)
summary_line(io, "total #DOFs per field", ndofs(semi))
summary_line(io, "total #DOFs per field", ndofsglobal(semi))
summary_footer(io)
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ function Base.show(io::IO, ::MIME"text/plain",
summary_line(io, "source terms", semi.source_terms)
summary_line(io, "solver", semi.solver |> typeof |> nameof)
summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof)
summary_line(io, "total #DOFs per field", ndofs(semi))
summary_line(io, "total #DOFs per field", ndofsglobal(semi))
summary_footer(io)
end
end
Expand Down
4 changes: 2 additions & 2 deletions src/solvers/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ In particular, not the nodes themselves are returned.
# `mesh` for some combinations of mesh/solver.
@inline nelements(mesh, dg::DG, cache) = nelements(dg, cache)
@inline function ndofsglobal(mesh, dg::DG, cache)
nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh)
nelementsglobal(mesh, dg, cache) * nnodes(dg)^ndims(mesh)
end

"""
Expand Down Expand Up @@ -517,7 +517,7 @@ In particular, not the mortars themselves are returned.
@inline eachmpimortar(dg::DG, cache) = Base.OneTo(nmpimortars(dg, cache))

@inline nelements(dg::DG, cache) = nelements(cache.elements)
@inline function nelementsglobal(dg::DG, cache)
@inline function nelementsglobal(mesh, dg::DG, cache)
mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache)
end
@inline ninterfaces(dg::DG, cache) = ninterfaces(cache.interfaces)
Expand Down
9 changes: 9 additions & 0 deletions test/test_tree_1d_euler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,14 @@ end
end
end

@trixi_testset "test_quasi_1D_entropy" begin
a = 0.9
u_1D = SVector(1.1, 0.2, 2.1)
u_quasi_1D = SVector(a * 1.1, a * 0.2, a * 2.1, a)
@test entropy(u_quasi_1D, CompressibleEulerEquationsQuasi1D(1.4))
a * entropy(u_1D, CompressibleEulerEquations1D(1.4))
end

@trixi_testset "elixir_euler_quasi_1d_source_terms.jl" begin
@test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_quasi_1d_source_terms.jl"),
l2=[
Expand All @@ -423,6 +431,7 @@ end
1.821888865105592e-6,
1.1166012159335992e-7,
])

# Ensure that we do not have excessive memory allocations
# (e.g., from type instabilities)
let
Expand Down

0 comments on commit 6a579c7

Please sign in to comment.