diff --git a/README.md b/README.md index 87075c885f9..ee6e45f92a2 100644 --- a/README.md +++ b/README.md @@ -30,12 +30,12 @@ from the Trixi Framework ecosystem: * [**Towards Aerodynamic Simulations in Julia with Trixi.jl**](https://pretalx.com/juliacon2024/talk/XH8KBG/),
[*Daniel Doehring*](https://github.com/danieldoehring/), 10th July 2024, 15:00pm–15:30pm, While Loop (4.2) -* [**libtrixi: serving legacy codes in earth system modeling with fresh Julia CFD**](https://pretalx.com/juliacon2024/talk/JBKVGF/),
+* [**libtrixi: serving legacy codes in earth system modeling with fresh Julia CFD**](https://pretalx.com/juliacon2024/talk/SXC7LA/),
[*Benedict Geihe*](https://github.com/benegee/), 12th July 2024, 14:00pm–17:00pm, Function (4.1) The last talk is part of the -[Julia for High-Performance Computing](https://pretalx.com/juliacon2024/talk/JBKVGF/) +[Julia for High-Performance Computing](https://juliacon.org/2024/minisymposia/hpc/) minisymposium, which this year is hosted by our own [*Hendrik Ranocha*](https://github.com/ranocha/). We are looking forward to seeing you there ♥️ diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index 45f03fba8fe..b0afd02aff8 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -228,7 +228,7 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh, if mpi_isparallel() # Collect lambda for all elements - lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) + lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(mesh, dg, cache)) # Use parent because n_elements_by_rank is an OffsetArray recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) @@ -380,7 +380,7 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh, error("MPI has not been verified yet for parabolic AMR") # Collect lambda for all elements - lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) + lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(mesh, dg, cache)) # Use parent because n_elements_by_rank is an OffsetArray recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index 7b4a97c2a79..2c8497dc28d 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -308,11 +308,11 @@ function (analysis_callback::AnalysisCallback)(u_ode, du_ode, integrator, semi) mpi_println(" " * " " * " " * " PID: " * @sprintf("%10.8e s", performance_index)) - mpi_println(" #DOFs per field:" * @sprintf("% 14d", ndofs(semi)) * + mpi_println(" #DOFs per field:" * @sprintf("% 14d", ndofsglobal(semi)) * " " * " alloc'd memory: " * @sprintf("%14.3f MiB", memory_use)) mpi_println(" #elements: " * - @sprintf("% 14d", nelements(mesh, solver, cache))) + @sprintf("% 14d", nelementsglobal(mesh, solver, cache))) # Level information (only show for AMR) print_amr_information(integrator.opts.callback, mesh, solver, cache) @@ -494,88 +494,53 @@ function print_amr_information(callbacks, mesh, solver, cache) # Return early if there is nothing to print uses_amr(callbacks) || return nothing - levels = Vector{Int}(undef, nelements(solver, cache)) - min_level = typemax(Int) - max_level = typemin(Int) - for element in eachelement(solver, cache) - current_level = mesh.tree.levels[cache.elements.cell_ids[element]] - levels[element] = current_level - min_level = min(min_level, current_level) - max_level = max(max_level, current_level) + # Get global minimum and maximum level from the AMRController + min_level = max_level = 0 + for cb in callbacks.discrete_callbacks + if cb.affect! isa AMRCallback + min_level = cb.affect!.controller.base_level + max_level = cb.affect!.controller.max_level + end end + # Get local element count per level + elements_per_level = get_elements_per_level(min_level, max_level, mesh, solver, + cache) + + # Sum up across all ranks + MPI.Reduce!(elements_per_level, +, mpi_root(), mpi_comm()) + + # Print for level in max_level:-1:(min_level + 1) mpi_println(" ├── level $level: " * - @sprintf("% 14d", count(==(level), levels))) + @sprintf("% 14d", elements_per_level[level + 1 - min_level])) end mpi_println(" └── level $min_level: " * - @sprintf("% 14d", count(==(min_level), levels))) + @sprintf("% 14d", elements_per_level[1])) return nothing end -# Print level information only if AMR is enabled -function print_amr_information(callbacks, mesh::P4estMesh, solver, cache) - - # Return early if there is nothing to print - uses_amr(callbacks) || return nothing - +function get_elements_per_level(min_level, max_level, mesh::P4estMesh, solver, cache) elements_per_level = zeros(P4EST_MAXLEVEL + 1) for tree in unsafe_wrap_sc(p4est_tree_t, mesh.p4est.trees) elements_per_level .+= tree.quadrants_per_level end - # levels start at zero but Julia's standard indexing starts at 1 - min_level_1 = findfirst(i -> i > 0, elements_per_level) - max_level_1 = findlast(i -> i > 0, elements_per_level) - - # Check if there is at least one level with an element - if isnothing(min_level_1) || isnothing(max_level_1) - return nothing - end - - min_level = min_level_1 - 1 - max_level = max_level_1 - 1 - - for level in max_level:-1:(min_level + 1) - mpi_println(" ├── level $level: " * - @sprintf("% 14d", elements_per_level[level + 1])) - end - mpi_println(" └── level $min_level: " * - @sprintf("% 14d", elements_per_level[min_level + 1])) - - return nothing + return @view(elements_per_level[(min_level + 1):(max_level + 1)]) end -# Print level information only if AMR is enabled -function print_amr_information(callbacks, mesh::T8codeMesh, solver, cache) - - # Return early if there is nothing to print - uses_amr(callbacks) || return nothing - - # TODO: Switch to global element levels array when MPI supported or find - # another solution. +function get_elements_per_level(min_level, max_level, mesh::T8codeMesh, solver, cache) levels = trixi_t8_get_local_element_levels(mesh.forest) - min_level = minimum(levels) - max_level = maximum(levels) - - mpi_println(" minlevel = $min_level") - mpi_println(" maxlevel = $max_level") - - if min_level > 0 - elements_per_level = [count(==(l), levels) for l in 1:max_level] - - for level in max_level:-1:(min_level + 1) - mpi_println(" ├── level $level: " * - @sprintf("% 14d", elements_per_level[level])) - end - mpi_println(" └── level $min_level: " * - @sprintf("% 14d", elements_per_level[min_level])) - end + return [count(==(l), levels) for l in min_level:max_level] +end - return nothing +function get_elements_per_level(min_level, max_level, mesh::TreeMesh, solver, cache) + levels = [mesh.tree.levels[cache.elements.cell_ids[element]] + for element in eachelement(solver, cache)] + return [count(==(l), levels) for l in min_level:max_level] end # Iterate over tuples of analysis integrals in a type-stable way using "lispy tuple programming". diff --git a/src/callbacks_step/analysis_dgmulti.jl b/src/callbacks_step/analysis_dgmulti.jl index dc294de9e7b..1f0eec2de34 100644 --- a/src/callbacks_step/analysis_dgmulti.jl +++ b/src/callbacks_step/analysis_dgmulti.jl @@ -185,6 +185,13 @@ end SolutionAnalyzer(rd::RefElemData) = rd nelements(mesh::DGMultiMesh, ::DGMulti, other_args...) = mesh.md.num_elements +function nelementsglobal(mesh::DGMultiMesh, solver::DGMulti, cache) + if mpi_isparallel() + error("`nelementsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") + else + return ndofs(mesh, solver, cache) + end +end function ndofsglobal(mesh::DGMultiMesh, solver::DGMulti, cache) if mpi_isparallel() error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index cddeef77bb2..b83402c5f86 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -126,7 +126,7 @@ function save_restart_file_parallel(u, time, dt, timestep, attributes(file)["equations"] = get_name(equations) attributes(file)["polydeg"] = polydeg(dg) attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache) attributes(file)["mesh_type"] = get_name(mesh) attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar @@ -239,7 +239,7 @@ function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estM if read(attributes(file)["polydeg"]) != polydeg(dg) error("restart mismatch: polynomial degree in solver differs from value in restart file") end - if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache) + if read(attributes(file)["n_elements"]) != nelementsglobal(mesh, dg, cache) error("restart mismatch: number of elements in solver differs from value in restart file") end diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl index 7367886ca94..deae8f7c930 100644 --- a/src/callbacks_step/save_solution_dg.jl +++ b/src/callbacks_step/save_solution_dg.jl @@ -158,7 +158,7 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars, attributes(file)["equations"] = get_name(equations) attributes(file)["polydeg"] = polydeg(dg) attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache) attributes(file)["mesh_type"] = get_name(mesh) attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar @@ -183,7 +183,7 @@ function save_solution_file_parallel(data, time, dt, timestep, n_vars, # Need to create dataset explicitly in parallel case var = create_dataset(file, "/element_variables_$v", datatype(eltype(element_variable)), - dataspace((nelementsglobal(dg, cache),))) + dataspace((nelementsglobal(mesh, dg, cache),))) # Write data of each process in slices (ranks start with 0) slice = (cum_element_counts[mpi_rank() + 1] + 1):cum_element_counts[mpi_rank() + 2] @@ -230,7 +230,7 @@ function save_solution_file_on_root(data, time, dt, timestep, n_vars, attributes(file)["equations"] = get_name(equations) attributes(file)["polydeg"] = polydeg(dg) attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["n_elements"] = nelementsglobal(mesh, dg, cache) attributes(file)["mesh_type"] = get_name(mesh) attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar diff --git a/src/equations/compressible_euler_quasi_1d.jl b/src/equations/compressible_euler_quasi_1d.jl index 6844bf9bee5..9c7e3a7269b 100644 --- a/src/equations/compressible_euler_quasi_1d.jl +++ b/src/equations/compressible_euler_quasi_1d.jl @@ -314,10 +314,8 @@ end # 1D compressible Euler equations scaled by the channel width `a`. @inline function entropy(u, equations::CompressibleEulerEquationsQuasi1D) a_rho, a_rho_v1, a_e, a = u - q = a * entropy(SVector(a_rho, a_rho_v1, a_e) / a, - CompressibleEulerEquations1D(equations.gamma)) - - return SVector(q[1], q[2], q[3], a) + return a * entropy(SVector(a_rho, a_rho_v1, a_e) / a, + CompressibleEulerEquations1D(equations.gamma)) end # Convert conservative variables to entropy. The entropy variables for the diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl index 6bb98196231..2046220aeca 100644 --- a/src/meshes/p4est_mesh.jl +++ b/src/meshes/p4est_mesh.jl @@ -94,6 +94,7 @@ end end # returns Int32 by default which causes a weird method error when creating the cache @inline ncells(mesh::P4estMesh) = Int(mesh.p4est.local_num_quadrants[]) +@inline ncellsglobal(mesh::P4estMesh) = Int(mesh.p4est.global_num_quadrants[]) function Base.show(io::IO, mesh::P4estMesh) print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") @@ -105,7 +106,7 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::P4estMesh) else setup = [ "#trees" => ntrees(mesh), - "current #cells" => ncells(mesh), + "current #cells" => ncellsglobal(mesh), "polydeg" => length(mesh.nodes) - 1, ] summary_box(io, diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl index 0af4c6ae023..10a042be3ba 100644 --- a/src/meshes/t8code_mesh.jl +++ b/src/meshes/t8code_mesh.jl @@ -80,6 +80,7 @@ const ParallelT8codeMesh{NDIMS} = T8codeMesh{NDIMS, <:Real, <:True} @inline ntrees(mesh::T8codeMesh) = size(mesh.tree_node_coordinates)[end] @inline ncells(mesh::T8codeMesh) = Int(t8_forest_get_local_num_elements(mesh.forest)) +@inline ncellsglobal(mesh::T8codeMesh) = Int(t8_forest_get_global_num_elements(mesh.forest)) function Base.show(io::IO, mesh::T8codeMesh) print(io, "T8codeMesh{", ndims(mesh), ", ", real(mesh), "}") @@ -91,7 +92,7 @@ function Base.show(io::IO, ::MIME"text/plain", mesh::T8codeMesh) else setup = [ "#trees" => ntrees(mesh), - "current #cells" => ncells(mesh), + "current #cells" => ncellsglobal(mesh), "polydeg" => length(mesh.nodes) - 1, ] summary_box(io, diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index 8518cf27fd3..c6b82d5f37b 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -15,6 +15,19 @@ Return the number of degrees of freedom associated with each scalar variable. ndofs(mesh, solver, cache) end +""" + ndofsglobal(semi::AbstractSemidiscretization) + +Return the global number of degrees of freedom associated with each scalar variable across all MPI ranks. +This is the same as [`ndofs`](@ref) for simulations running in serial or +parallelized via threads. It will in general be different for simulations +running in parallel with MPI. +""" +@inline function ndofsglobal(semi::AbstractSemidiscretization) + mesh, _, solver, cache = mesh_equations_solver_cache(semi) + ndofsglobal(mesh, solver, cache) +end + """ integrate_via_indices(func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true) @@ -397,6 +410,7 @@ end # TODO: Taal, document interface? # New mesh/solver combinations have to implement # - ndofs(mesh, solver, cache) +# - ndofsgloabal(mesh, solver, cache) # - ndims(mesh) # - nnodes(solver) # - real(solver) diff --git a/src/semidiscretization/semidiscretization_coupled.jl b/src/semidiscretization/semidiscretization_coupled.jl index cc629d1a674..c5c21584dca 100644 --- a/src/semidiscretization/semidiscretization_coupled.jl +++ b/src/semidiscretization/semidiscretization_coupled.jl @@ -81,7 +81,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationCoupled) semi.semis[i].source_terms) summary_line(increment_indent(io), "solver", solver |> typeof |> nameof) end - summary_line(io, "total #DOFs per field", ndofs(semi)) + summary_line(io, "total #DOFs per field", ndofsglobal(semi)) summary_footer(io) end end @@ -123,6 +123,18 @@ end sum(ndofs, semi.semis) end +""" + ndofsglobal(semi::SemidiscretizationCoupled) + +Return the global number of degrees of freedom associated with each scalar variable across all MPI ranks, and summed up over all coupled systems. +This is the same as [`ndofs`](@ref) for simulations running in serial or +parallelized via threads. It will in general be different for simulations +running in parallel with MPI. +""" +@inline function ndofsglobal(semi::SemidiscretizationCoupled) + sum(ndofsglobal, semi.semis) +end + function compute_coefficients(t, semi::SemidiscretizationCoupled) @unpack u_indices = semi diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index dcd211671c8..7c82a132a0b 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -315,7 +315,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperboli summary_line(io, "source terms", semi.source_terms) summary_line(io, "solver", semi.solver |> typeof |> nameof) - summary_line(io, "total #DOFs per field", ndofs(semi)) + summary_line(io, "total #DOFs per field", ndofsglobal(semi)) summary_footer(io) end end diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index 57724374acb..16f8da21c1e 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -229,7 +229,7 @@ function Base.show(io::IO, ::MIME"text/plain", summary_line(io, "source terms", semi.source_terms) summary_line(io, "solver", semi.solver |> typeof |> nameof) summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof) - summary_line(io, "total #DOFs per field", ndofs(semi)) + summary_line(io, "total #DOFs per field", ndofsglobal(semi)) summary_footer(io) end end diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 0ab947e697a..fb4c8f182e0 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -459,7 +459,7 @@ In particular, not the nodes themselves are returned. # `mesh` for some combinations of mesh/solver. @inline nelements(mesh, dg::DG, cache) = nelements(dg, cache) @inline function ndofsglobal(mesh, dg::DG, cache) - nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh) + nelementsglobal(mesh, dg, cache) * nnodes(dg)^ndims(mesh) end """ @@ -517,7 +517,7 @@ In particular, not the mortars themselves are returned. @inline eachmpimortar(dg::DG, cache) = Base.OneTo(nmpimortars(dg, cache)) @inline nelements(dg::DG, cache) = nelements(cache.elements) -@inline function nelementsglobal(dg::DG, cache) +@inline function nelementsglobal(mesh, dg::DG, cache) mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache) end @inline ninterfaces(dg::DG, cache) = ninterfaces(cache.interfaces) diff --git a/test/test_tree_1d_euler.jl b/test/test_tree_1d_euler.jl index 784d123128e..dc523586f89 100644 --- a/test/test_tree_1d_euler.jl +++ b/test/test_tree_1d_euler.jl @@ -409,6 +409,14 @@ end end end +@trixi_testset "test_quasi_1D_entropy" begin + a = 0.9 + u_1D = SVector(1.1, 0.2, 2.1) + u_quasi_1D = SVector(a * 1.1, a * 0.2, a * 2.1, a) + @test entropy(u_quasi_1D, CompressibleEulerEquationsQuasi1D(1.4)) ≈ + a * entropy(u_1D, CompressibleEulerEquations1D(1.4)) +end + @trixi_testset "elixir_euler_quasi_1d_source_terms.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_quasi_1d_source_terms.jl"), l2=[ @@ -423,6 +431,7 @@ end 1.821888865105592e-6, 1.1166012159335992e-7, ]) + # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) let