diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index a780e975155..b242b6e811e 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v4 - name: Check spelling - uses: crate-ci/typos@v1.16.26 + uses: crate-ci/typos@v1.18.0 diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 6aa4809c1c2..4531c3aee0a 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -26,7 +26,7 @@ jobs: version: ${{ matrix.version }} arch: ${{ matrix.arch }} show-versioninfo: true - - uses: actions/cache@v3 + - uses: actions/cache@v4 env: cache-name: cache-artifacts with: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f287cc5feb2..2e388366fc8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -129,7 +129,7 @@ jobs: - uses: julia-actions/julia-processcoverage@v1 with: directories: src,examples,ext - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: file: ./lcov.info flags: unittests diff --git a/docs/literate/src/files/index.jl b/docs/literate/src/files/index.jl index e259d25fb2f..26637e5b24b 100644 --- a/docs/literate/src/files/index.jl +++ b/docs/literate/src/files/index.jl @@ -108,20 +108,26 @@ # software in the Trixi.jl ecosystem, and then run a simulation using Trixi.jl on said mesh. # In the end, the tutorial briefly explains how to simulate an example using AMR via `P4estMesh`. -# ### [15 Explicit time stepping](@ref time_stepping) +# ### [15 P4est mesh from gmsh](@ref p4est_from_gmsh) +#- +# This tutorial describes how to obtain a [`P4estMesh`](@ref) from an existing mesh generated +# by [`gmsh`](https://gmsh.info/) or any other meshing software that can export to the Abaqus +# input `.inp` format. The tutorial demonstrates how edges/faces can be associated with boundary conditions based on the physical nodesets. + +# ### [16 Explicit time stepping](@ref time_stepping) #- # This tutorial is about time integration using [OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl). # It explains how to use their algorithms and presents two types of time step choices - with error-based # and CFL-based adaptive step size control. -# ### [16 Differentiable programming](@ref differentiable_programming) +# ### [17 Differentiable programming](@ref differentiable_programming) #- # This part deals with some basic differentiable programming topics. For example, a Jacobian, its # eigenvalues and a curve of total energy (through the simulation) are calculated and plotted for # a few semidiscretizations. Moreover, we calculate an example for propagating errors with Measurement.jl # at the end. -# ### [17 Custom semidiscretization](@ref custom_semidiscretization) +# ### [18 Custom semidiscretization](@ref custom_semidiscretization) #- # This tutorial describes the [semidiscretiations](@ref overview-semidiscretizations) of Trixi.jl # and explains how to extend them for custom tasks. diff --git a/docs/src/performance.md b/docs/src/performance.md index df66f451b79..82d7f501f63 100644 --- a/docs/src/performance.md +++ b/docs/src/performance.md @@ -267,3 +267,14 @@ requires. It can thus be seen as a proxy for "energy used" and, as an extension, timing result, you need to set the analysis interval such that the `AnalysisCallback` is invoked at least once during the course of the simulation and discard the first PID value. + +## Performance issues with multi-threaded reductions +[False sharing](https://en.wikipedia.org/wiki/False_sharing) is a known performance issue +for systems with distributed caches. It also occurred for the implementation of a thread +parallel bounds checking routine for the subcell IDP limiting +in [PR #1736](https://github.com/trixi-framework/Trixi.jl/pull/1736). +After some [testing and discussion](https://github.com/trixi-framework/Trixi.jl/pull/1736#discussion_r1423881895), +it turned out that initializing a vector of length `n * Threads.nthreads()` and only using every +n-th entry instead of a vector of length `Threads.nthreads()` fixes the problem. +Since there are no processors with caches over 128B, we use `n = 128B / size(uEltype)`. +Now, the bounds checking routine of the IDP limiting scales as hoped. diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl index 6fb4d861d10..cb2ac787e14 100644 --- a/src/meshes/t8code_mesh.jl +++ b/src/meshes/t8code_mesh.jl @@ -1,7 +1,7 @@ """ T8codeMesh{NDIMS} <: AbstractMesh{NDIMS} -An unstructured curved mesh based on trees that uses the C library +An unstructured curved mesh based on trees that uses the C library ['t8code'](https://github.com/DLR-AMR/t8code) to manage trees and mesh refinement. """ @@ -485,7 +485,7 @@ end # form a family and we decide whether this family should be coarsened # or only the first element should be refined. # Otherwise `is_family` must equal zero and we consider the first entry -# of the element array for refinement. +# of the element array for refinement. # Entries of the element array beyond the first `num_elements` are undefined. # \param [in] forest the forest to which the new elements belong # \param [in] forest_from the forest that is adapted. @@ -542,8 +542,8 @@ Adapt a `T8codeMesh` according to a user-defined `adapt_callback`. 0 : Stay unchanged. 1 : Refine element. -- `kwargs`: - - `recursive = true`: Adapt the forest recursively. If true the caller must ensure that the callback +- `kwargs`: + - `recursive = true`: Adapt the forest recursively. If true the caller must ensure that the callback returns 0 for every analyzed element at some point to stop the recursion. - `balance = true`: Make sure the adapted forest is 2^(NDIMS-1):1 balanced. - `partition = true`: Partition the forest to redistribute elements evenly among MPI ranks. @@ -695,7 +695,7 @@ function count_interfaces(mesh::T8codeMesh) for iface in 0:(num_faces - 1) pelement_indices_ref = Ref{Ptr{t8_locidx_t}}() - pneighbor_leafs_ref = Ref{Ptr{Ptr{t8_element}}}() + pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}() pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}() dual_faces_ref = Ref{Ptr{Cint}}() @@ -704,7 +704,7 @@ function count_interfaces(mesh::T8codeMesh) forest_is_balanced = Cint(1) t8_forest_leaf_face_neighbors(mesh.forest, itree, element, - pneighbor_leafs_ref, iface, dual_faces_ref, + pneighbor_leaves_ref, iface, dual_faces_ref, num_neighbors_ref, pelement_indices_ref, pneigh_scheme_ref, forest_is_balanced) @@ -713,13 +713,13 @@ function count_interfaces(mesh::T8codeMesh) dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors) neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[], num_neighbors) - neighbor_leafs = unsafe_wrap(Array, pneighbor_leafs_ref[], num_neighbors) + neighbor_leaves = unsafe_wrap(Array, pneighbor_leaves_ref[], num_neighbors) neighbor_scheme = pneigh_scheme_ref[] if num_neighbors == 0 local_num_boundary += 1 else - neighbor_level = t8_element_level(neighbor_scheme, neighbor_leafs[1]) + neighbor_level = t8_element_level(neighbor_scheme, neighbor_leaves[1]) if all(neighbor_ielements .< num_local_elements) # Conforming interface: The second condition ensures we @@ -745,7 +745,7 @@ function count_interfaces(mesh::T8codeMesh) neighbor_linear_id = neighbor_global_ghost_itree * max_tree_num_elements + t8_element_get_linear_id(neighbor_scheme, - neighbor_leafs[1], + neighbor_leaves[1], max_level) global_mortar_id = 2 * ndims(mesh) * neighbor_linear_id + dual_faces[1] @@ -759,7 +759,7 @@ function count_interfaces(mesh::T8codeMesh) end t8_free(dual_faces_ref[]) - t8_free(pneighbor_leafs_ref[]) + t8_free(pneighbor_leaves_ref[]) t8_free(pelement_indices_ref[]) end # for @@ -875,7 +875,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, end pelement_indices_ref = Ref{Ptr{t8_locidx_t}}() - pneighbor_leafs_ref = Ref{Ptr{Ptr{t8_element}}}() + pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}() pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}() dual_faces_ref = Ref{Ptr{Cint}}() @@ -885,7 +885,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, # Query neighbor information from t8code. t8_forest_leaf_face_neighbors(mesh.forest, itree, element, - pneighbor_leafs_ref, iface, dual_faces_ref, + pneighbor_leaves_ref, iface, dual_faces_ref, num_neighbors_ref, pelement_indices_ref, pneigh_scheme_ref, forest_is_balanced) @@ -894,7 +894,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors) neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[], num_neighbors) - neighbor_leafs = unsafe_wrap(Array, pneighbor_leafs_ref[], num_neighbors) + neighbor_leaves = unsafe_wrap(Array, pneighbor_leaves_ref[], num_neighbors) neighbor_scheme = pneigh_scheme_ref[] # Now we check for the different cases. The nested if-structure is as follows: @@ -913,7 +913,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, # else: // `local mortar from smaller elements point of view` # // We only count local mortars once. # - # else: // It must be either a MPI interface or a MPI mortar. + # else: // It must be either a MPI interface or a MPI mortar. # # if `MPI interface`: # @@ -938,7 +938,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, # Interface or mortar. else - neighbor_level = t8_element_level(neighbor_scheme, neighbor_leafs[1]) + neighbor_level = t8_element_level(neighbor_scheme, neighbor_leaves[1]) # Local interface or mortar. if all(neighbor_ielements .< num_local_elements) @@ -985,7 +985,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, neighbor_linear_id = neighbor_global_ghost_itree * max_tree_num_elements + t8_element_get_linear_id(neighbor_scheme, - neighbor_leafs[1], + neighbor_leaves[1], max_level) if current_linear_id < neighbor_linear_id @@ -1029,7 +1029,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, num_local_elements) local_neighbor_ids = [neighbor_ids[i] for i in local_neighbor_positions] - local_neighbor_positions = [map_iface_to_ichild_to_position[dual_faces[1] + 1][t8_element_child_id(neighbor_scheme, neighbor_leafs[i]) + 1] + local_neighbor_positions = [map_iface_to_ichild_to_position[dual_faces[1] + 1][t8_element_child_id(neighbor_scheme, neighbor_leaves[i]) + 1] for i in local_neighbor_positions] # Last entry is the large element. @@ -1059,7 +1059,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, neighbor_linear_id = neighbor_global_ghost_itree * max_tree_num_elements + t8_element_get_linear_id(neighbor_scheme, - neighbor_leafs[1], + neighbor_leaves[1], max_level) global_mortar_id = 2 * ndims(mesh) * neighbor_linear_id + dual_faces[1] @@ -1100,7 +1100,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries, end t8_free(dual_faces_ref[]) - t8_free(pneighbor_leafs_ref[]) + t8_free(pneighbor_leaves_ref[]) t8_free(pelement_indices_ref[]) end # for iface diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index f7185a1a904..9f1382caa62 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -218,9 +218,9 @@ end "elixir_advection_diffusion.jl"), tspan=(0.0, 0.0)) LLID = Trixi.local_leaf_cells(mesh.tree) - num_leafs = length(LLID) - @assert num_leafs % 8 == 0 - Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 8)]) + num_leaves = length(LLID) + @assert num_leaves % 8 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 8)]) tspan = (0.0, 1.5) semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), @@ -414,9 +414,9 @@ end "elixir_navierstokes_convergence.jl"), tspan=(0.0, 0.0), initial_refinement_level=3) LLID = Trixi.local_leaf_cells(mesh.tree) - num_leafs = length(LLID) - @assert num_leafs % 4 == 0 - Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 4)]) + num_leaves = length(LLID) + @assert num_leaves % 4 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 4)]) tspan = (0.0, 0.5) semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver; diff --git a/test/test_parabolic_3d.jl b/test/test_parabolic_3d.jl index 6fbfb8259d4..1eaa9f51a56 100644 --- a/test/test_parabolic_3d.jl +++ b/test/test_parabolic_3d.jl @@ -252,9 +252,9 @@ end "elixir_navierstokes_convergence.jl"), tspan=(0.0, 0.0)) LLID = Trixi.local_leaf_cells(mesh.tree) - num_leafs = length(LLID) - @assert num_leafs % 16 == 0 - Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 16)]) + num_leaves = length(LLID) + @assert num_leaves % 16 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 16)]) tspan = (0.0, 0.25) semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver; @@ -325,9 +325,9 @@ end "elixir_navierstokes_taylor_green_vortex.jl"), tspan=(0.0, 0.0)) LLID = Trixi.local_leaf_cells(mesh.tree) - num_leafs = length(LLID) - @assert num_leafs % 32 == 0 - Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 32)]) + num_leaves = length(LLID) + @assert num_leaves % 32 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 32)]) tspan = (0.0, 0.1) semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver) @@ -429,8 +429,8 @@ end "elixir_advection_diffusion_amr.jl"), l2=[0.000355780485397024], linf=[0.0010810770271614256]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) let t = sol.t[end] u_ode = sol.u[end] @@ -444,8 +444,8 @@ end "elixir_advection_diffusion_nonperiodic.jl"), l2=[0.0009808996243280868], linf=[0.01732621559135459]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) let t = sol.t[end] u_ode = sol.u[end] @@ -472,8 +472,8 @@ end 0.12129218723807476, 0.8433893297612087, ]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) let t = sol.t[end] u_ode = sol.u[end] @@ -495,8 +495,8 @@ end 0.6782397526873181, 0.17663702154066238, 0.17663702154066266, 0.17663702154066238, 1.7327849844825238, ]) - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) let t = sol.t[end] u_ode = sol.u[end]