diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index a780e975155..b242b6e811e 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v4
       - name: Check spelling
-        uses: crate-ci/typos@v1.16.26
+        uses: crate-ci/typos@v1.18.0
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 6aa4809c1c2..4531c3aee0a 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -26,7 +26,7 @@ jobs:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
           show-versioninfo: true
-      - uses: actions/cache@v3
+      - uses: actions/cache@v4
         env:
           cache-name: cache-artifacts
         with:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f287cc5feb2..2e388366fc8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -129,7 +129,7 @@ jobs:
       - uses: julia-actions/julia-processcoverage@v1
         with:
           directories: src,examples,ext
-      - uses: codecov/codecov-action@v3
+      - uses: codecov/codecov-action@v4
         with:
           file: ./lcov.info
           flags: unittests
diff --git a/docs/literate/src/files/index.jl b/docs/literate/src/files/index.jl
index e259d25fb2f..26637e5b24b 100644
--- a/docs/literate/src/files/index.jl
+++ b/docs/literate/src/files/index.jl
@@ -108,20 +108,26 @@
 # software in the Trixi.jl ecosystem, and then run a simulation using Trixi.jl on said mesh.
 # In the end, the tutorial briefly explains how to simulate an example using AMR via `P4estMesh`.
 
-# ### [15 Explicit time stepping](@ref time_stepping)
+# ### [15 P4est mesh from gmsh](@ref p4est_from_gmsh)
+#-
+# This tutorial describes how to obtain a [`P4estMesh`](@ref) from an existing mesh generated
+# by [`gmsh`](https://gmsh.info/) or any other meshing software that can export to the Abaqus
+# input `.inp` format. The tutorial demonstrates how edges/faces can be associated with boundary conditions based on the physical nodesets.
+
+# ### [16 Explicit time stepping](@ref time_stepping)
 #-
 # This tutorial is about time integration using [OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl).
 # It explains how to use their algorithms and presents two types of time step choices - with error-based
 # and CFL-based adaptive step size control.
 
-# ### [16 Differentiable programming](@ref differentiable_programming)
+# ### [17 Differentiable programming](@ref differentiable_programming)
 #-
 # This part deals with some basic differentiable programming topics. For example, a Jacobian, its
 # eigenvalues and a curve of total energy (through the simulation) are calculated and plotted for
 # a few semidiscretizations. Moreover, we calculate an example for propagating errors with Measurement.jl
 # at the end.
 
-# ### [17 Custom semidiscretization](@ref custom_semidiscretization)
+# ### [18 Custom semidiscretization](@ref custom_semidiscretization)
 #-
 # This tutorial describes the [semidiscretiations](@ref overview-semidiscretizations) of Trixi.jl
 # and explains how to extend them for custom tasks.
diff --git a/docs/src/performance.md b/docs/src/performance.md
index df66f451b79..82d7f501f63 100644
--- a/docs/src/performance.md
+++ b/docs/src/performance.md
@@ -267,3 +267,14 @@ requires. It can thus be seen as a proxy for "energy used" and, as an extension,
     timing result, you need to set the analysis interval such that the
     `AnalysisCallback` is invoked at least once during the course of the simulation and
     discard the first PID value.
+
+## Performance issues with multi-threaded reductions
+[False sharing](https://en.wikipedia.org/wiki/False_sharing) is a known performance issue
+for systems with distributed caches. It also occurred for the implementation of a thread
+parallel bounds checking routine for the subcell IDP limiting
+in [PR #1736](https://github.com/trixi-framework/Trixi.jl/pull/1736).
+After some [testing and discussion](https://github.com/trixi-framework/Trixi.jl/pull/1736#discussion_r1423881895),
+it turned out that initializing a vector of length `n * Threads.nthreads()` and only using every
+n-th entry instead of a vector of length `Threads.nthreads()` fixes the problem.
+Since there are no processors with caches over 128B, we use `n = 128B / size(uEltype)`.
+Now, the bounds checking routine of the IDP limiting scales as hoped.
diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl
index 6fb4d861d10..cb2ac787e14 100644
--- a/src/meshes/t8code_mesh.jl
+++ b/src/meshes/t8code_mesh.jl
@@ -1,7 +1,7 @@
 """
     T8codeMesh{NDIMS} <: AbstractMesh{NDIMS}
 
-An unstructured curved mesh based on trees that uses the C library 
+An unstructured curved mesh based on trees that uses the C library
 ['t8code'](https://github.com/DLR-AMR/t8code)
 to manage trees and mesh refinement.
 """
@@ -485,7 +485,7 @@ end
 # form a family and we decide whether this family should be coarsened
 # or only the first element should be refined.
 # Otherwise `is_family` must equal zero and we consider the first entry
-# of the element array for refinement. 
+# of the element array for refinement.
 # Entries of the element array beyond the first `num_elements` are undefined.
 # \param [in] forest       the forest to which the new elements belong
 # \param [in] forest_from  the forest that is adapted.
@@ -542,8 +542,8 @@ Adapt a `T8codeMesh` according to a user-defined `adapt_callback`.
            0 : Stay unchanged.
            1 : Refine element.
 
-- `kwargs`: 
-    - `recursive = true`: Adapt the forest recursively. If true the caller must ensure that the callback 
+- `kwargs`:
+    - `recursive = true`: Adapt the forest recursively. If true the caller must ensure that the callback
                           returns 0 for every analyzed element at some point to stop the recursion.
     - `balance = true`: Make sure the adapted forest is 2^(NDIMS-1):1 balanced.
     - `partition = true`: Partition the forest to redistribute elements evenly among MPI ranks.
@@ -695,7 +695,7 @@ function count_interfaces(mesh::T8codeMesh)
 
             for iface in 0:(num_faces - 1)
                 pelement_indices_ref = Ref{Ptr{t8_locidx_t}}()
-                pneighbor_leafs_ref = Ref{Ptr{Ptr{t8_element}}}()
+                pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}()
                 pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}()
 
                 dual_faces_ref = Ref{Ptr{Cint}}()
@@ -704,7 +704,7 @@ function count_interfaces(mesh::T8codeMesh)
                 forest_is_balanced = Cint(1)
 
                 t8_forest_leaf_face_neighbors(mesh.forest, itree, element,
-                                              pneighbor_leafs_ref, iface, dual_faces_ref,
+                                              pneighbor_leaves_ref, iface, dual_faces_ref,
                                               num_neighbors_ref,
                                               pelement_indices_ref, pneigh_scheme_ref,
                                               forest_is_balanced)
@@ -713,13 +713,13 @@ function count_interfaces(mesh::T8codeMesh)
                 dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors)
                 neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[],
                                                  num_neighbors)
-                neighbor_leafs = unsafe_wrap(Array, pneighbor_leafs_ref[], num_neighbors)
+                neighbor_leaves = unsafe_wrap(Array, pneighbor_leaves_ref[], num_neighbors)
                 neighbor_scheme = pneigh_scheme_ref[]
 
                 if num_neighbors == 0
                     local_num_boundary += 1
                 else
-                    neighbor_level = t8_element_level(neighbor_scheme, neighbor_leafs[1])
+                    neighbor_level = t8_element_level(neighbor_scheme, neighbor_leaves[1])
 
                     if all(neighbor_ielements .< num_local_elements)
                         # Conforming interface: The second condition ensures we
@@ -745,7 +745,7 @@ function count_interfaces(mesh::T8codeMesh)
                             neighbor_linear_id = neighbor_global_ghost_itree *
                                                  max_tree_num_elements +
                                                  t8_element_get_linear_id(neighbor_scheme,
-                                                                          neighbor_leafs[1],
+                                                                          neighbor_leaves[1],
                                                                           max_level)
                             global_mortar_id = 2 * ndims(mesh) * neighbor_linear_id +
                                                dual_faces[1]
@@ -759,7 +759,7 @@ function count_interfaces(mesh::T8codeMesh)
                 end
 
                 t8_free(dual_faces_ref[])
-                t8_free(pneighbor_leafs_ref[])
+                t8_free(pneighbor_leaves_ref[])
                 t8_free(pelement_indices_ref[])
             end # for
 
@@ -875,7 +875,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                 end
 
                 pelement_indices_ref = Ref{Ptr{t8_locidx_t}}()
-                pneighbor_leafs_ref = Ref{Ptr{Ptr{t8_element}}}()
+                pneighbor_leaves_ref = Ref{Ptr{Ptr{t8_element}}}()
                 pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}()
 
                 dual_faces_ref = Ref{Ptr{Cint}}()
@@ -885,7 +885,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
 
                 # Query neighbor information from t8code.
                 t8_forest_leaf_face_neighbors(mesh.forest, itree, element,
-                                              pneighbor_leafs_ref, iface, dual_faces_ref,
+                                              pneighbor_leaves_ref, iface, dual_faces_ref,
                                               num_neighbors_ref,
                                               pelement_indices_ref, pneigh_scheme_ref,
                                               forest_is_balanced)
@@ -894,7 +894,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                 dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors)
                 neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[],
                                                  num_neighbors)
-                neighbor_leafs = unsafe_wrap(Array, pneighbor_leafs_ref[], num_neighbors)
+                neighbor_leaves = unsafe_wrap(Array, pneighbor_leaves_ref[], num_neighbors)
                 neighbor_scheme = pneigh_scheme_ref[]
 
                 # Now we check for the different cases. The nested if-structure is as follows:
@@ -913,7 +913,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                 #       else: // `local mortar from smaller elements point of view`
                 #         <skip> // We only count local mortars once.
                 #
-                #     else: // It must be either a MPI interface or a MPI mortar.     
+                #     else: // It must be either a MPI interface or a MPI mortar.
                 #
                 #       if `MPI interface`:
                 #         <fill MPI interface info>
@@ -938,7 +938,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
 
                     # Interface or mortar.
                 else
-                    neighbor_level = t8_element_level(neighbor_scheme, neighbor_leafs[1])
+                    neighbor_level = t8_element_level(neighbor_scheme, neighbor_leaves[1])
 
                     # Local interface or mortar.
                     if all(neighbor_ielements .< num_local_elements)
@@ -985,7 +985,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                             neighbor_linear_id = neighbor_global_ghost_itree *
                                                  max_tree_num_elements +
                                                  t8_element_get_linear_id(neighbor_scheme,
-                                                                          neighbor_leafs[1],
+                                                                          neighbor_leaves[1],
                                                                           max_level)
 
                             if current_linear_id < neighbor_linear_id
@@ -1029,7 +1029,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                                                                num_local_elements)
                             local_neighbor_ids = [neighbor_ids[i]
                                                   for i in local_neighbor_positions]
-                            local_neighbor_positions = [map_iface_to_ichild_to_position[dual_faces[1] + 1][t8_element_child_id(neighbor_scheme, neighbor_leafs[i]) + 1]
+                            local_neighbor_positions = [map_iface_to_ichild_to_position[dual_faces[1] + 1][t8_element_child_id(neighbor_scheme, neighbor_leaves[i]) + 1]
                                                         for i in local_neighbor_positions]
 
                             # Last entry is the large element.
@@ -1059,7 +1059,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                             neighbor_linear_id = neighbor_global_ghost_itree *
                                                  max_tree_num_elements +
                                                  t8_element_get_linear_id(neighbor_scheme,
-                                                                          neighbor_leafs[1],
+                                                                          neighbor_leaves[1],
                                                                           max_level)
                             global_mortar_id = 2 * ndims(mesh) * neighbor_linear_id +
                                                dual_faces[1]
@@ -1100,7 +1100,7 @@ function fill_mesh_info!(mesh::T8codeMesh, interfaces, mortars, boundaries,
                 end
 
                 t8_free(dual_faces_ref[])
-                t8_free(pneighbor_leafs_ref[])
+                t8_free(pneighbor_leaves_ref[])
                 t8_free(pelement_indices_ref[])
             end # for iface
 
diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl
index f7185a1a904..9f1382caa62 100644
--- a/test/test_parabolic_2d.jl
+++ b/test/test_parabolic_2d.jl
@@ -218,9 +218,9 @@ end
                                  "elixir_advection_diffusion.jl"),
                         tspan=(0.0, 0.0))
     LLID = Trixi.local_leaf_cells(mesh.tree)
-    num_leafs = length(LLID)
-    @assert num_leafs % 8 == 0
-    Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 8)])
+    num_leaves = length(LLID)
+    @assert num_leaves % 8 == 0
+    Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 8)])
     tspan = (0.0, 1.5)
     semi = SemidiscretizationHyperbolicParabolic(mesh,
                                                  (equations, equations_parabolic),
@@ -414,9 +414,9 @@ end
                                  "elixir_navierstokes_convergence.jl"),
                         tspan=(0.0, 0.0), initial_refinement_level=3)
     LLID = Trixi.local_leaf_cells(mesh.tree)
-    num_leafs = length(LLID)
-    @assert num_leafs % 4 == 0
-    Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 4)])
+    num_leaves = length(LLID)
+    @assert num_leaves % 4 == 0
+    Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 4)])
     tspan = (0.0, 0.5)
     semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic),
                                                  initial_condition, solver;
diff --git a/test/test_parabolic_3d.jl b/test/test_parabolic_3d.jl
index 6fbfb8259d4..1eaa9f51a56 100644
--- a/test/test_parabolic_3d.jl
+++ b/test/test_parabolic_3d.jl
@@ -252,9 +252,9 @@ end
                                  "elixir_navierstokes_convergence.jl"),
                         tspan=(0.0, 0.0))
     LLID = Trixi.local_leaf_cells(mesh.tree)
-    num_leafs = length(LLID)
-    @assert num_leafs % 16 == 0
-    Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 16)])
+    num_leaves = length(LLID)
+    @assert num_leaves % 16 == 0
+    Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 16)])
     tspan = (0.0, 0.25)
     semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic),
                                                  initial_condition, solver;
@@ -325,9 +325,9 @@ end
                                  "elixir_navierstokes_taylor_green_vortex.jl"),
                         tspan=(0.0, 0.0))
     LLID = Trixi.local_leaf_cells(mesh.tree)
-    num_leafs = length(LLID)
-    @assert num_leafs % 32 == 0
-    Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs / 32)])
+    num_leaves = length(LLID)
+    @assert num_leaves % 32 == 0
+    Trixi.refine!(mesh.tree, LLID[1:Int(num_leaves / 32)])
     tspan = (0.0, 0.1)
     semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic),
                                                  initial_condition, solver)
@@ -429,8 +429,8 @@ end
                                  "elixir_advection_diffusion_amr.jl"),
                         l2=[0.000355780485397024],
                         linf=[0.0010810770271614256])
-    # Ensure that we do not have excessive memory allocations 
-    # (e.g., from type instabilities) 
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
     let
         t = sol.t[end]
         u_ode = sol.u[end]
@@ -444,8 +444,8 @@ end
                                  "elixir_advection_diffusion_nonperiodic.jl"),
                         l2=[0.0009808996243280868],
                         linf=[0.01732621559135459])
-    # Ensure that we do not have excessive memory allocations 
-    # (e.g., from type instabilities) 
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
     let
         t = sol.t[end]
         u_ode = sol.u[end]
@@ -472,8 +472,8 @@ end
                             0.12129218723807476,
                             0.8433893297612087,
                         ])
-    # Ensure that we do not have excessive memory allocations 
-    # (e.g., from type instabilities) 
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
     let
         t = sol.t[end]
         u_ode = sol.u[end]
@@ -495,8 +495,8 @@ end
                             0.6782397526873181, 0.17663702154066238,
                             0.17663702154066266, 0.17663702154066238, 1.7327849844825238,
                         ])
-    # Ensure that we do not have excessive memory allocations 
-    # (e.g., from type instabilities) 
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
     let
         t = sol.t[end]
         u_ode = sol.u[end]