From 6069149fef0590c824ddee54b6d12d7531b6c790 Mon Sep 17 00:00:00 2001 From: ArseniyKholod <119304909+ArseniyKholod@users.noreply.github.com> Date: Fri, 15 Sep 2023 08:10:03 +0200 Subject: [PATCH] Handles restarting problem of adaptive time integration methods (#1565) * Create test.jl * Delete test.jl * loadcallback * adding_parallel_support * formatting * minimize dependencies * combine loadrestart and saverestart * fix * Update test_threaded.jl * fix * test fix * fix * MODULE add * fix * runtime macros * Update test_threaded.jl * handle MPI issues * enable PIDController test * Update test_mpi_tree.jl * fix * add asserts * Update save_restart.jl * add IController tests * enable HDF5 parallel * fix shot * fix shot 2 * fix shot 3 * fix shot 4 * fix shot 5 * fix shot 6 * fix shot 7 * fix shot 8 * fix shot 9 * fix shot 10 * fix shot 11 * fix shot 12 * fix shot 13 * fix shot 14 * fix shot 15 * fix shot 16 * fix shot 17 * fix shot 18 * enable additional configuration only in mpi test on linux * enable environment * test coverage issue * disable mpi macOs CI because of failure * disable new configurations to test coverage * disable PID and I test to test coverage issue * enable old coverage all * undo last commit and enable coverage on windows * enable new tests, mpi macOs and HDF5 parallel * fix * enable coverage on threads * test HDF5 parallel * test HDF5 parallel 2 * test HDF5 parallel 3 * fix * Update save_restart_dg.jl * test HDF5 parallel 4 * test HDF5 parallel 5 * Update configure_packages.jl * delete unnecessary changes * Update save_restart_dg.jl * Update save_restart_dg.jl * remove dependency on OrdinaryDiffEq * format * discard unrelated changes * delete barrier * delete eval() * comments & delete mpi_parallel * format * Update runtests.jl * Update runtests.jl * simplify tests * test failing MPI on windiws and macOs * test with RDPK3SpFSAL49 * test with RDPK3SpFSAL35 * change tests * fix and new test * Update test_tree_2d_euler.jl * fix and delete unnecessary test * add printing format * add docstrings * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * fix * formatting * Update src/callbacks_step/save_restart_dg.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart_dg.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * suggested changes * new test * fix * fix * Update test_tree_2d_advection.jl * fix * fix error mpi on windows * rerun * Update src/callbacks_step/save_restart.jl Co-authored-by: Hendrik Ranocha * Add comments * format --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- .../elixir_advection_extended.jl | 7 ++-- .../tree_2d_dgsem/elixir_advection_restart.jl | 19 +++++---- src/Trixi.jl | 3 +- src/callbacks_step/save_restart.jl | 36 +++++++++++++++++ src/callbacks_step/save_restart_dg.jl | 24 ++++++++++++ test/test_mpi_tree.jl | 20 ++++++++-- test/test_threaded.jl | 39 ++++++++++++------- test/test_tree_2d_advection.jl | 20 ++++++++-- 8 files changed, 135 insertions(+), 33 deletions(-) diff --git a/examples/tree_2d_dgsem/elixir_advection_extended.jl b/examples/tree_2d_dgsem/elixir_advection_extended.jl index 8c837957ffd..278dc85386d 100644 --- a/examples/tree_2d_dgsem/elixir_advection_extended.jl +++ b/examples/tree_2d_dgsem/elixir_advection_extended.jl @@ -54,7 +54,7 @@ analysis_callback = AnalysisCallback(semi, interval=analysis_interval, alive_callback = AliveCallback(analysis_interval=analysis_interval) # The SaveRestartCallback allows to save a file from which a Trixi.jl simulation can be restarted -save_restart = SaveRestartCallback(interval=100, +save_restart = SaveRestartCallback(interval=40, save_final_restart=true) # The SaveSolutionCallback allows to save the solution to a file in regular intervals @@ -77,9 +77,10 @@ callbacks = CallbackSet(summary_callback, # run the simulation # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), +alg = CarpenterKennedy2N54(williamson_condition=false) +sol = solve(ode, alg, dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); + save_everystep=false, callback=callbacks; ode_default_options()...); # Print the timer summary summary_callback() diff --git a/examples/tree_2d_dgsem/elixir_advection_restart.jl b/examples/tree_2d_dgsem/elixir_advection_restart.jl index 771ec5aefe7..b63a8d1f7bc 100644 --- a/examples/tree_2d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_2d_dgsem/elixir_advection_restart.jl @@ -3,9 +3,10 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# create a restart file - -trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_extended.jl")) +# Define time integration algorithm +alg = CarpenterKennedy2N54(williamson_condition=false) +# Create a restart file +trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_extended.jl"), alg = alg, tspan = (0.0, 10.0)) ############################################################################### @@ -14,22 +15,26 @@ trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_extended.jl")) # Note: If you get a restart file from somewhere else, you need to provide # appropriate setups in the elixir loading a restart file -restart_filename = joinpath("out", "restart_000018.h5") +restart_filename = joinpath("out", "restart_000040.h5") mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) -tspan = (load_time(restart_filename), 2.0) +tspan = (load_time(restart_filename), 10.0) dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); # Do not overwrite the initial snapshot written by elixir_advection_extended.jl. save_solution.condition.save_initial_solution = false -alg = CarpenterKennedy2N54(williamson_condition=false) integrator = init(ode, alg, dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks) + save_everystep=false, callback=callbacks; ode_default_options()...) + +# Load saved context for adaptive time integrator +if integrator.opts.adaptive + load_adaptive_time_integrator!(integrator, restart_filename) +end # Get the last time index and work with that. load_timestep!(integrator, restart_filename) diff --git a/src/Trixi.jl b/src/Trixi.jl index c883c3bf19f..b65d03e7975 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -255,7 +255,8 @@ export SummaryCallback, SteadyStateCallback, AnalysisCallback, AliveCallback, GlmSpeedCallback, LBMCollisionCallback, EulerAcousticsCouplingCallback, TrivialCallback, AnalysisCallbackCoupled -export load_mesh, load_time, load_timestep, load_timestep!, load_dt +export load_mesh, load_time, load_timestep, load_timestep!, load_dt, + load_adaptive_time_integrator! export ControllerThreeLevel, ControllerThreeLevelCombined, IndicatorLöhner, IndicatorLoehner, IndicatorMax, diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl index 06817a9b730..0d174d85805 100644 --- a/src/callbacks_step/save_restart.jl +++ b/src/callbacks_step/save_restart.jl @@ -105,6 +105,11 @@ function (restart_callback::SaveRestartCallback)(integrator) end save_restart_file(u_ode, t, dt, iter, semi, restart_callback) + # If using an adaptive time stepping scheme, store controller values for restart + if integrator.opts.adaptive + save_adaptive_time_integrator(integrator, integrator.opts.controller, + restart_callback) + end end # avoid re-evaluating possible FSAL stages @@ -168,5 +173,36 @@ function load_restart_file(semi::AbstractSemidiscretization, restart_file) load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) end +""" + load_adaptive_time_integrator!(integrator, restart_file::AbstractString) + +Load the context information for time integrators with error-based step size control +saved in a `restart_file`. +""" +function load_adaptive_time_integrator!(integrator, restart_file::AbstractString) + controller = integrator.opts.controller + # Read context information for controller + h5open(restart_file, "r") do file + # Ensure that the necessary information was saved + if !("time_integrator_qold" in keys(attributes(file))) || + !("time_integrator_dtpropose" in keys(attributes(file))) || + (hasproperty(controller, :err) && + !("time_integrator_controller_err" in keys(attributes(file)))) + error("Missing data in restart file: check the consistency of adaptive time controller with initial setup!") + end + # Load data that is required both for PIController and PIDController + integrator.qold = read(attributes(file)["time_integrator_qold"]) + integrator.dtpropose = read(attributes(file)["time_integrator_dtpropose"]) + # Accept step to use dtpropose already in the first step + integrator.accept_step = true + # Reevaluate integrator.fsal_first on the first step + integrator.reeval_fsal = true + # Load additional parameters for PIDController + if hasproperty(controller, :err) # Distinguish PIDController from PIController + controller.err[:] = read(attributes(file)["time_integrator_controller_err"]) + end + end +end + include("save_restart_dg.jl") end # @muladd diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index 8db6db2d2b8..cddeef77bb2 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -327,4 +327,28 @@ function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMe return u_ode end + +# Store controller values for an adaptive time stepping scheme +function save_adaptive_time_integrator(integrator, + controller, restart_callback) + # Save only on root + if mpi_isroot() + @unpack output_directory = restart_callback + timestep = integrator.stats.naccept + + # Filename based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) + + # Open file (preserve existing content) + h5open(filename, "r+") do file + # Add context information as attributes both for PIController and PIDController + attributes(file)["time_integrator_qold"] = integrator.qold + attributes(file)["time_integrator_dtpropose"] = integrator.dtpropose + # For PIDController is necessary to save additional parameters + if hasproperty(controller, :err) # Distinguish PIDController from PIController + attributes(file)["time_integrator_controller_err"] = controller.err + end + end + end +end end # @muladd diff --git a/test/test_mpi_tree.jl b/test/test_mpi_tree.jl index 8403fcf1b04..8f08a9d72e7 100644 --- a/test/test_mpi_tree.jl +++ b/test/test_mpi_tree.jl @@ -23,10 +23,22 @@ CI_ON_WINDOWS = (get(ENV, "GITHUB_ACTIONS", false) == "true") && Sys.iswindows() end @trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - # Expected errors are exactly the same as in the serial test! - l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + using OrdinaryDiffEq: RDPK3SpFSAL49 + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl")) + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + alg = RDPK3SpFSAL49(), tspan = (0.0, 10.0)) + l2_expected, linf_expected = analysis_callback(sol) + + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl")) + # Errors are exactly the same as in the elixir_advection_extended.jl + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + alg = RDPK3SpFSAL49()) + l2_actual, linf_actual = analysis_callback(sol) + + Trixi.mpi_isroot() && @test l2_actual == l2_expected + Trixi.mpi_isroot() && @test linf_actual == linf_expected end @trixi_testset "elixir_advection_mortar.jl" begin diff --git a/test/test_threaded.jl b/test/test_threaded.jl index 9b30836d0ed..2337d73f30a 100644 --- a/test/test_threaded.jl +++ b/test/test_threaded.jl @@ -12,27 +12,38 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) @testset "Threaded tests" begin @testset "TreeMesh" begin @trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl"), - # Expected errors are exactly the same as in the serial test! - l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + elixir = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_extended.jl") + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(elixir) + trixi_include(@__MODULE__, elixir, tspan = (0.0, 10.0)) + l2_expected, linf_expected = analysis_callback(sol) + + elixir = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl") + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(elixir) + # Errors are exactly the same as in the elixir_advection_extended.jl + trixi_include(@__MODULE__, elixir) + l2_actual, linf_actual = analysis_callback(sol) + + Trixi.mpi_isroot() && @test l2_actual == l2_expected + Trixi.mpi_isroot() && @test linf_actual == linf_expected - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 - end + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_advection_restart.jl with threaded time integration" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl"), alg = CarpenterKennedy2N54(williamson_condition = false, thread = OrdinaryDiffEq.True()), # Expected errors are exactly the same as in the serial test! - l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + l2 = [8.005068880114254e-6], + linf = [6.39093577996519e-5]) end @trixi_testset "elixir_advection_amr_refine_twice.jl" begin diff --git a/test/test_tree_2d_advection.jl b/test/test_tree_2d_advection.jl index 973d0caf88b..36cb1e882cc 100644 --- a/test/test_tree_2d_advection.jl +++ b/test/test_tree_2d_advection.jl @@ -25,10 +25,22 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem") end @trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - # Expected errors are exactly the same as in the parallel test! - l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + using OrdinaryDiffEq: SSPRK43 + println("═"^100) + println(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl")) + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + alg = SSPRK43(), tspan = (0.0, 10.0)) + l2_expected, linf_expected = analysis_callback(sol) + + println("═"^100) + println(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl")) + # Errors are exactly the same as in the elixir_advection_extended.jl + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + alg = SSPRK43()) + l2_actual, linf_actual = analysis_callback(sol) + + @test l2_actual == l2_expected + @test linf_actual == linf_expected end @trixi_testset "elixir_advection_mortar.jl" begin