From 355f69b4db96d3096f7a3b69f4cf4e967be2d2dd Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Wed, 11 Sep 2024 09:20:07 -0400 Subject: [PATCH] Generate JET test-opt reports for all jobs --- examples/hybrid/driver.jl | 384 ++++++++++++++++++----------------- perf/jet_report_nfailures.jl | 9 +- 2 files changed, 202 insertions(+), 191 deletions(-) diff --git a/examples/hybrid/driver.jl b/examples/hybrid/driver.jl index 8777ee59eaf..d13ba42a11d 100644 --- a/examples/hybrid/driver.jl +++ b/examples/hybrid/driver.jl @@ -16,193 +16,197 @@ if !(@isdefined config) end simulation = CA.get_simulation(config) (; integrator) = simulation -sol_res = CA.solve_atmos!(simulation) - -(; atmos, params) = integrator.p -(; p) = integrator - -import ClimaCore -import ClimaCore: Topologies, Quadratures, Spaces -import ClimaAtmos.InitialConditions as ICs -using Statistics: mean -import ClimaAtmos.Parameters as CAP -import Thermodynamics as TD -import ClimaComms -using SciMLBase -using PrettyTables -using JLD2 -using NCDatasets -using ClimaTimeSteppers -import JSON -using Test -import Tar -import Base.Filesystem: rm -import OrderedCollections -include(joinpath(pkgdir(CA), "post_processing", "ci_plots.jl")) - -ref_job_id = config.parsed_args["reference_job_id"] -reference_job_id = isnothing(ref_job_id) ? simulation.job_id : ref_job_id - -if sol_res.ret_code == :simulation_crashed - error( - "The ClimaAtmos simulation has crashed. See the stack trace for details.", - ) -end -# Simulation did not crash -(; sol, walltime) = sol_res - -# we gracefully exited, so we won't have reached t_end -if !isempty(integrator.tstops) - @assert last(sol.t) == simulation.t_end -end -CA.verify_callbacks(sol.t) - -# Scaling check -if CA.is_distributed(config.comms_ctx) - nprocs = ClimaComms.nprocs(config.comms_ctx) - comms_ctx = config.comms_ctx - output_dir = simulation.output_dir - # replace sol.u on the root processor with the global sol.u - if ClimaComms.iamroot(comms_ctx) - Y = sol.u[1] - center_space = axes(Y.c) - horz_space = Spaces.horizontal_space(center_space) - horz_topology = Spaces.topology(horz_space) - quadrature_style = Spaces.quadrature_style(horz_space) - Nq = Quadratures.degrees_of_freedom(quadrature_style) - nlocalelems = Topologies.nlocalelems(horz_topology) - ncols_per_process = nlocalelems * Nq * Nq - scaling_file = - joinpath(output_dir, "scaling_data_$(nprocs)_processes.jld2") - @info( - "Writing scaling data", - "walltime (seconds)" = walltime, - scaling_file - ) - JLD2.jldsave(scaling_file; nprocs, ncols_per_process, walltime) - end -end - -# Check if selected output has changed from the previous recorded output (bit-wise comparison) -include(joinpath(@__DIR__, "..", "..", "regression_tests", "mse_tables.jl")) -if config.parsed_args["regression_test"] - # Test results against main branch - include( - joinpath( - @__DIR__, - "..", - "..", - "regression_tests", - "regression_tests.jl", - ), - ) - @testset "Test regression table entries" begin - mse_keys = sort(collect(keys(all_best_mse[simulation.job_id]))) - pcs = collect(Fields.property_chains(sol.u[end])) - for prop_chain in mse_keys - @test prop_chain in pcs - end - end - perform_regression_tests( - simulation.job_id, - sol.u[end], - all_best_mse, - simulation.output_dir, - ) -end - -@info "Callback verification, n_expected_calls: $(CA.n_expected_calls(integrator))" -@info "Callback verification, n_measured_calls: $(CA.n_measured_calls(integrator))" - -# Conservation checks -if config.parsed_args["check_conservation"] - FT = Spaces.undertype(axes(sol.u[end].c.ρ)) - @info "Checking conservation" - (; energy_conservation, mass_conservation, water_conservation) = - CA.check_conservation(sol) - - @info " Net energy change / total energy: $energy_conservation" - @info " Net mass change / total mass: $mass_conservation" - @info " Net water change / total water: $water_conservation" - - sfc = p.atmos.surface_model - - if CA.has_no_source_or_sink(config.parsed_args) - @test energy_conservation ≈ 0 atol = 50 * eps(FT) - @test mass_conservation ≈ 0 atol = 50 * eps(FT) - @test water_conservation ≈ 0 atol = 50 * eps(FT) - else - @test energy_conservation ≈ 0 atol = sqrt(eps(FT)) - @test mass_conservation ≈ 0 atol = sqrt(eps(FT)) - if sfc isa CA.PrognosticSurfaceTemperature - @test water_conservation ≈ 0 atol = sqrt(eps(FT)) - end - end -end - -# Visualize the solution -if ClimaComms.iamroot(config.comms_ctx) - include( - joinpath(pkgdir(CA), "regression_tests", "self_reference_or_path.jl"), - ) - @info "Plotting" - path = self_reference_or_path() # __build__ path (not job path) - if path == :self_reference - make_plots(Val(Symbol(reference_job_id)), simulation.output_dir) - else - main_job_path = joinpath(path, reference_job_id) - nc_dir = joinpath(main_job_path, "nc_files") - if ispath(nc_dir) - @info "nc_dir exists" - else - mkpath(nc_dir) - # Try to extract nc files from tarball: - @info "Comparing against $(readdir(nc_dir))" - end - if isempty(readdir(nc_dir)) - if isfile(joinpath(main_job_path, "nc_files.tar")) - Tar.extract(joinpath(main_job_path, "nc_files.tar"), nc_dir) - else - @warn "No nc_files found" - end - else - @info "Files already extracted" - end - - paths = if isempty(readdir(nc_dir)) - simulation.output_dir - else - [simulation.output_dir, nc_dir] - end - make_plots(Val(Symbol(reference_job_id)), paths) - end - @info "Plotting done" - - if islink(simulation.output_dir) - symlink_to_fullpath(path) = joinpath(dirname(path), readlink(path)) - else - symlink_to_fullpath(path) = path - end - - @info "Creating tarballs" - # These NC files are used by our reproducibility tests, - # and need to be found later when comparing against the - # main branch. If "nc_files.tar" is renamed, then please - # search for "nc_files.tar" globally and rename it in the - # reproducibility test folder. - Tar.create( - f -> endswith(f, ".nc"), - symlink_to_fullpath(simulation.output_dir), - joinpath(simulation.output_dir, "nc_files.tar"), - ) - Tar.create( - f -> endswith(f, r"hdf5|h5"), - symlink_to_fullpath(simulation.output_dir), - joinpath(simulation.output_dir, "hdf5_files.tar"), - ) - - foreach(readdir(simulation.output_dir)) do f - endswith(f, r"nc|hdf5|h5") && rm(joinpath(simulation.output_dir, f)) - end - @info "Tarballs created" -end +import SciMLBase +SciMLBase.step!(integrator) # compile +include(joinpath(@__DIR__, "..", "..", "perf", "jet_report_nfailures.jl")) + +# # sol_res = CA.solve_atmos!(simulation) + +# (; atmos, params) = integrator.p +# (; p) = integrator + +# import ClimaCore +# import ClimaCore: Topologies, Quadratures, Spaces +# import ClimaAtmos.InitialConditions as ICs +# using Statistics: mean +# import ClimaAtmos.Parameters as CAP +# import Thermodynamics as TD +# import ClimaComms +# using SciMLBase +# using PrettyTables +# using JLD2 +# using NCDatasets +# using ClimaTimeSteppers +# import JSON +# using Test +# import Tar +# import Base.Filesystem: rm +# import OrderedCollections +# include(joinpath(pkgdir(CA), "post_processing", "ci_plots.jl")) + +# ref_job_id = config.parsed_args["reference_job_id"] +# reference_job_id = isnothing(ref_job_id) ? simulation.job_id : ref_job_id + +# if sol_res.ret_code == :simulation_crashed +# error( +# "The ClimaAtmos simulation has crashed. See the stack trace for details.", +# ) +# end +# # Simulation did not crash +# (; sol, walltime) = sol_res + +# # we gracefully exited, so we won't have reached t_end +# if !isempty(integrator.tstops) +# @assert last(sol.t) == simulation.t_end +# end +# CA.verify_callbacks(sol.t) + +# # Scaling check +# if CA.is_distributed(config.comms_ctx) +# nprocs = ClimaComms.nprocs(config.comms_ctx) +# comms_ctx = config.comms_ctx +# output_dir = simulation.output_dir +# # replace sol.u on the root processor with the global sol.u +# if ClimaComms.iamroot(comms_ctx) +# Y = sol.u[1] +# center_space = axes(Y.c) +# horz_space = Spaces.horizontal_space(center_space) +# horz_topology = Spaces.topology(horz_space) +# quadrature_style = Spaces.quadrature_style(horz_space) +# Nq = Quadratures.degrees_of_freedom(quadrature_style) +# nlocalelems = Topologies.nlocalelems(horz_topology) +# ncols_per_process = nlocalelems * Nq * Nq +# scaling_file = +# joinpath(output_dir, "scaling_data_$(nprocs)_processes.jld2") +# @info( +# "Writing scaling data", +# "walltime (seconds)" = walltime, +# scaling_file +# ) +# JLD2.jldsave(scaling_file; nprocs, ncols_per_process, walltime) +# end +# end + +# # Check if selected output has changed from the previous recorded output (bit-wise comparison) +# include(joinpath(@__DIR__, "..", "..", "regression_tests", "mse_tables.jl")) +# if config.parsed_args["regression_test"] +# # Test results against main branch +# include( +# joinpath( +# @__DIR__, +# "..", +# "..", +# "regression_tests", +# "regression_tests.jl", +# ), +# ) +# @testset "Test regression table entries" begin +# mse_keys = sort(collect(keys(all_best_mse[simulation.job_id]))) +# pcs = collect(Fields.property_chains(sol.u[end])) +# for prop_chain in mse_keys +# @test prop_chain in pcs +# end +# end +# perform_regression_tests( +# simulation.job_id, +# sol.u[end], +# all_best_mse, +# simulation.output_dir, +# ) +# end + +# @info "Callback verification, n_expected_calls: $(CA.n_expected_calls(integrator))" +# @info "Callback verification, n_measured_calls: $(CA.n_measured_calls(integrator))" + +# # Conservation checks +# if config.parsed_args["check_conservation"] +# FT = Spaces.undertype(axes(sol.u[end].c.ρ)) +# @info "Checking conservation" +# (; energy_conservation, mass_conservation, water_conservation) = +# CA.check_conservation(sol) + +# @info " Net energy change / total energy: $energy_conservation" +# @info " Net mass change / total mass: $mass_conservation" +# @info " Net water change / total water: $water_conservation" + +# sfc = p.atmos.surface_model + +# if CA.has_no_source_or_sink(config.parsed_args) +# @test energy_conservation ≈ 0 atol = 50 * eps(FT) +# @test mass_conservation ≈ 0 atol = 50 * eps(FT) +# @test water_conservation ≈ 0 atol = 50 * eps(FT) +# else +# @test energy_conservation ≈ 0 atol = sqrt(eps(FT)) +# @test mass_conservation ≈ 0 atol = sqrt(eps(FT)) +# if sfc isa CA.PrognosticSurfaceTemperature +# @test water_conservation ≈ 0 atol = sqrt(eps(FT)) +# end +# end +# end + +# # Visualize the solution +# if ClimaComms.iamroot(config.comms_ctx) +# include( +# joinpath(pkgdir(CA), "regression_tests", "self_reference_or_path.jl"), +# ) +# @info "Plotting" +# path = self_reference_or_path() # __build__ path (not job path) +# if path == :self_reference +# make_plots(Val(Symbol(reference_job_id)), simulation.output_dir) +# else +# main_job_path = joinpath(path, reference_job_id) +# nc_dir = joinpath(main_job_path, "nc_files") +# if ispath(nc_dir) +# @info "nc_dir exists" +# else +# mkpath(nc_dir) +# # Try to extract nc files from tarball: +# @info "Comparing against $(readdir(nc_dir))" +# end +# if isempty(readdir(nc_dir)) +# if isfile(joinpath(main_job_path, "nc_files.tar")) +# Tar.extract(joinpath(main_job_path, "nc_files.tar"), nc_dir) +# else +# @warn "No nc_files found" +# end +# else +# @info "Files already extracted" +# end + +# paths = if isempty(readdir(nc_dir)) +# simulation.output_dir +# else +# [simulation.output_dir, nc_dir] +# end +# make_plots(Val(Symbol(reference_job_id)), paths) +# end +# @info "Plotting done" + +# if islink(simulation.output_dir) +# symlink_to_fullpath(path) = joinpath(dirname(path), readlink(path)) +# else +# symlink_to_fullpath(path) = path +# end + +# @info "Creating tarballs" +# # These NC files are used by our reproducibility tests, +# # and need to be found later when comparing against the +# # main branch. If "nc_files.tar" is renamed, then please +# # search for "nc_files.tar" globally and rename it in the +# # reproducibility test folder. +# Tar.create( +# f -> endswith(f, ".nc"), +# symlink_to_fullpath(simulation.output_dir), +# joinpath(simulation.output_dir, "nc_files.tar"), +# ) +# Tar.create( +# f -> endswith(f, r"hdf5|h5"), +# symlink_to_fullpath(simulation.output_dir), +# joinpath(simulation.output_dir, "hdf5_files.tar"), +# ) + +# foreach(readdir(simulation.output_dir)) do f +# endswith(f, r"nc|hdf5|h5") && rm(joinpath(simulation.output_dir, f)) +# end +# @info "Tarballs created" +# end diff --git a/perf/jet_report_nfailures.jl b/perf/jet_report_nfailures.jl index 9c4b6709020..2a5388b03b3 100644 --- a/perf/jet_report_nfailures.jl +++ b/perf/jet_report_nfailures.jl @@ -26,6 +26,11 @@ Yₜ = similar(Y); Yₜ_exp = similar(Y); Yₜ_lim = similar(Y); ref_Y = similar(Y); + + +using JET +@test_opt CA.set_precomputed_quantities!(Y, p, t) + #! format: off n["step!"] = @n_failures SciMLBase.step!(integrator); n["horizontal_advection_tendency!"] = @n_failures CA.horizontal_advection_tendency!(Yₜ, Y, p, t); @@ -34,7 +39,7 @@ n["explicit_vertical_advection_tendency!"] = @n_failures CA.explicit_verti n["hyperdiffusion_tendency!"] = @n_failures CA.hyperdiffusion_tendency!(Yₜ_exp, Yₜ_lim, Y, p, t); n["remaining_tendency!"] = @n_failures CA.remaining_tendency!(Yₜ_exp, Yₜ_lim, Y, p, t); n["additional_tendency!"] = @n_failures CA.additional_tendency!(Yₜ, Y, p, t); -n["vertical_diffusion_boundary_layer_tendency!"] = @n_failures CA.vertical_diffusion_boundary_layer_tendency!(Yₜ, Y, p, t); +n["vertical_diffusion_boundary_layer_tendency!"] = @n_failures CA.vertical_diffusion_boundary_layer_tendency!(Yₜ, Y, p, t, p.atmos.vert_diff); n["implicit_tendency!"] = @n_failures CA.implicit_tendency!(Yₜ, Y, p, t); n["set_precomputed_quantities!"] = @n_failures CA.set_precomputed_quantities!(Y, p, t); n["limiters_func!"] = @n_failures CA.limiters_func!(Y, p, t, ref_Y); @@ -47,3 +52,5 @@ n = filter(x -> x.second ≠ 0, n) @info "n-jet failures (excluding n=0):" show(IOContext(stdout, :limit => false), MIME"text/plain"(), n) println() + +