Skip to content

Commit

Permalink
Add analysis tool for nsight reports
Browse files Browse the repository at this point in the history
Try fixes

Try multiline

Add analysis to more jobs
  • Loading branch information
charleskawczynski committed Nov 11, 2024
1 parent aa735e9 commit 8410c59
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .buildkite/analysis/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Julia environment for the CI analysis step: the GPU pipeline runs
# `.buildkite/nsight_analysis.jl` with `--project=.buildkite/analysis`.
# NOTE(review): the analysis script is not visible from here; presumably every
# package listed below is loaded by it — confirm before pruning any entry.
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
VegaLite = "112f6efa-9a02-5b7d-90c0-432ed331239a"
61 changes: 57 additions & 4 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ steps:
- julia --project=perf -e 'using CUDA; CUDA.precompile_runtime()'
- julia --project=perf -e 'using Pkg; Pkg.status()'

- echo "--- Instantiate analysis"
- julia --project=.buildkite/analysis -e 'using Pkg; Pkg.instantiate(;verbose=true)'
- julia --project=.buildkite/analysis -e 'using Pkg; Pkg.precompile()'
- julia --project=.buildkite/analysis -e 'using Pkg; Pkg.status()'

- echo "--- Download artifacts"
- julia --project=examples artifacts/download_artifacts.jl

Expand All @@ -55,6 +60,9 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave.yml
--job_id target_gpu_implicit_baroclinic_wave
- nsys stats --report cuda_gpu_trace target_gpu_implicit_baroclinic_wave/output_active/report.nsys-rep --output target_gpu_implicit_baroclinic_wave/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir target_gpu_implicit_baroclinic_wave/output_active/
artifact_paths: "target_gpu_implicit_baroclinic_wave/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -72,6 +80,9 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M.yml
--job_id gpu_hs_rhoe_equil_55km_nz63_0M
- nsys stats --report cuda_gpu_trace gpu_hs_rhoe_equil_55km_nz63_0M/output_active/report.nsys-rep --output gpu_hs_rhoe_equil_55km_nz63_0M/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_hs_rhoe_equil_55km_nz63_0M/output_active/
artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -90,6 +101,10 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M.yml
--job_id gpu_hs_rhoe_equil_55km_nz63_0M_4process
# TODO: only report-0.nsys-rep is analyzed below; add analysis for the reports of the other GPU devices
- nsys stats --report cuda_gpu_trace gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/report-0.nsys-rep --output gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/
artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -110,6 +125,10 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave.yml
--job_id target_gpu_implicit_baroclinic_wave_4process
# TODO: only report-0.nsys-rep is analyzed below; add analysis for the reports of the other GPU devices
- nsys stats --report cuda_gpu_trace target_gpu_implicit_baroclinic_wave_4process/output_active/report-0.nsys-rep --output target_gpu_implicit_baroclinic_wave_4process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir target_gpu_implicit_baroclinic_wave_4process/output_active/
artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -131,6 +150,9 @@ steps:
nsys profile --delay 100 --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond_diag_1process/output_active/report julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_diag_1process.yml
--job_id gpu_aquaplanet_dyamond_diag_1process
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_diag_1process/output_active/report.nsys-rep --output gpu_aquaplanet_dyamond_diag_1process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_diag_1process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_diag_1process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -139,7 +161,7 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 32G
slurm_mem: 40G
slurm_time: 8:00:00
slurm_exclusive:

Expand All @@ -152,6 +174,9 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss.yml
--job_id gpu_aquaplanet_dyamond_ss_1process
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_ss_1process/output_active/report.nsys-rep --output gpu_aquaplanet_dyamond_ss_1process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_ss_1process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_ss_1process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -160,7 +185,7 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 32G
slurm_mem: 40G
slurm_time: 8:00:00
slurm_exclusive:

Expand All @@ -169,9 +194,14 @@ steps:
- mkdir -p gpu_aquaplanet_dyamond_ss_2process
# Launch the driver under Nsight Systems on every srun task.
# The report name carries the task's rank: nsys expands %q{SLURM_PROCID} from
# the environment that srun sets for each task. This gives each task its own
# report file, and rank 0 produces the report-0.nsys-rep that the following
# `nsys stats` step reads. Without the suffix all tasks target the same
# `report` name and report-0.nsys-rep is never created.
# NOTE(review): if other multi-process jobs in this pipeline use a different
# rank variable (e.g. PMI_RANK), switch to that one for consistency.
- >
  srun --cpu-bind=threads --cpus-per-task=4
  nsys profile --delay 100 --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond_ss_2process/output_active/report-%q{SLURM_PROCID}
  julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
  --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss.yml
  --job_id gpu_aquaplanet_dyamond_ss_2process
# TODO: only report-0.nsys-rep is analyzed below; add analysis for the reports of the other GPU devices
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_ss_2process/output_active/report-0.nsys-rep --output gpu_aquaplanet_dyamond_ss_2process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_ss_2process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_ss_2process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -180,7 +210,7 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 2
slurm_mem: 32G
slurm_mem: 40G
slurm_time: 8:00:00
slurm_exclusive:

Expand All @@ -189,9 +219,14 @@ steps:
- mkdir -p gpu_aquaplanet_dyamond_ss_4process
# Launch the driver under Nsight Systems on every srun task.
# The report name carries the task's rank: nsys expands %q{SLURM_PROCID} from
# the environment that srun sets for each task. This gives each task its own
# report file, and rank 0 produces the report-0.nsys-rep that the following
# `nsys stats` step reads. Without the suffix all tasks target the same
# `report` name and report-0.nsys-rep is never created.
# NOTE(review): if other multi-process jobs in this pipeline use a different
# rank variable (e.g. PMI_RANK), switch to that one for consistency.
- >
  srun --cpu-bind=threads --cpus-per-task=4
  nsys profile --delay 100 --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond_ss_4process/output_active/report-%q{SLURM_PROCID}
  julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
  --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss.yml
  --job_id gpu_aquaplanet_dyamond_ss_4process
# TODO: only report-0.nsys-rep is analyzed below; add analysis for the reports of the other GPU devices
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_ss_4process/output_active/report-0.nsys-rep --output gpu_aquaplanet_dyamond_ss_4process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_ss_4process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_ss_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -200,7 +235,7 @@ steps:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 4
slurm_mem: 32G
slurm_mem: 40G
slurm_time: 8:00:00
slurm_exclusive:

Expand All @@ -227,9 +262,13 @@ steps:
- mkdir -p gpu_aquaplanet_dyamond_ws_1process
- >
srun --cpu-bind=threads --cpus-per-task=4
nsys profile --delay 100 --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond_ws_1process/output_active/report
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml
--job_id gpu_aquaplanet_dyamond_ws_1process
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_ws_1process/output_active/report.nsys-rep --output gpu_aquaplanet_dyamond_ws_1process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_ws_1process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -247,9 +286,13 @@ steps:
- mkdir -p gpu_aquaplanet_dyamond_ws_2process
# Launch the driver under Nsight Systems on every srun task.
# The report name carries the task's rank: nsys expands %q{SLURM_PROCID} from
# the environment that srun sets for each task. This gives each task its own
# report file, and rank 0 produces the report-0.nsys-rep that the following
# `nsys stats` step reads. Without the suffix all tasks target the same
# `report` name and report-0.nsys-rep is never created.
# NOTE(review): if other multi-process jobs in this pipeline use a different
# rank variable (e.g. PMI_RANK), switch to that one for consistency.
- >
  srun --cpu-bind=threads --cpus-per-task=4
  nsys profile --delay 100 --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond_ws_2process/output_active/report-%q{SLURM_PROCID}
  julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
  --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml
  --job_id gpu_aquaplanet_dyamond_ws_2process
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_ws_2process/output_active/report-0.nsys-rep --output gpu_aquaplanet_dyamond_ws_2process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_ws_2process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand All @@ -267,9 +310,13 @@ steps:
- mkdir -p gpu_aquaplanet_dyamond_ws_4process
# Launch the driver under Nsight Systems on every srun task.
# The report name carries the task's rank: nsys expands %q{SLURM_PROCID} from
# the environment that srun sets for each task. This gives each task its own
# report file, and rank 0 produces the report-0.nsys-rep that the following
# `nsys stats` step reads. Without the suffix all tasks target the same
# `report` name and report-0.nsys-rep is never created.
# NOTE(review): if other multi-process jobs in this pipeline use a different
# rank variable (e.g. PMI_RANK), switch to that one for consistency.
- >
  srun --cpu-bind=threads --cpus-per-task=4
  nsys profile --delay 100 --trace=nvtx,mpi,cuda,osrt --output=gpu_aquaplanet_dyamond_ws_4process/output_active/report-%q{SLURM_PROCID}
  julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
  --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml
  --job_id gpu_aquaplanet_dyamond_ws_4process
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_dyamond_ws_4process/output_active/report-0.nsys-rep --output gpu_aquaplanet_dyamond_ws_4process/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_dyamond_ws_4process/output_active/
artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand Down Expand Up @@ -311,6 +358,9 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${MODEL_CONFIG_PATH}aquaplanet_diagedmf.yml
--job_id gpu_aquaplanet_diagedmf
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_diagedmf/output_active/report.nsys-rep --output gpu_aquaplanet_diagedmf/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_diagedmf/output_active/
artifact_paths: "gpu_aquaplanet_diagedmf/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand Down Expand Up @@ -345,6 +395,9 @@ steps:
julia --threads=3 --color=yes --project=examples examples/hybrid/driver.jl
--config_file ${MODEL_CONFIG_PATH}aquaplanet_progedmf.yml
--job_id gpu_aquaplanet_progedmf
- nsys stats --report cuda_gpu_trace gpu_aquaplanet_progedmf/output_active/report.nsys-rep --output gpu_aquaplanet_progedmf/output_active/ --format csv
- julia --project=.buildkite/analysis .buildkite/nsight_analysis.jl --out_dir gpu_aquaplanet_progedmf/output_active/
artifact_paths: "gpu_aquaplanet_progedmf/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
Expand Down
Loading

0 comments on commit 8410c59

Please sign in to comment.