-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nonmem run: support Slurm submission
The new 'bbi nonmem run slurm' command follows 'bbi nonmem run sge' in terms of going through gridSpec, attempting to align the template directives, and testing with the same helpers. The one deliberate deviation in behavior is the output file name. For SGE, standard output and standard error are redirected to the default file name of '{run name}.o{job id}'. There's been a request (gh-312) to clean up these .o* and .po* files (only the former is relevant for Slurm). Cleaning them up is probably a bit aggressive because the .o* file contains the 'bbi run local ...' output, which is useful for troubleshooting. However, I suspect a core pain point with these files is the _changing name_ when overwriting a model for reasons that have to do with how SVN handles deletions. So, don't mimic this behavior and instead use a consistent name to hopefully avoid this issue for Slurm. Closes #303
- Loading branch information
Showing
11 changed files
with
225 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package cmd | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/spf13/cobra" | ||
"github.com/spf13/viper" | ||
) | ||
|
||
// slurmTemplate is the Go template text for the batch script handed to
// sbatch. It is rendered with a value exposing JobName, WorkingDirectory,
// Command, and Config (Parallel, Threads); values are piped through the
// shquote template function, which is registered elsewhere.
//
// Job output always goes to the fixed file name slurm.out rather than a
// name that embeds the job ID, so the name does not change between runs.
// The --ntasks directive is emitted only when Config.Parallel is set; the
// "{{-" trim marker removes the preceding newline so no blank line is left
// behind otherwise.
const slurmTemplate = `#!/bin/bash
#SBATCH --job-name={{.JobName | shquote}}
#SBATCH --output=slurm.out
#SBATCH --export=ALL
{{- if .Config.Parallel}}
#SBATCH --ntasks={{.Config.Threads}}{{end}}
#SBATCH --chdir={{.WorkingDirectory | shquote}}
{{range .Command}}{{. | shquote}} {{end}}
`
|
||
func NewSlurmCmd() *cobra.Command { | ||
cmd := &cobra.Command{ | ||
Use: "slurm [flags] <model> [<model>...]", | ||
Short: "Run models via Slurm", | ||
Example: fmt.Sprintf(runExamples, "slurm"), | ||
Run: slurm, | ||
} | ||
|
||
cmd.PersistentFlags().String("bbi_binary", "", | ||
"bbi executable to use in the Slurm submission script (default: current process's executable)") | ||
errpanic(viper.BindPFlag("bbi_binary", cmd.PersistentFlags().Lookup("bbi_binary"))) | ||
|
||
const gridNamePrefixIdentifier string = "grid_name_prefix" | ||
cmd.PersistentFlags().String(gridNamePrefixIdentifier, "", | ||
"prefix to add to the name of submitted jobs") | ||
errpanic(viper.BindPFlag(gridNamePrefixIdentifier, cmd.PersistentFlags().Lookup(gridNamePrefixIdentifier))) | ||
|
||
return cmd | ||
} | ||
|
||
func slurm(_ *cobra.Command, args []string) { | ||
gs := &gridSpec{ | ||
Name: "Slurm", | ||
Template: slurmTemplate, | ||
SubmitCommand: "sbatch", | ||
IgnoreError: func(_ error, _ string) bool { return false }, | ||
} | ||
gs.run(args) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
## bbi nonmem run slurm | ||
|
||
Run models via Slurm | ||
|
||
``` | ||
bbi nonmem run slurm [flags] <model> [<model>...] | ||
``` | ||
|
||
### Examples | ||
|
||
``` | ||
# Execute model run001 | ||
bbi nonmem run slurm run001.mod | ||
# Run models run001.mod, run002.mod, and run003.mod | ||
bbi nonmem run slurm 'run[001:003].mod' | ||
# Run all models in the current directory | ||
bbi nonmem run slurm . | ||
``` | ||
|
||
### Options | ||
|
||
``` | ||
--bbi_binary string bbi executable to use in the Slurm submission script (default: current process's executable) | ||
--grid_name_prefix string prefix to add to the name of submitted jobs | ||
-h, --help help for slurm | ||
``` | ||
|
||
### Options inherited from parent commands | ||
|
||
``` | ||
--additional_post_work_envs strings additional values (as ENV KEY=VALUE) to provide for the post execution environment | ||
--background RAW NMFE OPTION - tell NONMEM not to scan stdin for control characters | ||
--clean_lvl int clean level used for output (default 1) | ||
--config string path to another bbi.yaml to load | ||
--copy_lvl int copy level used for output | ||
-d, --debug debug mode | ||
--delay int Selects a random number of seconds between 1 and this value to stagger / jitter job execution. Assists in dealing with large volumes of work dealing with the same data set. May avoid NMTRAN issues about not being able read / close files | ||
--git whether git is used | ||
--json show JSON output, if possible | ||
--licfile string RAW NMFE OPTION - NONMEM license file to use | ||
--log_file string file into which to store the output / logging details from bbi | ||
--maxlim int RAW NMFE OPTION - set the maximum values for the buffers used by NONMEM (if 0, don't pass -maxlim to nmfe) (default 2) | ||
--mpi_exec_path string fully qualified path to mpiexec to use for NONMEM parallel operations (default "/usr/local/mpich3/bin/mpiexec") | ||
--nm_version string version of NONMEM from the configuration list to use | ||
--nmqual whether to execute with nmqual (autolog.pl) | ||
--nobuild RAW NMFE OPTION - do not build a new NONMEM executable | ||
-o, --output string output file | ||
--output_dir string Go template for the output directory to use for storing details of each executed model (default "{{ .Name }}") | ||
--overwrite whether to remove existing output directories | ||
--parafile string location of a user-provided parafile to use for parallel execution | ||
--parallel whether to run NONMEM in parallel mode | ||
--parallel_timeout int amount of time to wait for parallel operations in NONMEM before timing out (default 2147483647) | ||
--post_work_executable string script or binary to run when job execution completes or fails | ||
--prcompile RAW NMFE OPTION - forces PREDPP compilation | ||
--prdefault RAW NMFE OPTION - do not recompile any routines other than FSUBS | ||
-p, --preview preview action, but don't actually run command | ||
--prsame RAW NMFE OPTION - tell NONMEM to skip the PREDPP compilation step | ||
--save_config whether to save the existing configuration to the output directory (default true) | ||
--threads int number of threads to execute with locally or nodes to execute on in parallel (default 4) | ||
--tprdefault RAW NMFE OPTION - test if is okay to do -prdefault | ||
-v, --verbose verbose output | ||
``` | ||
|
||
### SEE ALSO | ||
|
||
* [bbi nonmem run](bbi_nonmem_run.md) - Run models locally or on the grid | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package nonmem | ||
|
||
import ( | ||
"bytes" | ||
"os/exec" | ||
"testing" | ||
"time" | ||
|
||
log "github.com/sirupsen/logrus" | ||
) | ||
|
||
func TestBbiCompletesSlurmExecution(tt *testing.T) { | ||
if !FeatureEnabled("SLURM") { | ||
tt.Skip("Slurm is not enabled") | ||
} | ||
|
||
tests := []string{ | ||
"acop", | ||
"ctl_test", | ||
"leading-path-with space", | ||
} | ||
|
||
checkGridExecution(tt, "slurm", tests, WaitForSlurmToTerminate) | ||
} | ||
|
||
func TestBbiCompletesParallelSlurmExecution(tt *testing.T) { | ||
if !FeatureEnabled("SLURM") { | ||
tt.Skip("Slurm is not enabled") | ||
} | ||
|
||
tests := []string{ | ||
"acop", | ||
"ctl_test", | ||
"leading-path-with space", | ||
} | ||
|
||
checkParallelGridExecution(tt, "slurm", tests, WaitForSlurmToTerminate) | ||
} | ||
|
||
func WaitForSlurmToTerminate(name string) error { | ||
log.Infof("waiting for Slurm job %s", name) | ||
secs := 30 * time.Second | ||
for { | ||
time.Sleep(secs) | ||
contains, err := squeueContains(name) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if !contains { | ||
break | ||
} | ||
|
||
log.Infof("%s is still in squeue output; checking again in %s", name, secs) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// squeueContains reports whether squeue prints anything for jobs with the
// given name. It runs squeue with no header and a job-ID-only format,
// filtered by --name, and treats any non-whitespace output as "listed".
// An error from running squeue is returned as is, with a false result.
func squeueContains(name string) (bool, error) {
	out, err := exec.Command(
		"squeue",
		"--noheader",
		"--format=%i",
		"--name="+name,
	).Output()
	if err != nil {
		return false, err
	}

	listed := len(bytes.TrimSpace(out)) != 0
	return listed, nil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters