-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6512ef2
commit 49bebb8
Showing
2 changed files
with
199 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/bin/bash -l | ||
# Slurm batch script. The directives below request the "main" partition, | ||
# 2 nodes, 256 tasks with 2 CPUs per task, 930M of memory per CPU and a | ||
# 30 minute time limit, under job name "Snudda". | ||
# NOTE(review): stdout/stderr are written into log/ -- presumably that | ||
# directory has to exist in the submit directory before sbatch is run; confirm. | ||
#SBATCH --partition=main | ||
#SBATCH -o log/runSnudda-%j-output.txt | ||
#SBATCH -e log/runSnudda-%j-error.txt | ||
#SBATCH -t 00:30:00 | ||
#SBATCH -J Snudda | ||
#SBATCH -A naiss2023-5-231 | ||
#SBATCH --nodes=2 | ||
#SBATCH -n 256 | ||
#SBATCH --cpus-per-task=2 | ||
#SBATCH --mem-per-cpu=930M | ||
#SBATCH --mail-type=ALL | ||
module load snic-env | ||
|
||
|
||
#.. Remove the stack-size limit for this shell and the processes it starts. | ||
#export OMP_STACKSIZE=128G | ||
ulimit -s unlimited | ||
|
||
|
||
#.. Number of ipengines to start. Fixed at 100; the disabled line below | ||
#.. derived it from the number of allocated tasks instead. | ||
#let NWORKERS="$SLURM_NTASKS-2" | ||
NWORKERS=100 | ||
|
||
export IPNWORKERS=$NWORKERS | ||
|
||
|
||
#.. Start from an empty IPython directory on scratch. -f keeps a missing | ||
#.. directory (e.g. on the first run) from producing an error, and the path | ||
#.. is quoted so it is always passed to rm as a single argument. | ||
export IPYTHONDIR="/cfs/klemming/scratch/${USER:0:1}/$USER/.ipython" | ||
rm -rf -- "$IPYTHONDIR" | ||
export IPYTHON_PROFILE=default | ||
#.. Activate the Snudda virtual environment for everything that follows. | ||
source "$HOME/Snudda/snudda_env/bin/activate" | ||
|
||
|
||
#.. Start the ipcontroller as a background job step (1 task, 2 CPUs). | ||
#.. FI_CXI_DEFAULT_VNI is set to 4 random bytes printed as an unsigned integer. | ||
#.. NOTE(review): presumably this gives each overlapping job step its own | ||
#.. VNI -- confirm against the site documentation. | ||
export FI_CXI_DEFAULT_VNI=$(od -vAn -N4 -tu < /dev/urandom) | ||
srun -n 1 -N 1 -c 2 --exact --overlap --mem=0 ./ipcontroller_new.sh & | ||
|
||
|
||
echo ">>> waiting 60s for controller to start" | ||
sleep 60 | ||
|
||
#.. Read in CONTROLLERIP | ||
#.. NOTE(review): controller_ip.txt is presumably written by | ||
#.. ./ipcontroller_new.sh; a file left over from an earlier run would still | ||
#.. pass the check below -- confirm. | ||
#.. Without an address the engines would be started with an empty | ||
#.. --location, so stop here instead of continuing. | ||
if [[ ! -s controller_ip.txt ]]; then | ||
  echo "controller_ip.txt is missing or empty, aborting!" >&2 | ||
  exit 1 | ||
fi | ||
CONTROLLERIP=$(<controller_ip.txt) | ||
|
||
|
||
##.. Start the engines | ||
echo ">>> starting ${IPNWORKERS} engines " | ||
#.. Two earlier variants of the engine launch are kept below, disabled: | ||
#.. the first adds debug logging and extra Kernel/Session settings, the | ||
#.. second runs ipengine under valgrind. | ||
#srun -n ${IPNWORKERS} -c 2 --exact --overlap ipengine --location=${CONTROLLERIP} --profile=${IPYTHON_PROFILE} --mpi \ | ||
#--ipython-dir=${IPYTHONDIR} --timeout=30.0 --log-level=DEBUG \ | ||
#--BaseParallelApplication.verbose_crash=True --IPEngine.verbose_crash=True \ | ||
#--Kernel.stop_on_error_timeout=1.0 --IPythonKernel.stop_on_error_timeout=1.0 \ | ||
#Session.buffer_threshold=4096 Session.copy_threshold=250000 \ | ||
#Session.digest_history_size=250000 c.EngineFactory.max_heartbeat_misses=10 c.MPI.use='mpi4py' \ | ||
#1> ipe_${SLURM_JOBID}.out 2> ipe_${SLURM_JOBID}.err & | ||
|
||
#srun -n ${IPNWORKERS} -c 2 --exact --overlap valgrind --leak-check=full --show-leak-kinds=all \ | ||
#ipengine --location=${CONTROLLERIP} --profile=${IPYTHON_PROFILE} --mpi \ | ||
#--ipython-dir=${IPYTHONDIR} --timeout=30.0 c.EngineFactory.max_heartbeat_misses=10 c.MPI.use='mpi4py' \ | ||
#1> ipe_${SLURM_JOBID}.out 2> ipe_${SLURM_JOBID}.err & | ||
|
||
#.. Active launch: IPNWORKERS ipengine tasks with 2 CPUs each across | ||
#.. SLURM_JOB_NUM_NODES nodes, pointed at CONTROLLERIP and started in the | ||
#.. background. stdout goes to ipe_<jobid>.out and stderr to ipe_<jobid>.err. | ||
#.. A fresh random FI_CXI_DEFAULT_VNI is exported first. | ||
#.. NOTE(review): the two trailing c.* words are passed to ipengine as plain | ||
#.. arguments without a leading "--"; confirm that ipengine accepts them. | ||
export FI_CXI_DEFAULT_VNI=$(od -vAn -N4 -tu < /dev/urandom) | ||
srun -n ${IPNWORKERS} -c 2 -N ${SLURM_JOB_NUM_NODES} --exact --overlap --mem=0 ipengine \ | ||
--location=${CONTROLLERIP} --profile=${IPYTHON_PROFILE} --mpi \ | ||
--ipython-dir=${IPYTHONDIR} --timeout=30.0 c.EngineFactory.max_heartbeat_misses=10 c.MPI.use='mpi4py' \ | ||
1> ipe_${SLURM_JOBID}.out 2> ipe_${SLURM_JOBID}.err & | ||
|
||
|
||
#.. Give the engines time to start. The message now states the delay that | ||
#.. is actually used (it previously said 60s while sleeping 30). | ||
echo ">>> waiting 30s for engines to start" | ||
sleep 30 | ||
|
||
#.. Run the workflow script as a single task in the foreground, again with | ||
#.. a fresh random FI_CXI_DEFAULT_VNI. | ||
export FI_CXI_DEFAULT_VNI=$(od -vAn -N4 -tu < /dev/urandom) | ||
srun -n 1 -N 1 --exact --overlap --mem=0 ./Dardel_runSnudda_inside.sh | ||
|
||
|
||
echo " " | ||
|
||
#.. Append the end time to the timing file. The redirection was missing | ||
#.. before, so the file name was only printed as part of the message. | ||
echo "JOB END $(date)" >> start_time_network_connect.txt | ||
|
||
#.. Block until the background job steps started earlier have exited. | ||
wait | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#!/bin/bash | ||
|
||
|
||
|
||
# Location of the Snudda package and of the network directory for this run. | ||
SNUDDA_DIR="$HOME/Snudda/snudda" | ||
JOBDIR=networks/test_10k | ||
|
||
# Value passed to "snudda init --size". | ||
SIMSIZE=10000 | ||
|
||
# If the BasalGangliaData directory exists, then use that for our data | ||
#/cfs/klemming/scratch/${USER:0:1}/$USER/BasalGangliaData/data | ||
#BasalGangliaData/Parkinson/PD0 | ||
if [[ -d "$HOME/BasalGangliaData/data" ]]; then | ||
  export SNUDDA_DATA="$HOME/BasalGangliaData/data" | ||
  echo "Setting SNUDDA_DATA to $SNUDDA_DATA" | ||
else | ||
  echo "SNUDDA_DATA environment variable not changed (may be empty): $SNUDDA_DATA" | ||
fi | ||
|
||
# Quoted so the path is always passed to mkdir as a single argument. | ||
mkdir -p -- "$JOBDIR" | ||
|
||
# Remind the user how this script is meant to be launched, and show which | ||
# task this process is. | ||
echo "Dardel_runSnudda.sh should be started with srun -n 1, to only get one process" | ||
|
||
echo "SLURM_PROCID = $SLURM_PROCID" | ||
|
||
# Only the task with SLURM_PROCID 0 runs the workflow in the else branch; | ||
# any other task just sets a placeholder variable. If SLURM_PROCID is unset | ||
# the test itself fails, so the else branch runs in that case as well. | ||
if [ "$SLURM_PROCID" -gt 0 ]; then | ||
mock_string="Not main process" | ||
else | ||
|
||
# For debug purposes: | ||
echo "PATH: $PATH" | ||
echo "IPYTHONDIR: $IPYTHONDIR" | ||
echo "PYTHONPATH: $PYTHONPATH" | ||
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" | ||
|
||
echo ">>>>>> Main process starting ipcluster" | ||
echo | ||
|
||
# Start a fresh timing file with the start time of the workflow. | ||
echo "Start time: " > start_time_network_connect.txt | ||
date >> start_time_network_connect.txt | ||
|
||
echo ">>> Init: $(date)" | ||
# On failure, stop the controller with the same helper that is used at the | ||
# end of a successful run; "ipcluster stop" is stale now that ipcluster is | ||
# no longer started from this script. | ||
# NOTE(review): presumably the engines exit once the controller is gone -- confirm. | ||
if ! snudda init "${JOBDIR}" --size "${SIMSIZE}" --overwrite --randomseed 1234 --stayInside; then | ||
  echo "Something went wrong during init, aborting!" | ||
  python ipcontroller_shutdown.py | ||
  exit -1 | ||
fi | ||
|
||
# WE NOW START IPCLUSTER USING ipcontroller.sh INSTEAD... | ||
# | ||
# echo "SLURM_NODELIST = $SLURM_NODELIST" | ||
# let NWORKERS="$SLURM_NTASKS - 1" | ||
# | ||
# echo ">>> NWORKERS " $NWORKERS | ||
# echo ">>> Starting ipcluster `date`" | ||
# | ||
# #.. Start the ipcluster | ||
# ipcluster start -n ${NWORKERS} \ | ||
# --ip='*' \ | ||
# --HeartMonitor.max_heartmonitor_misses=1000 \ | ||
# --HubFactory.registration_timeout=600 \ | ||
# --HeartMonitor.period=10000 & | ||
# | ||
# | ||
# #.. Sleep to allow engines to start | ||
# echo ">>> Wait 120s to allow engines to start" | ||
# sleep 120 #60 | ||
|
||
# Place, detect and prune run in sequence. A failing stage aborts the script | ||
# and first stops the controller with the same helper that is used at the | ||
# end of a successful run, so it is not left running after the abort. | ||
# NOTE(review): presumably the engines exit once the controller is gone -- confirm. | ||
echo ">>> Place: $(date)" | ||
if ! snudda place "${JOBDIR}" --verbose; then | ||
  echo "Something went wrong during placement, aborting!" | ||
  python ipcontroller_shutdown.py | ||
  exit -1 | ||
fi | ||
|
||
echo ">>> Detect: $(date)" | ||
if ! snudda detect "${JOBDIR}" --hvsize 50 --parallel; then | ||
  echo "Something went wrong during detection, aborting!" | ||
  python ipcontroller_shutdown.py | ||
  exit -1 | ||
fi | ||
|
||
echo ">>> Prune: $(date)" | ||
if ! snudda prune "${JOBDIR}" --parallel; then | ||
  echo "Something went wrong during pruning, aborting!" | ||
  python ipcontroller_shutdown.py | ||
  exit -1 | ||
fi | ||
|
||
# Input generation is enabled: copy the input configuration into the job | ||
# directory and generate input with "--time 5". | ||
|
||
#echo ">>> Input: "`date` | ||
# cp -a $SNUDDA_DIR/data/input_config/input-v10-scaled.json ${JOBDIR}/input.json | ||
# Only generate input when the copy succeeded, so that a missing or stale | ||
# input.json is never used; the script continues either way. | ||
if cp -a "$SNUDDA_DIR/data/input_config/external-input-dSTR-scaled-v4.json" "${JOBDIR}/input.json"; then | ||
  snudda input "${JOBDIR}" --parallel --time 5 | ||
else | ||
  echo "Could not copy the input configuration, skipping input generation" >&2 | ||
fi | ||
|
||
|
||
#.. Shut down cluster | ||
# ipcluster stop | ||
#.. Shutdown ipcontroller | ||
#.. NOTE(review): ipcontroller_shutdown.py is not shown here; presumably it | ||
#.. asks the running controller to stop -- confirm. | ||
echo "Shutting down ipcontroller" | ||
|
||
python ipcontroller_shutdown.py | ||
|
||
|
||
#.. Print the finishing time to stdout. | ||
date | ||
#echo "JOB END "`date` start_time_network_connect.txt | ||
|
||
#.. NOTE(review): the message names Dardel_runjob.sh, while the launch hint | ||
#.. printed earlier in this script names Dardel_runSnudda.sh -- confirm which is intended. | ||
echo "EXITING Dardel_runjob.sh" | ||
|
||
fi |