Skip to content

Commit

Permalink
Adding inside
Browse files Browse the repository at this point in the history
  • Loading branch information
Hjorthmedh committed Oct 25, 2023
1 parent 6512ef2 commit 49bebb8
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 0 deletions.
79 changes: 79 additions & 0 deletions examples/parallel/KTH_PDC/Dardel_runSnudda_inside.job
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/bin/bash -l
#
# Slurm batch job for running Snudda on Dardel with an ipyparallel cluster.
#
# Steps performed by this script:
#   1. Start one ipcontroller job step (./ipcontroller_new.sh) in the background.
#   2. Read the controller address from controller_ip.txt.
#   3. Start IPNWORKERS ipengine processes in the background.
#   4. Run ./Dardel_runSnudda_inside.sh as a single foreground task.
#   5. Wait for the background job steps to finish.
#
# NOTE: Slurm opens the -o/-e files before this script runs, so the log/
# directory must already exist in the submission directory.
#
#SBATCH --partition=main
#SBATCH -o log/runSnudda-%j-output.txt
#SBATCH -e log/runSnudda-%j-error.txt
#SBATCH -t 00:30:00
#SBATCH -J Snudda
#SBATCH -A naiss2023-5-231
#SBATCH --nodes=2
#SBATCH -n 256
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=930M
#SBATCH --mail-type=ALL
module load snic-env


#..
#export OMP_STACKSIZE=128G
ulimit -s unlimited

# Seconds to wait for the controller / the engines to come up.
CONTROLLER_WAIT=60
ENGINE_WAIT=30

# File from which the controller address is read after the controller wait.
CONTROLLER_IP_FILE=controller_ip.txt

# Number of ipengine workers (currently fixed rather than derived from Slurm).
#NWORKERS=$((SLURM_NTASKS - 2))
NWORKERS=100

export IPNWORKERS=$NWORKERS

# Export a fresh random FI_CXI_DEFAULT_VNI; called before every srun below.
# NOTE(review): presumably needed so that the overlapping job steps do not
# share a network identifier -- confirm against the PDC/Dardel documentation.
randomize_vni() {
  FI_CXI_DEFAULT_VNI=$(od -vAn -N4 -tu < /dev/urandom)
  export FI_CXI_DEFAULT_VNI
}


export IPYTHONDIR="/cfs/klemming/scratch/${USER:0:1}/$USER/.ipython"
# Start from a clean IPython directory. -f keeps the first run (no directory
# yet) quiet, and :? refuses to run rm if the variable is ever empty.
rm -rf -- "${IPYTHONDIR:?}"
export IPYTHON_PROFILE=default
source "$HOME/Snudda/snudda_env/bin/activate"


#.. Start the ipcontroller
randomize_vni
srun -n 1 -N 1 -c 2 --exact --overlap --mem=0 ./ipcontroller_new.sh &


echo ">>> waiting ${CONTROLLER_WAIT}s for controller to start"
sleep "$CONTROLLER_WAIT"

#.. Read in CONTROLLERIP
# Without a controller address the engines cannot connect, so stop here
# instead of burning the allocation on engines that will never register.
if [[ ! -s "$CONTROLLER_IP_FILE" ]]; then
  echo "ERROR: ${CONTROLLER_IP_FILE} is missing or empty after ${CONTROLLER_WAIT}s, aborting!" >&2
  exit 1
fi
CONTROLLERIP=$(<"$CONTROLLER_IP_FILE")


##.. Start the engines
echo ">>> starting ${IPNWORKERS} engines "
#srun -n ${IPNWORKERS} -c 2 --exact --overlap ipengine --location=${CONTROLLERIP} --profile=${IPYTHON_PROFILE} --mpi \
#--ipython-dir=${IPYTHONDIR} --timeout=30.0 --log-level=DEBUG \
#--BaseParallelApplication.verbose_crash=True --IPEngine.verbose_crash=True \
#--Kernel.stop_on_error_timeout=1.0 --IPythonKernel.stop_on_error_timeout=1.0 \
#Session.buffer_threshold=4096 Session.copy_threshold=250000 \
#Session.digest_history_size=250000 c.EngineFactory.max_heartbeat_misses=10 c.MPI.use='mpi4py' \
#1> ipe_${SLURM_JOBID}.out 2> ipe_${SLURM_JOBID}.err &

#srun -n ${IPNWORKERS} -c 2 --exact --overlap valgrind --leak-check=full --show-leak-kinds=all \
#ipengine --location=${CONTROLLERIP} --profile=${IPYTHON_PROFILE} --mpi \
#--ipython-dir=${IPYTHONDIR} --timeout=30.0 c.EngineFactory.max_heartbeat_misses=10 c.MPI.use='mpi4py' \
#1> ipe_${SLURM_JOBID}.out 2> ipe_${SLURM_JOBID}.err &

randomize_vni
srun -n "${IPNWORKERS}" -c 2 -N "${SLURM_JOB_NUM_NODES}" --exact --overlap --mem=0 ipengine \
  --location="${CONTROLLERIP}" --profile="${IPYTHON_PROFILE}" --mpi \
  --ipython-dir="${IPYTHONDIR}" --timeout=30.0 c.EngineFactory.max_heartbeat_misses=10 c.MPI.use='mpi4py' \
  1> "ipe_${SLURM_JOBID}.out" 2> "ipe_${SLURM_JOBID}.err" &


echo ">>> waiting ${ENGINE_WAIT}s for engines to start"
sleep "$ENGINE_WAIT"

#.. Run the actual Snudda workflow as a single task and remember its status
randomize_vni
snudda_status=0
srun -n 1 -N 1 --exact --overlap --mem=0 ./Dardel_runSnudda_inside.sh || snudda_status=$?


echo " "

# Print the end time and also append it to the timing file.
echo "JOB END $(date)" | tee -a start_time_network_connect.txt

# Wait for the background controller and engine job steps.
wait

# Report the outcome of the Snudda step as the job's exit status.
exit "$snudda_status"

120 changes: 120 additions & 0 deletions examples/parallel/KTH_PDC/Dardel_runSnudda_inside.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash
#
# Runs the Snudda workflow (init, place, detect, prune, input) for one network
# and then shuts down the ipcontroller via ipcontroller_shutdown.py.
#
# Intended to be launched with "srun -n 1" so that only one process runs it;
# any process with SLURM_PROCID > 0 exits immediately without doing work.
#
# Environment read:
#   HOME         - locates the Snudda checkout and optional BasalGangliaData
#   SLURM_PROCID - task rank set by srun (treated as 0 when unset)
#   SNUDDA_DATA  - overridden only if $HOME/BasalGangliaData/data exists
#
# Exit status: 0 on success, 1 if any workflow step fails.

SNUDDA_DIR=$HOME/Snudda/snudda
JOBDIR=networks/test_10k

SIMSIZE=10000

# Print an error, try to shut down the ipcontroller, and exit non-zero.
# The controller is shut down with the same helper the success path uses, so
# a failed run does not leave the controller running.
#   $1 - error message
abort_run() {
  echo "$1" >&2
  echo "Shutting down ipcontroller" >&2
  python ipcontroller_shutdown.py
  exit 1
}

# If the BasalGangliaData directory exists, then use that for our data
#/cfs/klemming/scratch/${USER:0:1}/$USER/BasalGangliaData/data
#BasalGangliaData/Parkinson/PD0
if [[ -d "$HOME/BasalGangliaData/data" ]]; then
  export SNUDDA_DATA="$HOME/BasalGangliaData/data"
  echo "Setting SNUDDA_DATA to $SNUDDA_DATA"
else
  echo "SNUDDA_DATA environment variable not changed (may be empty): $SNUDDA_DATA"
fi

mkdir -p -- "$JOBDIR"

echo "Dardel_runSnudda_inside.sh should be started with srun -n 1, to only get one process"

echo "SLURM_PROCID = $SLURM_PROCID"

# Only rank 0 runs the workflow. Default to 0 so the script can also be run
# outside srun without an "integer expression expected" error.
if (( ${SLURM_PROCID:-0} > 0 )); then
  exit 0
fi

# For debug purposes:
echo "PATH: $PATH"
echo "IPYTHONDIR: $IPYTHONDIR"
echo "PYTHONPATH: $PYTHONPATH"
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"

echo ">>>>>> Main process starting ipcluster"
echo

echo "Start time: " > start_time_network_connect.txt
date >> start_time_network_connect.txt

echo ">>> Init: $(date)"
if ! snudda init "${JOBDIR}" --size "${SIMSIZE}" --overwrite --randomseed 1234 --stayInside; then
  abort_run "Something went wrong during init, aborting!"
fi

# WE NOW START IPCLUSTER USING ipcontroller.sh INSTEAD...
#
# echo "SLURM_NODELIST = $SLURM_NODELIST"
# let NWORKERS="$SLURM_NTASKS - 1"
#
# echo ">>> NWORKERS " $NWORKERS
# echo ">>> Starting ipcluster `date`"
#
# #.. Start the ipcluster
# ipcluster start -n ${NWORKERS} \
# --ip='*' \
# --HeartMonitor.max_heartmonitor_misses=1000 \
# --HubFactory.registration_timeout=600 \
# --HeartMonitor.period=10000 &
#
#
# #.. Sleep to allow engines to start
# echo ">>> Wait 120s to allow engines to start"
# sleep 120 #60

echo ">>> Place: $(date)"
if ! snudda place "${JOBDIR}" --verbose; then
  abort_run "Something went wrong during placement, aborting!"
fi

echo ">>> Detect: $(date)"
if ! snudda detect "${JOBDIR}" --hvsize 50 --parallel; then
  abort_run "Something went wrong during detection, aborting!"
fi

echo ">>> Prune: $(date)"
if ! snudda prune "${JOBDIR}" --parallel; then
  abort_run "Something went wrong during pruning, aborting!"
fi

#echo ">>> Input: "`date`
# cp -a $SNUDDA_DIR/data/input_config/input-v10-scaled.json ${JOBDIR}/input.json
if ! cp -a -- "$SNUDDA_DIR/data/input_config/external-input-dSTR-scaled-v4.json" "${JOBDIR}/input.json"; then
  abort_run "Could not copy the input configuration, aborting!"
fi

if ! snudda input "${JOBDIR}" --parallel --time 5; then
  abort_run "Something went wrong during input generation, aborting!"
fi


#.. Shut down cluster
# ipcluster stop
#.. Shutdown ipcontroller
echo "Shutting down ipcontroller"

python ipcontroller_shutdown.py


date
#echo "JOB END "`date` start_time_network_connect.txt

echo "EXITING Dardel_runSnudda_inside.sh"

0 comments on commit 49bebb8

Please sign in to comment.